mirror of
https://github.com/loro-dev/loro.git
synced 2024-11-28 09:25:36 +00:00
feat: add insert_utf8 and delete_utf8 for Rust Text API (#396)
* feat: add insert_utf8 * chore: merge insert functions * fix: use utf8_to_unicode_index to reslove index * fix: add bound-check and use unicode PosType * feat: add delete_utf8 * perf: O(LogN) insert_utf8 * feat: add utf-16 cross unicode check * perf: O(LogN) delete_utf8 * chore: add api * chore: remove unused function * fix: api name and bindgen name * test: add utf8 js test --------- Co-authored-by: Zixuan Chen <remch183@outlook.com>
This commit is contained in:
parent
9eaaaeada9
commit
86c760abd0
8 changed files with 545 additions and 81 deletions
|
@ -1,4 +1,3 @@
|
|||
|
||||
use serde_columnar::ColumnarError;
|
||||
use thiserror::Error;
|
||||
|
||||
|
@ -69,6 +68,10 @@ pub enum LoroError {
|
|||
UndoWithDifferentPeerId { expected: PeerID, actual: PeerID },
|
||||
#[error("The input JSON schema is invalid")]
|
||||
InvalidJsonSchema,
|
||||
#[error("Cannot insert or delete utf-8 in the middle of the codepoint in Unicode.")]
|
||||
UTF8InUnicodeCodePoint { pos: usize },
|
||||
#[error("Cannot insert or delete utf-16 in the middle of the codepoint in Unicode.")]
|
||||
UTF16InUnicodeCodePoint { pos: usize },
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
|
|
|
@ -4,7 +4,10 @@ use generic_btree::{
|
|||
rle::{CanRemove, HasLength, Mergeable, Sliceable, TryInsert},
|
||||
BTree, BTreeTrait, Cursor,
|
||||
};
|
||||
use loro_common::{Counter, IdFull, IdLpSpan, IdSpan, Lamport, LoroValue, ID};
|
||||
use loro_common::{
|
||||
Counter, IdFull, IdLpSpan, IdSpan, Lamport, LoroError, LoroResult, LoroValue, ID,
|
||||
};
|
||||
use query::{ByteQuery, ByteQueryT};
|
||||
use serde::{ser::SerializeStruct, Serialize};
|
||||
use std::{
|
||||
fmt::{Display, Formatter},
|
||||
|
@ -118,6 +121,11 @@ mod text_chunk {
|
|||
self.unicode_len
|
||||
}
|
||||
|
||||
#[inline]
|
||||
/// Returns the length of `self.bytes` (the chunk's UTF-8 byte length) as an `i32`.
pub fn utf8_len(&self) -> i32 {
|
||||
// NOTE(review): `as i32` wraps silently if the length exceeds `i32::MAX` — confirm chunks are bounded.
self.bytes.len() as i32
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn unicode_len(&self) -> i32 {
|
||||
self.unicode_len
|
||||
|
@ -636,8 +644,7 @@ pub(crate) fn utf16_to_unicode_index(s: &str, utf16_index: usize) -> Result<usiz
|
|||
let mut current_utf16_index = 0;
|
||||
let mut current_unicode_index = 0;
|
||||
for (i, c) in s.chars().enumerate() {
|
||||
let len = c.len_utf16();
|
||||
current_utf16_index += len;
|
||||
current_utf16_index += c.len_utf16();
|
||||
if current_utf16_index == utf16_index {
|
||||
return Ok(i + 1);
|
||||
}
|
||||
|
@ -652,9 +659,38 @@ pub(crate) fn utf16_to_unicode_index(s: &str, utf16_index: usize) -> Result<usiz
|
|||
Err(current_unicode_index)
|
||||
}
|
||||
|
||||
pub(crate) fn utf8_to_unicode_index(s: &str, utf8_index: usize) -> Result<usize, usize> {
|
||||
if utf8_index == 0 {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let mut current_utf8_index = 0;
|
||||
let mut current_unicode_index = 0;
|
||||
for (i, c) in s.chars().enumerate() {
|
||||
let char_start = current_utf8_index;
|
||||
current_utf8_index += c.len_utf8();
|
||||
|
||||
if utf8_index == char_start {
|
||||
return Ok(i);
|
||||
}
|
||||
|
||||
if utf8_index < current_utf8_index {
|
||||
tracing::info!("WARNING: UTF-8 index is in the middle of a codepoint!");
|
||||
return Err(i);
|
||||
}
|
||||
current_unicode_index = i + 1;
|
||||
}
|
||||
|
||||
if current_utf8_index == utf8_index {
|
||||
Ok(current_unicode_index)
|
||||
} else {
|
||||
Err(current_unicode_index)
|
||||
}
|
||||
}
|
||||
|
||||
fn pos_to_unicode_index(s: &str, pos: usize, kind: PosType) -> Option<usize> {
|
||||
match kind {
|
||||
PosType::Bytes => todo!(),
|
||||
PosType::Bytes => utf8_to_unicode_index(s, pos).ok(),
|
||||
PosType::Unicode => Some(pos),
|
||||
PosType::Utf16 => utf16_to_unicode_index(s, pos).ok(),
|
||||
PosType::Entity => Some(pos),
|
||||
|
@ -910,6 +946,7 @@ mod query {
|
|||
|
||||
// Allow left to not at the correct utf16 boundary. If so fallback to the last position.
|
||||
// TODO: if we remove the use of query(pos-1), we won't need this fallback behavior
|
||||
// WARNING: Unable to report error!!!
|
||||
let offset = utf16_to_unicode_index(s.as_str(), left).unwrap_or_else(|e| e);
|
||||
(offset, true)
|
||||
}
|
||||
|
@ -963,13 +1000,55 @@ mod query {
|
|||
cache.entity_len as usize
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) struct ByteQueryT;
|
||||
pub(super) type ByteQuery = IndexQuery<ByteQueryT, RichtextTreeTrait>;
|
||||
impl QueryByLen<RichtextTreeTrait> for ByteQueryT {
|
||||
fn get_cache_len(cache: &<RichtextTreeTrait as BTreeTrait>::Cache) -> usize {
|
||||
cache.bytes as usize
|
||||
}
|
||||
fn get_elem_len(elem: &<RichtextTreeTrait as BTreeTrait>::Elem) -> usize {
|
||||
match elem {
|
||||
RichtextStateChunk::Text(s) => s.utf8_len() as usize,
|
||||
RichtextStateChunk::Style { .. } => 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_offset_and_found(
|
||||
left: usize,
|
||||
elem: &<RichtextTreeTrait as BTreeTrait>::Elem,
|
||||
) -> (usize, bool) {
|
||||
match elem {
|
||||
RichtextStateChunk::Text(s) => {
|
||||
if left == 0 {
|
||||
return (0, true);
|
||||
}
|
||||
|
||||
// Allow left to not at the correct utf16 boundary. If so fallback to the last position.
|
||||
// TODO: if we remove the use of query(pos-1), we won't need this fallback behavior
|
||||
// WARNING: Unable to report error!!!
|
||||
let offset = utf8_to_unicode_index(s.as_str(), left).unwrap_or_else(|e| e);
|
||||
(offset, true)
|
||||
}
|
||||
RichtextStateChunk::Style { .. } => (1, false),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_cache_entity_len(cache: &<RichtextTreeTrait as BTreeTrait>::Cache) -> usize {
|
||||
cache.entity_len as usize
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod cursor_cache {
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
|
||||
use super::{pos_to_unicode_index, unicode_to_utf16_index, PosType, RichtextTreeTrait};
|
||||
use super::{
|
||||
pos_to_unicode_index, unicode_to_utf16_index, unicode_to_utf8_index, PosType,
|
||||
RichtextTreeTrait,
|
||||
};
|
||||
use generic_btree::{rle::HasLength, BTree, Cursor, LeafIndex};
|
||||
use loro_common::LoroError;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct CursorCacheItem {
|
||||
|
@ -1038,9 +1117,34 @@ mod cursor_cache {
|
|||
entity_index: usize,
|
||||
cursor: Cursor,
|
||||
tree: &BTree<RichtextTreeTrait>,
|
||||
) {
|
||||
) -> Result<(), usize> {
|
||||
match kind {
|
||||
PosType::Bytes => todo!(),
|
||||
PosType::Bytes => {
|
||||
if cursor.offset == 0 {
|
||||
self.entity = Some(EntityIndexCacheItem {
|
||||
pos,
|
||||
pos_type: kind,
|
||||
entity_index,
|
||||
leaf: cursor.leaf,
|
||||
});
|
||||
} else {
|
||||
let elem = tree.get_elem(cursor.leaf).unwrap();
|
||||
let Some(s) = elem.as_str() else {
|
||||
return Ok(());
|
||||
};
|
||||
let utf8offset = unicode_to_utf8_index(s, cursor.offset).unwrap();
|
||||
if pos < utf8offset {
|
||||
return Err(pos);
|
||||
}
|
||||
self.entity = Some(EntityIndexCacheItem {
|
||||
pos: pos - utf8offset,
|
||||
pos_type: kind,
|
||||
entity_index: entity_index - cursor.offset,
|
||||
leaf: cursor.leaf,
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
PosType::Unicode | PosType::Entity => {
|
||||
self.entity = Some(EntityIndexCacheItem {
|
||||
pos: pos - cursor.offset,
|
||||
|
@ -1048,6 +1152,7 @@ mod cursor_cache {
|
|||
entity_index: entity_index - cursor.offset,
|
||||
leaf: cursor.leaf,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
PosType::Event if cfg!(not(feature = "wasm")) => {
|
||||
self.entity = Some(EntityIndexCacheItem {
|
||||
|
@ -1056,6 +1161,7 @@ mod cursor_cache {
|
|||
entity_index: entity_index - cursor.offset,
|
||||
leaf: cursor.leaf,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
_ => {
|
||||
// utf16
|
||||
|
@ -1068,8 +1174,13 @@ mod cursor_cache {
|
|||
});
|
||||
} else {
|
||||
let elem = tree.get_elem(cursor.leaf).unwrap();
|
||||
let Some(s) = elem.as_str() else { return };
|
||||
let Some(s) = elem.as_str() else {
|
||||
return Ok(());
|
||||
};
|
||||
let utf16offset = unicode_to_utf16_index(s, cursor.offset).unwrap();
|
||||
if pos < utf16offset {
|
||||
return Err(pos);
|
||||
}
|
||||
self.entity = Some(EntityIndexCacheItem {
|
||||
pos: pos - utf16offset,
|
||||
pos_type: kind,
|
||||
|
@ -1077,6 +1188,7 @@ mod cursor_cache {
|
|||
leaf: cursor.leaf,
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1196,9 +1308,9 @@ impl RichtextState {
|
|||
&mut self,
|
||||
pos: usize,
|
||||
pos_type: PosType,
|
||||
) -> usize {
|
||||
) -> Result<usize, LoroError> {
|
||||
if self.tree.is_empty() {
|
||||
return 0;
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
if let Some(pos) =
|
||||
|
@ -1211,11 +1323,11 @@ impl RichtextState {
|
|||
&self.tree,
|
||||
&self.cursor_cache
|
||||
);
|
||||
return pos;
|
||||
return Ok(pos);
|
||||
}
|
||||
|
||||
let (c, entity_index) = match pos_type {
|
||||
PosType::Bytes => todo!(),
|
||||
PosType::Bytes => self.find_best_insert_pos::<ByteQueryT>(pos),
|
||||
PosType::Unicode => self.find_best_insert_pos::<UnicodeQueryT>(pos),
|
||||
PosType::Utf16 => self.find_best_insert_pos::<Utf16QueryT>(pos),
|
||||
PosType::Entity => self.find_best_insert_pos::<EntityQueryT>(pos),
|
||||
|
@ -1227,12 +1339,23 @@ impl RichtextState {
|
|||
self.cursor_cache
|
||||
.record_cursor(entity_index, PosType::Entity, c, &self.tree);
|
||||
if !self.has_styles() {
|
||||
self.cursor_cache
|
||||
.record_entity_index(pos, pos_type, entity_index, c, &self.tree);
|
||||
if let Err(pos) = self.cursor_cache.record_entity_index(
|
||||
pos,
|
||||
pos_type,
|
||||
entity_index,
|
||||
c,
|
||||
&self.tree,
|
||||
) {
|
||||
return match pos_type {
|
||||
PosType::Bytes => Err(LoroError::UTF8InUnicodeCodePoint { pos: pos }),
|
||||
PosType::Utf16 => Err(LoroError::UTF16InUnicodeCodePoint { pos: pos }),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
entity_index
|
||||
Ok(entity_index)
|
||||
}
|
||||
|
||||
fn has_styles(&self) -> bool {
|
||||
|
@ -1251,8 +1374,12 @@ impl RichtextState {
|
|||
return (0..0, None);
|
||||
}
|
||||
|
||||
let start = self.get_entity_index_for_text_insert(range.start, pos_type);
|
||||
let end = self.get_entity_index_for_text_insert(range.end, pos_type);
|
||||
let start = self
|
||||
.get_entity_index_for_text_insert(range.start, pos_type)
|
||||
.unwrap();
|
||||
let end = self
|
||||
.get_entity_index_for_text_insert(range.end, pos_type)
|
||||
.unwrap();
|
||||
if self.has_styles() {
|
||||
(
|
||||
start..end,
|
||||
|
@ -1656,22 +1783,25 @@ impl RichtextState {
|
|||
pos: usize,
|
||||
len: usize,
|
||||
pos_type: PosType,
|
||||
) -> Vec<EntityRangeInfo> {
|
||||
) -> LoroResult<Vec<EntityRangeInfo>> {
|
||||
if self.tree.is_empty() {
|
||||
return Vec::new();
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
if len == 0 {
|
||||
return Vec::new();
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
if pos + len > self.len(pos_type) {
|
||||
return Vec::new();
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let mut ans: Vec<EntityRangeInfo> = Vec::new();
|
||||
let (start, end) = match pos_type {
|
||||
PosType::Bytes => todo!(),
|
||||
PosType::Bytes => (
|
||||
self.tree.query::<ByteQuery>(&pos).unwrap().cursor,
|
||||
self.tree.query::<ByteQuery>(&(pos + len)).unwrap().cursor,
|
||||
),
|
||||
PosType::Unicode => (
|
||||
self.tree.query::<UnicodeQuery>(&pos).unwrap().cursor,
|
||||
self.tree
|
||||
|
@ -1735,7 +1865,7 @@ impl RichtextState {
|
|||
}
|
||||
}
|
||||
|
||||
ans
|
||||
Ok(ans)
|
||||
}
|
||||
|
||||
// PERF: can be splitted into two methods. One is without cursor_to_event_index
|
||||
|
@ -2272,7 +2402,7 @@ impl RichtextState {
|
|||
pos: usize,
|
||||
kind: PosType,
|
||||
) -> Option<ID> {
|
||||
let v = &self.get_text_entity_ranges(pos, 1, kind);
|
||||
let v = &self.get_text_entity_ranges(pos, 1, kind).unwrap();
|
||||
let a = v.first()?;
|
||||
Some(a.id_start)
|
||||
}
|
||||
|
@ -2395,7 +2525,9 @@ mod test {
|
|||
{
|
||||
let state = &mut self.state;
|
||||
let text = self.bytes.slice(start..);
|
||||
let entity_index = state.get_entity_index_for_text_insert(pos, PosType::Unicode);
|
||||
let entity_index = state
|
||||
.get_entity_index_for_text_insert(pos, PosType::Unicode)
|
||||
.unwrap();
|
||||
state.insert_at_entity_index(entity_index, text, IdFull::new(0, 0, 0));
|
||||
};
|
||||
}
|
||||
|
@ -2403,7 +2535,8 @@ mod test {
|
|||
fn delete(&mut self, pos: usize, len: usize) {
|
||||
let ranges = self
|
||||
.state
|
||||
.get_text_entity_ranges(pos, len, PosType::Unicode);
|
||||
.get_text_entity_ranges(pos, len, PosType::Unicode)
|
||||
.unwrap();
|
||||
for range in ranges.into_iter().rev() {
|
||||
self.state.drain_by_entity_index(
|
||||
range.entity_start,
|
||||
|
@ -2416,10 +2549,12 @@ mod test {
|
|||
fn mark(&mut self, range: Range<usize>, style: Arc<StyleOp>) {
|
||||
let start = self
|
||||
.state
|
||||
.get_entity_index_for_text_insert(range.start, PosType::Unicode);
|
||||
.get_entity_index_for_text_insert(range.start, PosType::Unicode)
|
||||
.unwrap();
|
||||
let end = self
|
||||
.state
|
||||
.get_entity_index_for_text_insert(range.end, PosType::Unicode);
|
||||
.get_entity_index_for_text_insert(range.end, PosType::Unicode)
|
||||
.unwrap();
|
||||
self.state.mark_with_entity_index(start..end, style);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@ use crate::{
|
|||
richtext::{richtext_state::PosType, RichtextState, StyleOp, TextStyleInfoFlag},
|
||||
},
|
||||
cursor::{Cursor, Side},
|
||||
delta::{DeltaItem, StyleMeta, TreeExternalDiff},
|
||||
delta::{DeltaItem, Meta, StyleMeta, TreeExternalDiff},
|
||||
event::{Diff, TextDiffItem},
|
||||
op::ListSlice,
|
||||
state::{ContainerState, IndexType, State},
|
||||
|
@ -16,7 +16,7 @@ use crate::{
|
|||
};
|
||||
use append_only_bytes::BytesSlice;
|
||||
use enum_as_inner::EnumAsInner;
|
||||
use fxhash::{FxHashMap, FxHashSet};
|
||||
use fxhash::FxHashMap;
|
||||
use generic_btree::rle::HasLength;
|
||||
use loro_common::{
|
||||
ContainerID, ContainerType, IdFull, InternalString, LoroError, LoroResult, LoroValue, TreeID,
|
||||
|
@ -31,7 +31,8 @@ use std::{
|
|||
ops::Deref,
|
||||
sync::{Arc, Mutex, Weak},
|
||||
};
|
||||
use tracing::{debug, error, info, instrument, trace};
|
||||
|
||||
use tracing::{debug, error, info, instrument, Event};
|
||||
|
||||
mod tree;
|
||||
pub use tree::TreeHandler;
|
||||
|
@ -1337,7 +1338,8 @@ impl TextHandler {
|
|||
let mut t = t.try_lock().unwrap();
|
||||
let index = t
|
||||
.value
|
||||
.get_entity_index_for_text_insert(pos, PosType::Event);
|
||||
.get_entity_index_for_text_insert(pos, PosType::Event)
|
||||
.unwrap();
|
||||
t.value.insert_at_entity_index(
|
||||
index,
|
||||
BytesSlice::from_bytes(s.as_bytes()),
|
||||
|
@ -1349,16 +1351,89 @@ impl TextHandler {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn insert_utf8(&self, pos: usize, s: &str) -> LoroResult<()> {
|
||||
match &self.inner {
|
||||
MaybeDetached::Detached(t) => {
|
||||
let mut t = t.try_lock().unwrap();
|
||||
let index = t
|
||||
.value
|
||||
.get_entity_index_for_text_insert(pos, PosType::Bytes)
|
||||
.unwrap();
|
||||
t.value.insert_at_entity_index(
|
||||
index,
|
||||
BytesSlice::from_bytes(s.as_bytes()),
|
||||
IdFull::NONE_ID,
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
MaybeDetached::Attached(a) => a.with_txn(|txn| self.insert_with_txn_utf8(txn, pos, s)),
|
||||
}
|
||||
}
|
||||
|
||||
/// `pos` is a Event Index:
|
||||
///
|
||||
/// - if feature="wasm", pos is a UTF-16 index
|
||||
/// - if feature!="wasm", pos is a Unicode index
|
||||
pub fn insert_with_txn(&self, txn: &mut Transaction, pos: usize, s: &str) -> LoroResult<()> {
|
||||
self.insert_with_txn_and_attr(txn, pos, s, None)?;
|
||||
self.insert_with_txn_and_attr(txn, pos, s, None, PosType::Event)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// If attr is specified, it will be used as the attribute of the inserted text.
|
||||
pub fn insert_with_txn_utf8(
|
||||
&self,
|
||||
txn: &mut Transaction,
|
||||
pos: usize,
|
||||
s: &str,
|
||||
) -> LoroResult<()> {
|
||||
self.insert_with_txn_and_attr(txn, pos, s, None, PosType::Bytes)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// `pos` is a Event Index:
|
||||
///
|
||||
/// - if feature="wasm", pos is a UTF-16 index
|
||||
/// - if feature!="wasm", pos is a Unicode index
|
||||
///
|
||||
/// This method requires auto_commit to be enabled.
|
||||
pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> {
|
||||
match &self.inner {
|
||||
MaybeDetached::Detached(t) => {
|
||||
let mut t = t.try_lock().unwrap();
|
||||
let ranges = t
|
||||
.value
|
||||
.get_text_entity_ranges(pos, len, PosType::Event)
|
||||
.unwrap();
|
||||
for range in ranges.iter().rev() {
|
||||
t.value
|
||||
.drain_by_entity_index(range.entity_start, range.entity_len(), None);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
MaybeDetached::Attached(a) => a.with_txn(|txn| self.delete_with_txn(txn, pos, len)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_utf8(&self, pos: usize, len: usize) -> LoroResult<()> {
|
||||
match &self.inner {
|
||||
MaybeDetached::Detached(t) => {
|
||||
let mut t = t.try_lock().unwrap();
|
||||
let ranges = match t.value.get_text_entity_ranges(pos, len, PosType::Bytes) {
|
||||
Err(x) => return Err(x),
|
||||
Ok(x) => x,
|
||||
};
|
||||
for range in ranges.iter().rev() {
|
||||
t.value
|
||||
.drain_by_entity_index(range.entity_start, range.entity_len(), None);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
MaybeDetached::Attached(a) => {
|
||||
a.with_txn(|txn| self.delete_with_txn_utf8(txn, pos, len))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// If attr is specified, it will be used as the attribute of the inserted text.
|
||||
/// It will override the existing attribute of the text.
|
||||
fn insert_with_txn_and_attr(
|
||||
&self,
|
||||
|
@ -1366,27 +1441,51 @@ impl TextHandler {
|
|||
pos: usize,
|
||||
s: &str,
|
||||
attr: Option<&FxHashMap<String, LoroValue>>,
|
||||
pos_type: PosType,
|
||||
) -> Result<Vec<(InternalString, LoroValue)>, LoroError> {
|
||||
if s.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
if pos > self.len_event() {
|
||||
return Err(LoroError::OutOfBound {
|
||||
pos,
|
||||
len: self.len_event(),
|
||||
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
|
||||
});
|
||||
match pos_type {
|
||||
PosType::Event => {
|
||||
if pos > self.len_event() {
|
||||
return Err(LoroError::OutOfBound {
|
||||
pos,
|
||||
len: self.len_event(),
|
||||
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
|
||||
});
|
||||
}
|
||||
}
|
||||
PosType::Bytes => {
|
||||
if pos > self.len_utf8() {
|
||||
return Err(LoroError::OutOfBound {
|
||||
pos,
|
||||
len: self.len_utf8(),
|
||||
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
|
||||
});
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let inner = self.inner.try_attached_state()?;
|
||||
let (entity_index, styles) = inner.with_state(|state| {
|
||||
let richtext_state = state.as_richtext_state_mut().unwrap();
|
||||
let pos = richtext_state.get_entity_index_for_text_insert(pos);
|
||||
let pos = richtext_state.get_entity_index_for_text_insert(pos, pos_type);
|
||||
let pos = match pos {
|
||||
Err(_) => return (pos, StyleMeta::empty()),
|
||||
Ok(x) => x,
|
||||
};
|
||||
let styles = richtext_state.get_styles_at_entity_index(pos);
|
||||
(pos, styles)
|
||||
(Ok(pos), styles)
|
||||
});
|
||||
|
||||
let entity_index = match entity_index {
|
||||
Err(x) => return Err(x),
|
||||
_ => entity_index.unwrap(),
|
||||
};
|
||||
|
||||
let mut override_styles = Vec::new();
|
||||
if let Some(attr) = attr {
|
||||
// current styles
|
||||
|
@ -1442,50 +1541,66 @@ impl TextHandler {
|
|||
///
|
||||
/// - if feature="wasm", pos is a UTF-16 index
|
||||
/// - if feature!="wasm", pos is a Unicode index
|
||||
///
|
||||
/// This method requires auto_commit to be enabled.
|
||||
pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> {
|
||||
match &self.inner {
|
||||
MaybeDetached::Detached(t) => {
|
||||
let mut t = t.try_lock().unwrap();
|
||||
let ranges = t.value.get_text_entity_ranges(pos, len, PosType::Event);
|
||||
for range in ranges.iter().rev() {
|
||||
t.value
|
||||
.drain_by_entity_index(range.entity_start, range.entity_len(), None);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
MaybeDetached::Attached(a) => a.with_txn(|txn| self.delete_with_txn(txn, pos, len)),
|
||||
}
|
||||
pub fn delete_with_txn(&self, txn: &mut Transaction, pos: usize, len: usize) -> LoroResult<()> {
|
||||
self.delete_with_txn_inline(txn, pos, len, PosType::Event)
|
||||
}
|
||||
|
||||
/// `pos` is a Event Index:
|
||||
///
|
||||
/// - if feature="wasm", pos is a UTF-16 index
|
||||
/// - if feature!="wasm", pos is a Unicode index
|
||||
pub fn delete_with_txn(&self, txn: &mut Transaction, pos: usize, len: usize) -> LoroResult<()> {
|
||||
pub fn delete_with_txn_utf8(
|
||||
&self,
|
||||
txn: &mut Transaction,
|
||||
pos: usize,
|
||||
len: usize,
|
||||
) -> LoroResult<()> {
|
||||
self.delete_with_txn_inline(txn, pos, len, PosType::Bytes)
|
||||
}
|
||||
|
||||
fn delete_with_txn_inline(
|
||||
&self,
|
||||
txn: &mut Transaction,
|
||||
pos: usize,
|
||||
len: usize,
|
||||
pos_type: PosType,
|
||||
) -> LoroResult<()> {
|
||||
if len == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if pos + len > self.len_event() {
|
||||
error!("pos={} len={} len_event={}", pos, len, self.len_event());
|
||||
return Err(LoroError::OutOfBound {
|
||||
pos: pos + len,
|
||||
len: self.len_event(),
|
||||
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
|
||||
});
|
||||
// Bounds-check the range against the text length measured in the same unit
// as `pos`/`len`; other position types are not checked here.
match pos_type {
    PosType::Event => {
        if pos + len > self.len_event() {
            error!("pos={} len={} len_event={}", pos, len, self.len_event());
            return Err(LoroError::OutOfBound {
                pos: pos + len,
                len: self.len_event(),
                info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
            });
        }
    }
    PosType::Bytes => {
        if pos + len > self.len_utf8() {
            // Log and report the UTF-8 length — the limit that was actually
            // violated — rather than the event length.
            error!("pos={} len={} len_utf8={}", pos, len, self.len_utf8());
            return Err(LoroError::OutOfBound {
                pos: pos + len,
                len: self.len_utf8(),
                info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
            });
        }
    }
    _ => (),
}
|
||||
|
||||
let inner = self.inner.try_attached_state()?;
|
||||
let s = tracing::span!(tracing::Level::INFO, "delete", "pos={} len={}", pos, len);
|
||||
let _e = s.enter();
|
||||
let ranges = inner.with_state(|state| {
|
||||
let ranges = match inner.with_state(|state| {
|
||||
let richtext_state = state.as_richtext_state_mut().unwrap();
|
||||
richtext_state.get_text_entity_ranges_in_event_index_range(pos, len)
|
||||
});
|
||||
richtext_state.get_text_entity_ranges_in_event_index_range(pos, len, pos_type)
|
||||
}) {
|
||||
Err(x) => return Err(x),
|
||||
Ok(x) => x,
|
||||
};
|
||||
|
||||
debug_assert_eq!(ranges.iter().map(|x| x.event_len).sum::<usize>(), len);
|
||||
//debug_assert_eq!(ranges.iter().map(|x| x.event_len).sum::<usize>(), len);
|
||||
let mut event_end = (pos + len) as isize;
|
||||
for range in ranges.iter().rev() {
|
||||
let event_start = event_end - range.event_len as isize;
|
||||
|
@ -1749,6 +1864,7 @@ impl TextHandler {
|
|||
index,
|
||||
insert.as_str(),
|
||||
Some(attributes.as_ref().unwrap_or(&Default::default())),
|
||||
PosType::Event,
|
||||
)?;
|
||||
|
||||
for (key, value) in override_styles {
|
||||
|
@ -3558,14 +3674,14 @@ pub mod counter {
|
|||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
use super::{HandlerTrait, TextDelta};
|
||||
use crate::container::richtext::richtext_state::PosType;
|
||||
use crate::loro::LoroDoc;
|
||||
use crate::version::Frontiers;
|
||||
use crate::{fx_map, ToJson};
|
||||
use loro_common::ID;
|
||||
use serde_json::json;
|
||||
|
||||
use super::{HandlerTrait, TextDelta};
|
||||
|
||||
#[test]
|
||||
fn import() {
|
||||
let loro = LoroDoc::new();
|
||||
|
|
|
@ -5,7 +5,7 @@ use std::{
|
|||
|
||||
use fxhash::{FxHashMap, FxHashSet};
|
||||
use generic_btree::rle::HasLength;
|
||||
use loro_common::{ContainerID, InternalString, LoroResult, LoroValue, ID};
|
||||
use loro_common::{ContainerID, InternalString, LoroError, LoroResult, LoroValue, ID};
|
||||
use loro_delta::DeltaRopeBuilder;
|
||||
|
||||
use crate::{
|
||||
|
@ -743,10 +743,14 @@ impl RichtextState {
|
|||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn get_entity_index_for_text_insert(&mut self, event_index: usize) -> usize {
|
||||
pub(crate) fn get_entity_index_for_text_insert(
|
||||
&mut self,
|
||||
event_index: usize,
|
||||
pos_type: PosType,
|
||||
) -> Result<usize, LoroError> {
|
||||
self.state
|
||||
.get_mut()
|
||||
.get_entity_index_for_text_insert(event_index, PosType::Event)
|
||||
.get_entity_index_for_text_insert(event_index, pos_type)
|
||||
}
|
||||
|
||||
pub(crate) fn get_entity_range_and_styles_at_range(
|
||||
|
@ -771,10 +775,11 @@ impl RichtextState {
|
|||
&mut self,
|
||||
pos: usize,
|
||||
len: usize,
|
||||
) -> Vec<EntityRangeInfo> {
|
||||
pos_type: PosType,
|
||||
) -> LoroResult<Vec<EntityRangeInfo>> {
|
||||
self.state
|
||||
.get_mut()
|
||||
.get_text_entity_ranges(pos, len, PosType::Event)
|
||||
.get_text_entity_ranges(pos, len, pos_type)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
|
@ -960,3 +960,147 @@ fn counter() {
|
|||
let doc2 = LoroDoc::new_auto_commit();
|
||||
doc2.import_json_updates(json).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_utf8() {
|
||||
let doc = LoroDoc::new_auto_commit();
|
||||
let text = doc.get_text("text");
|
||||
text.insert_utf8(0, "Hello ").unwrap();
|
||||
text.insert_utf8(6, "World").unwrap();
|
||||
assert_eq!(
|
||||
text.get_richtext_value().to_json_value(),
|
||||
json!([{"insert":"Hello World"}])
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_utf8_cross_unicode_1() {
|
||||
let doc = LoroDoc::new_auto_commit();
|
||||
let text = doc.get_text("text");
|
||||
text.insert_utf8(0, "你好").unwrap();
|
||||
text.insert_utf8(3, "World").unwrap();
|
||||
assert_eq!(
|
||||
text.get_richtext_value().to_json_value(),
|
||||
json!([{"insert":"你World好"}])
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_utf8_cross_unicode_2() {
|
||||
let doc = LoroDoc::new_auto_commit();
|
||||
let text = doc.get_text("text");
|
||||
text.insert_utf8(0, "你好").unwrap();
|
||||
text.insert_utf8(6, "World").unwrap();
|
||||
assert_eq!(
|
||||
text.get_richtext_value().to_json_value(),
|
||||
json!([{"insert":"你好World"}])
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_utf8_detached() {
|
||||
let text = TextHandler::new_detached();
|
||||
text.insert_utf8(0, "Hello ").unwrap();
|
||||
text.insert_utf8(6, "World").unwrap();
|
||||
assert_eq!(
|
||||
text.get_richtext_value().to_json_value(),
|
||||
json!([{"insert":"Hello World"}])
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_insert_utf8_panic_cross_unicode() {
|
||||
let doc = LoroDoc::new_auto_commit();
|
||||
let text = doc.get_text("text");
|
||||
text.insert_utf8(0, "你好").unwrap();
|
||||
text.insert_utf8(1, "World").unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_insert_utf8_panic_out_bound() {
|
||||
let doc = LoroDoc::new_auto_commit();
|
||||
let text = doc.get_text("text");
|
||||
text.insert_utf8(0, "Hello ").unwrap();
|
||||
text.insert_utf8(7, "World").unwrap();
|
||||
}
|
||||
|
||||
// println!("{}", text.get_richtext_value().to_json_value().to_string());
|
||||
|
||||
#[test]
|
||||
fn test_delete_utf8() {
|
||||
let doc = LoroDoc::new_auto_commit();
|
||||
let text = doc.get_text("text");
|
||||
text.insert_utf8(0, "Hello").unwrap();
|
||||
text.delete_utf8(1, 3).unwrap();
|
||||
assert_eq!(
|
||||
text.get_richtext_value().to_json_value(),
|
||||
json!([{"insert":"Ho"}])
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_utf8_with_zero_len() {
|
||||
let doc = LoroDoc::new_auto_commit();
|
||||
let text = doc.get_text("text");
|
||||
text.insert_utf8(0, "Hello").unwrap();
|
||||
text.delete_utf8(1, 0).unwrap();
|
||||
assert_eq!(
|
||||
text.get_richtext_value().to_json_value(),
|
||||
json!([{"insert":"Hello"}])
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_utf8_cross_unicode() {
|
||||
let doc = LoroDoc::new_auto_commit();
|
||||
let text = doc.get_text("text");
|
||||
text.insert_utf8(0, "你好").unwrap();
|
||||
text.delete_utf8(0, 3).unwrap();
|
||||
assert_eq!(
|
||||
text.get_richtext_value().to_json_value(),
|
||||
json!([{"insert":"好"}])
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_utf8_detached() {
|
||||
let text = TextHandler::new_detached();
|
||||
text.insert_utf8(0, "Hello").unwrap();
|
||||
text.delete_utf8(1, 3).unwrap();
|
||||
assert_eq!(
|
||||
text.get_richtext_value().to_json_value(),
|
||||
json!([{"insert":"Ho"}])
|
||||
)
|
||||
}
|
||||
|
||||
// WARNING:
|
||||
// Due to the current inability to report an error on
|
||||
// get_offset_and_found on BTree, this test won't be ok.
|
||||
// #[test]
|
||||
// #[should_panic]
|
||||
// fn test_delete_utf8_panic_cross_unicode() {
|
||||
// let doc = LoroDoc::new_auto_commit();
|
||||
// let text = doc.get_text("text");
|
||||
// text.insert_utf8(0, "你好").unwrap();
|
||||
// text.delete_utf8(0, 2).unwrap();
|
||||
// }
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_delete_utf8_panic_out_bound_pos() {
|
||||
let doc = LoroDoc::new_auto_commit();
|
||||
let text = doc.get_text("text");
|
||||
text.insert(0, "Hello").unwrap();
|
||||
text.delete_utf8(10, 1).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_delete_utf8_panic_out_bound_len() {
|
||||
let doc = LoroDoc::new_auto_commit();
|
||||
let text = doc.get_text("text");
|
||||
text.insert(0, "Hello").unwrap();
|
||||
text.delete_utf8(1, 10).unwrap();
|
||||
}
|
||||
|
|
|
@ -1515,6 +1515,22 @@ impl LoroText {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Insert some string at utf-8 index.
|
||||
///
|
||||
/// @example
|
||||
/// ```ts
|
||||
/// import { Loro } from "loro-crdt";
|
||||
///
|
||||
/// const doc = new Loro();
|
||||
/// const text = doc.getText("text");
|
||||
/// text.insertUtf8(0, "Hello");
|
||||
/// ```
|
||||
#[wasm_bindgen(js_name = "insertUtf8")]
|
||||
pub fn insert_utf8(&mut self, index: usize, content: &str) -> JsResult<()> {
|
||||
self.handler.insert_utf8(index, content)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete elements from index to index + len
|
||||
///
|
||||
/// @example
|
||||
|
@ -1533,6 +1549,25 @@ impl LoroText {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete `len` UTF-8 bytes of text starting at the given UTF-8 byte index.
|
||||
///
|
||||
/// @example
|
||||
/// ```ts
|
||||
/// import { Loro } from "loro-crdt";
|
||||
///
|
||||
/// const doc = new Loro();
|
||||
/// const text = doc.getText("text");
|
||||
/// text.insertUtf8(0, "Hello");
|
||||
/// text.deleteUtf8(1, 3);
|
||||
/// const s = text.toString();
|
||||
/// console.log(s); // "Ho"
|
||||
/// ```
|
||||
#[wasm_bindgen(js_name = "deleteUtf8")]
|
||||
pub fn delete_utf8(&mut self, index: usize, len: usize) -> JsResult<()> {
|
||||
self.handler.delete_utf8(index, len)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Mark a range of text with a key and a value.
|
||||
///
|
||||
/// > You should call `configTextStyle` before using `mark` and `unmark`.
|
||||
|
|
|
@ -983,11 +983,21 @@ impl LoroText {
|
|||
self.handler.insert(pos, s)
|
||||
}
|
||||
|
||||
/// Insert a string at the given utf-8 position.
|
||||
pub fn insert_utf8(&self, pos: usize, s: &str) -> LoroResult<()> {
|
||||
self.handler.insert_utf8(pos, s)
|
||||
}
|
||||
|
||||
/// Delete a range of text at the given unicode position with unicode length.
|
||||
pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> {
|
||||
self.handler.delete(pos, len)
|
||||
}
|
||||
|
||||
/// Delete a range of text at the given utf-8 position with utf-8 length.
|
||||
pub fn delete_utf8(&self, pos: usize, len: usize) -> LoroResult<()> {
|
||||
self.handler.delete_utf8(pos, len)
|
||||
}
|
||||
|
||||
/// Whether the text container is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.handler.is_empty()
|
||||
|
|
|
@ -286,4 +286,20 @@ describe("richtext", () => {
|
|||
const text = doc.getText("text");
|
||||
text.insert(0, `“aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`);
|
||||
});
|
||||
|
||||
it("Insert/delete by utf8 index", () => {
|
||||
const doc = new Loro();
|
||||
const text = doc.getText('t');
|
||||
text.insert(0, "你好");
|
||||
text.insertUtf8(3, "a");
|
||||
text.insertUtf8(7, "b");
|
||||
expect(text.toDelta()).toStrictEqual([
|
||||
{ insert: "你a好b" },
|
||||
]);
|
||||
text.deleteUtf8(3, 4);
|
||||
expect(text.toDelta()).toStrictEqual([
|
||||
{ insert: "你b"},
|
||||
]);
|
||||
|
||||
});
|
||||
});
|
||||
|
|
Loading…
Reference in a new issue