diff --git a/crates/loro-common/src/error.rs b/crates/loro-common/src/error.rs index 1fb53b5c..cd838639 100644 --- a/crates/loro-common/src/error.rs +++ b/crates/loro-common/src/error.rs @@ -1,4 +1,3 @@ - use serde_columnar::ColumnarError; use thiserror::Error; @@ -69,6 +68,10 @@ pub enum LoroError { UndoWithDifferentPeerId { expected: PeerID, actual: PeerID }, #[error("The input JSON schema is invalid")] InvalidJsonSchema, + #[error("Cannot insert or delete utf-8 in the middle of the codepoint in Unicode.")] + UTF8InUnicodeCodePoint { pos: usize }, + #[error("Cannot insert or delete utf-16 in the middle of the codepoint in Unicode.")] + UTF16InUnicodeCodePoint { pos: usize }, } #[derive(Error, Debug)] diff --git a/crates/loro-internal/src/container/richtext/richtext_state.rs b/crates/loro-internal/src/container/richtext/richtext_state.rs index ce853ead..3d4492ab 100644 --- a/crates/loro-internal/src/container/richtext/richtext_state.rs +++ b/crates/loro-internal/src/container/richtext/richtext_state.rs @@ -4,7 +4,10 @@ use generic_btree::{ rle::{CanRemove, HasLength, Mergeable, Sliceable, TryInsert}, BTree, BTreeTrait, Cursor, }; -use loro_common::{Counter, IdFull, IdLpSpan, IdSpan, Lamport, LoroValue, ID}; +use loro_common::{ + Counter, IdFull, IdLpSpan, IdSpan, Lamport, LoroError, LoroResult, LoroValue, ID, +}; +use query::{ByteQuery, ByteQueryT}; use serde::{ser::SerializeStruct, Serialize}; use std::{ fmt::{Display, Formatter}, @@ -118,6 +121,11 @@ mod text_chunk { self.unicode_len } + #[inline] + pub fn utf8_len(&self) -> i32 { + self.bytes.len() as i32 + } + #[inline] pub fn unicode_len(&self) -> i32 { self.unicode_len @@ -636,8 +644,7 @@ pub(crate) fn utf16_to_unicode_index(s: &str, utf16_index: usize) -> Result Result Result { + if utf8_index == 0 { + return Ok(0); + } + + let mut current_utf8_index = 0; + let mut current_unicode_index = 0; + for (i, c) in s.chars().enumerate() { + let char_start = current_utf8_index; + current_utf8_index += c.len_utf8(); 
+ + if utf8_index == char_start { + return Ok(i); + } + + if utf8_index < current_utf8_index { + tracing::info!("WARNING: UTF-8 index is in the middle of a codepoint!"); + return Err(i); + } + current_unicode_index = i + 1; + } + + if current_utf8_index == utf8_index { + Ok(current_unicode_index) + } else { + Err(current_unicode_index) + } +} + fn pos_to_unicode_index(s: &str, pos: usize, kind: PosType) -> Option { match kind { - PosType::Bytes => todo!(), + PosType::Bytes => utf8_to_unicode_index(s, pos).ok(), PosType::Unicode => Some(pos), PosType::Utf16 => utf16_to_unicode_index(s, pos).ok(), PosType::Entity => Some(pos), @@ -910,6 +946,7 @@ mod query { // Allow left to not at the correct utf16 boundary. If so fallback to the last position. // TODO: if we remove the use of query(pos-1), we won't need this fallback behavior + // WARNING: Unable to report error!!! let offset = utf16_to_unicode_index(s.as_str(), left).unwrap_or_else(|e| e); (offset, true) } @@ -963,13 +1000,55 @@ mod query { cache.entity_len as usize } } + + pub(super) struct ByteQueryT; + pub(super) type ByteQuery = IndexQuery; + impl QueryByLen for ByteQueryT { + fn get_cache_len(cache: &::Cache) -> usize { + cache.bytes as usize + } + fn get_elem_len(elem: &::Elem) -> usize { + match elem { + RichtextStateChunk::Text(s) => s.utf8_len() as usize, + RichtextStateChunk::Style { .. } => 0, + } + } + + fn get_offset_and_found( + left: usize, + elem: &::Elem, + ) -> (usize, bool) { + match elem { + RichtextStateChunk::Text(s) => { + if left == 0 { + return (0, true); + } + + // Allow `left` to not be at a UTF-8 codepoint boundary. If so, fall back to the last position. + // TODO: if we remove the use of query(pos-1), we won't need this fallback behavior + // WARNING: Unable to report error!!! + let offset = utf8_to_unicode_index(s.as_str(), left).unwrap_or_else(|e| e); + (offset, true) + } + RichtextStateChunk::Style { .. 
} => (1, false), + } + } + + fn get_cache_entity_len(cache: &::Cache) -> usize { + cache.entity_len as usize + } + } } mod cursor_cache { use std::sync::atomic::AtomicUsize; - use super::{pos_to_unicode_index, unicode_to_utf16_index, PosType, RichtextTreeTrait}; + use super::{ + pos_to_unicode_index, unicode_to_utf16_index, unicode_to_utf8_index, PosType, + RichtextTreeTrait, + }; use generic_btree::{rle::HasLength, BTree, Cursor, LeafIndex}; + use loro_common::LoroError; #[derive(Debug, Clone)] struct CursorCacheItem { @@ -1038,9 +1117,34 @@ mod cursor_cache { entity_index: usize, cursor: Cursor, tree: &BTree, - ) { + ) -> Result<(), usize> { match kind { - PosType::Bytes => todo!(), + PosType::Bytes => { + if cursor.offset == 0 { + self.entity = Some(EntityIndexCacheItem { + pos, + pos_type: kind, + entity_index, + leaf: cursor.leaf, + }); + } else { + let elem = tree.get_elem(cursor.leaf).unwrap(); + let Some(s) = elem.as_str() else { + return Ok(()); + }; + let utf8offset = unicode_to_utf8_index(s, cursor.offset).unwrap(); + if pos < utf8offset { + return Err(pos); + } + self.entity = Some(EntityIndexCacheItem { + pos: pos - utf8offset, + pos_type: kind, + entity_index: entity_index - cursor.offset, + leaf: cursor.leaf, + }); + } + Ok(()) + } PosType::Unicode | PosType::Entity => { self.entity = Some(EntityIndexCacheItem { pos: pos - cursor.offset, @@ -1048,6 +1152,7 @@ mod cursor_cache { entity_index: entity_index - cursor.offset, leaf: cursor.leaf, }); + Ok(()) } PosType::Event if cfg!(not(feature = "wasm")) => { self.entity = Some(EntityIndexCacheItem { @@ -1056,6 +1161,7 @@ mod cursor_cache { entity_index: entity_index - cursor.offset, leaf: cursor.leaf, }); + Ok(()) } _ => { // utf16 @@ -1068,8 +1174,13 @@ mod cursor_cache { }); } else { let elem = tree.get_elem(cursor.leaf).unwrap(); - let Some(s) = elem.as_str() else { return }; + let Some(s) = elem.as_str() else { + return Ok(()); + }; let utf16offset = unicode_to_utf16_index(s, cursor.offset).unwrap(); 
+ if pos < utf16offset { + return Err(pos); + } self.entity = Some(EntityIndexCacheItem { pos: pos - utf16offset, pos_type: kind, @@ -1077,6 +1188,7 @@ mod cursor_cache { leaf: cursor.leaf, }); } + Ok(()) } } } @@ -1196,9 +1308,9 @@ impl RichtextState { &mut self, pos: usize, pos_type: PosType, - ) -> usize { + ) -> Result { if self.tree.is_empty() { - return 0; + return Ok(0); } if let Some(pos) = @@ -1211,11 +1323,11 @@ impl RichtextState { &self.tree, &self.cursor_cache ); - return pos; + return Ok(pos); } let (c, entity_index) = match pos_type { - PosType::Bytes => todo!(), + PosType::Bytes => self.find_best_insert_pos::(pos), PosType::Unicode => self.find_best_insert_pos::(pos), PosType::Utf16 => self.find_best_insert_pos::(pos), PosType::Entity => self.find_best_insert_pos::(pos), @@ -1227,12 +1339,23 @@ impl RichtextState { self.cursor_cache .record_cursor(entity_index, PosType::Entity, c, &self.tree); if !self.has_styles() { - self.cursor_cache - .record_entity_index(pos, pos_type, entity_index, c, &self.tree); + if let Err(pos) = self.cursor_cache.record_entity_index( + pos, + pos_type, + entity_index, + c, + &self.tree, + ) { + return match pos_type { + PosType::Bytes => Err(LoroError::UTF8InUnicodeCodePoint { pos: pos }), + PosType::Utf16 => Err(LoroError::UTF16InUnicodeCodePoint { pos: pos }), + _ => unreachable!(), + }; + } } } - entity_index + Ok(entity_index) } fn has_styles(&self) -> bool { @@ -1251,8 +1374,12 @@ impl RichtextState { return (0..0, None); } - let start = self.get_entity_index_for_text_insert(range.start, pos_type); - let end = self.get_entity_index_for_text_insert(range.end, pos_type); + let start = self + .get_entity_index_for_text_insert(range.start, pos_type) + .unwrap(); + let end = self + .get_entity_index_for_text_insert(range.end, pos_type) + .unwrap(); if self.has_styles() { ( start..end, @@ -1656,22 +1783,25 @@ impl RichtextState { pos: usize, len: usize, pos_type: PosType, - ) -> Vec { + ) -> LoroResult> { if 
self.tree.is_empty() { - return Vec::new(); + return Ok(Vec::new()); } if len == 0 { - return Vec::new(); + return Ok(Vec::new()); } if pos + len > self.len(pos_type) { - return Vec::new(); + return Ok(Vec::new()); } let mut ans: Vec = Vec::new(); let (start, end) = match pos_type { - PosType::Bytes => todo!(), + PosType::Bytes => ( + self.tree.query::(&pos).unwrap().cursor, + self.tree.query::(&(pos + len)).unwrap().cursor, + ), PosType::Unicode => ( self.tree.query::(&pos).unwrap().cursor, self.tree @@ -1735,7 +1865,7 @@ impl RichtextState { } } - ans + Ok(ans) } // PERF: can be splitted into two methods. One is without cursor_to_event_index @@ -2272,7 +2402,7 @@ impl RichtextState { pos: usize, kind: PosType, ) -> Option { - let v = &self.get_text_entity_ranges(pos, 1, kind); + let v = &self.get_text_entity_ranges(pos, 1, kind).unwrap(); let a = v.first()?; Some(a.id_start) } @@ -2395,7 +2525,9 @@ mod test { { let state = &mut self.state; let text = self.bytes.slice(start..); - let entity_index = state.get_entity_index_for_text_insert(pos, PosType::Unicode); + let entity_index = state + .get_entity_index_for_text_insert(pos, PosType::Unicode) + .unwrap(); state.insert_at_entity_index(entity_index, text, IdFull::new(0, 0, 0)); }; } @@ -2403,7 +2535,8 @@ mod test { fn delete(&mut self, pos: usize, len: usize) { let ranges = self .state - .get_text_entity_ranges(pos, len, PosType::Unicode); + .get_text_entity_ranges(pos, len, PosType::Unicode) + .unwrap(); for range in ranges.into_iter().rev() { self.state.drain_by_entity_index( range.entity_start, @@ -2416,10 +2549,12 @@ mod test { fn mark(&mut self, range: Range, style: Arc) { let start = self .state - .get_entity_index_for_text_insert(range.start, PosType::Unicode); + .get_entity_index_for_text_insert(range.start, PosType::Unicode) + .unwrap(); let end = self .state - .get_entity_index_for_text_insert(range.end, PosType::Unicode); + .get_entity_index_for_text_insert(range.end, PosType::Unicode) + .unwrap(); 
self.state.mark_with_entity_index(start..end, style); } } diff --git a/crates/loro-internal/src/handler.rs b/crates/loro-internal/src/handler.rs index 7687415b..d53bd8e2 100644 --- a/crates/loro-internal/src/handler.rs +++ b/crates/loro-internal/src/handler.rs @@ -7,7 +7,7 @@ use crate::{ richtext::{richtext_state::PosType, RichtextState, StyleOp, TextStyleInfoFlag}, }, cursor::{Cursor, Side}, - delta::{DeltaItem, StyleMeta, TreeExternalDiff}, + delta::{DeltaItem, Meta, StyleMeta, TreeExternalDiff}, event::{Diff, TextDiffItem}, op::ListSlice, state::{ContainerState, IndexType, State}, @@ -16,7 +16,7 @@ use crate::{ }; use append_only_bytes::BytesSlice; use enum_as_inner::EnumAsInner; -use fxhash::{FxHashMap, FxHashSet}; +use fxhash::FxHashMap; use generic_btree::rle::HasLength; use loro_common::{ ContainerID, ContainerType, IdFull, InternalString, LoroError, LoroResult, LoroValue, TreeID, @@ -31,7 +31,8 @@ use std::{ ops::Deref, sync::{Arc, Mutex, Weak}, }; -use tracing::{debug, error, info, instrument, trace}; + +use tracing::{debug, error, info, instrument, Event}; mod tree; pub use tree::TreeHandler; @@ -1337,7 +1338,8 @@ impl TextHandler { let mut t = t.try_lock().unwrap(); let index = t .value - .get_entity_index_for_text_insert(pos, PosType::Event); + .get_entity_index_for_text_insert(pos, PosType::Event) + .unwrap(); t.value.insert_at_entity_index( index, BytesSlice::from_bytes(s.as_bytes()), @@ -1349,16 +1351,89 @@ impl TextHandler { } } + pub fn insert_utf8(&self, pos: usize, s: &str) -> LoroResult<()> { + match &self.inner { + MaybeDetached::Detached(t) => { + let mut t = t.try_lock().unwrap(); + let index = t + .value + .get_entity_index_for_text_insert(pos, PosType::Bytes) + .unwrap(); + t.value.insert_at_entity_index( + index, + BytesSlice::from_bytes(s.as_bytes()), + IdFull::NONE_ID, + ); + Ok(()) + } + MaybeDetached::Attached(a) => a.with_txn(|txn| self.insert_with_txn_utf8(txn, pos, s)), + } + } + /// `pos` is a Event Index: /// /// - if 
feature="wasm", pos is a UTF-16 index /// - if feature!="wasm", pos is a Unicode index pub fn insert_with_txn(&self, txn: &mut Transaction, pos: usize, s: &str) -> LoroResult<()> { - self.insert_with_txn_and_attr(txn, pos, s, None)?; + self.insert_with_txn_and_attr(txn, pos, s, None, PosType::Event)?; Ok(()) } - /// If attr is specified, it will be used as the attribute of the inserted text. + pub fn insert_with_txn_utf8( + &self, + txn: &mut Transaction, + pos: usize, + s: &str, + ) -> LoroResult<()> { + self.insert_with_txn_and_attr(txn, pos, s, None, PosType::Bytes)?; + Ok(()) + } + + /// `pos` is an Event Index: + /// + /// - if feature="wasm", pos is a UTF-16 index + /// - if feature!="wasm", pos is a Unicode index + /// + /// This method requires auto_commit to be enabled. + pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> { + match &self.inner { + MaybeDetached::Detached(t) => { + let mut t = t.try_lock().unwrap(); + let ranges = t + .value + .get_text_entity_ranges(pos, len, PosType::Event) + .unwrap(); + for range in ranges.iter().rev() { + t.value + .drain_by_entity_index(range.entity_start, range.entity_len(), None); + } + Ok(()) + } + MaybeDetached::Attached(a) => a.with_txn(|txn| self.delete_with_txn(txn, pos, len)), + } + } + + pub fn delete_utf8(&self, pos: usize, len: usize) -> LoroResult<()> { + match &self.inner { + MaybeDetached::Detached(t) => { + let mut t = t.try_lock().unwrap(); + let ranges = match t.value.get_text_entity_ranges(pos, len, PosType::Bytes) { + Err(x) => return Err(x), + Ok(x) => x, + }; + for range in ranges.iter().rev() { + t.value + .drain_by_entity_index(range.entity_start, range.entity_len(), None); + } + Ok(()) + } + MaybeDetached::Attached(a) => { + a.with_txn(|txn| self.delete_with_txn_utf8(txn, pos, len)) + } + } + } + + /// If attr is specified, it will be used as the attribute of the inserted text. /// It will override the existing attribute of the text. 
fn insert_with_txn_and_attr( &self, @@ -1366,27 +1441,51 @@ impl TextHandler { pos: usize, s: &str, attr: Option<&FxHashMap>, + pos_type: PosType, ) -> Result, LoroError> { if s.is_empty() { return Ok(Vec::new()); } - if pos > self.len_event() { - return Err(LoroError::OutOfBound { - pos, - len: self.len_event(), - info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(), - }); + match pos_type { + PosType::Event => { + if pos > self.len_event() { + return Err(LoroError::OutOfBound { + pos, + len: self.len_event(), + info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(), + }); + } + } + PosType::Bytes => { + if pos > self.len_utf8() { + return Err(LoroError::OutOfBound { + pos, + len: self.len_utf8(), + info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(), + }); + } + } + _ => (), } let inner = self.inner.try_attached_state()?; let (entity_index, styles) = inner.with_state(|state| { let richtext_state = state.as_richtext_state_mut().unwrap(); - let pos = richtext_state.get_entity_index_for_text_insert(pos); + let pos = richtext_state.get_entity_index_for_text_insert(pos, pos_type); + let pos = match pos { + Err(_) => return (pos, StyleMeta::empty()), + Ok(x) => x, + }; let styles = richtext_state.get_styles_at_entity_index(pos); - (pos, styles) + (Ok(pos), styles) }); + let entity_index = match entity_index { + Err(x) => return Err(x), + _ => entity_index.unwrap(), + }; + let mut override_styles = Vec::new(); if let Some(attr) = attr { // current styles @@ -1442,50 +1541,66 @@ impl TextHandler { /// /// - if feature="wasm", pos is a UTF-16 index /// - if feature!="wasm", pos is a Unicode index - /// - /// This method requires auto_commit to be enabled. 
- pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> { - match &self.inner { - MaybeDetached::Detached(t) => { - let mut t = t.try_lock().unwrap(); - let ranges = t.value.get_text_entity_ranges(pos, len, PosType::Event); - for range in ranges.iter().rev() { - t.value - .drain_by_entity_index(range.entity_start, range.entity_len(), None); - } - Ok(()) - } - MaybeDetached::Attached(a) => a.with_txn(|txn| self.delete_with_txn(txn, pos, len)), - } + pub fn delete_with_txn(&self, txn: &mut Transaction, pos: usize, len: usize) -> LoroResult<()> { + self.delete_with_txn_inline(txn, pos, len, PosType::Event) } - /// `pos` is a Event Index: - /// - /// - if feature="wasm", pos is a UTF-16 index - /// - if feature!="wasm", pos is a Unicode index - pub fn delete_with_txn(&self, txn: &mut Transaction, pos: usize, len: usize) -> LoroResult<()> { + pub fn delete_with_txn_utf8( + &self, + txn: &mut Transaction, + pos: usize, + len: usize, + ) -> LoroResult<()> { + self.delete_with_txn_inline(txn, pos, len, PosType::Bytes) + } + + fn delete_with_txn_inline( + &self, + txn: &mut Transaction, + pos: usize, + len: usize, + pos_type: PosType, + ) -> LoroResult<()> { if len == 0 { return Ok(()); } - if pos + len > self.len_event() { - error!("pos={} len={} len_event={}", pos, len, self.len_event()); - return Err(LoroError::OutOfBound { - pos: pos + len, - len: self.len_event(), - info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(), - }); + match pos_type { + PosType::Event => { + if pos + len > self.len_event() { + error!("pos={} len={} len_event={}", pos, len, self.len_event()); + return Err(LoroError::OutOfBound { + pos: pos + len, + len: self.len_event(), + info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(), + }); + } + } + PosType::Bytes => { + if pos + len > self.len_utf8() { + error!("pos={} len={} len_utf8={}", pos, len, self.len_utf8()); + return Err(LoroError::OutOfBound { + pos: pos + len, + len: self.len_utf8(), + info: 
format!("Position: {}:{}", file!(), line!()).into_boxed_str(), + }); + } + } + _ => (), } let inner = self.inner.try_attached_state()?; let s = tracing::span!(tracing::Level::INFO, "delete", "pos={} len={}", pos, len); let _e = s.enter(); - let ranges = inner.with_state(|state| { + let ranges = match inner.with_state(|state| { let richtext_state = state.as_richtext_state_mut().unwrap(); - richtext_state.get_text_entity_ranges_in_event_index_range(pos, len) - }); + richtext_state.get_text_entity_ranges_in_event_index_range(pos, len, pos_type) + }) { + Err(x) => return Err(x), + Ok(x) => x, + }; - debug_assert_eq!(ranges.iter().map(|x| x.event_len).sum::(), len); + //debug_assert_eq!(ranges.iter().map(|x| x.event_len).sum::(), len); let mut event_end = (pos + len) as isize; for range in ranges.iter().rev() { let event_start = event_end - range.event_len as isize; @@ -1749,6 +1864,7 @@ impl TextHandler { index, insert.as_str(), Some(attributes.as_ref().unwrap_or(&Default::default())), + PosType::Event, )?; for (key, value) in override_styles { @@ -3558,14 +3674,14 @@ pub mod counter { #[cfg(test)] mod test { + use super::{HandlerTrait, TextDelta}; + use crate::container::richtext::richtext_state::PosType; use crate::loro::LoroDoc; use crate::version::Frontiers; use crate::{fx_map, ToJson}; use loro_common::ID; use serde_json::json; - use super::{HandlerTrait, TextDelta}; - #[test] fn import() { let loro = LoroDoc::new(); diff --git a/crates/loro-internal/src/state/richtext_state.rs b/crates/loro-internal/src/state/richtext_state.rs index d20c25fd..02b138e9 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -5,7 +5,7 @@ use std::{ use fxhash::{FxHashMap, FxHashSet}; use generic_btree::rle::HasLength; -use loro_common::{ContainerID, InternalString, LoroResult, LoroValue, ID}; +use loro_common::{ContainerID, InternalString, LoroError, LoroResult, LoroValue, ID}; use loro_delta::DeltaRopeBuilder; use 
crate::{ @@ -743,10 +743,14 @@ impl RichtextState { } #[inline] - pub(crate) fn get_entity_index_for_text_insert(&mut self, event_index: usize) -> usize { + pub(crate) fn get_entity_index_for_text_insert( + &mut self, + event_index: usize, + pos_type: PosType, + ) -> Result { self.state .get_mut() - .get_entity_index_for_text_insert(event_index, PosType::Event) + .get_entity_index_for_text_insert(event_index, pos_type) } pub(crate) fn get_entity_range_and_styles_at_range( @@ -771,10 +775,11 @@ impl RichtextState { &mut self, pos: usize, len: usize, - ) -> Vec { + pos_type: PosType, + ) -> LoroResult> { self.state .get_mut() - .get_text_entity_ranges(pos, len, PosType::Event) + .get_text_entity_ranges(pos, len, pos_type) } #[inline] diff --git a/crates/loro-internal/tests/test.rs b/crates/loro-internal/tests/test.rs index 72fab027..c1269898 100644 --- a/crates/loro-internal/tests/test.rs +++ b/crates/loro-internal/tests/test.rs @@ -960,3 +960,147 @@ fn counter() { let doc2 = LoroDoc::new_auto_commit(); doc2.import_json_updates(json).unwrap(); } + +#[test] +fn test_insert_utf8() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert_utf8(0, "Hello ").unwrap(); + text.insert_utf8(6, "World").unwrap(); + assert_eq!( + text.get_richtext_value().to_json_value(), + json!([{"insert":"Hello World"}]) + ) +} + +#[test] +fn test_insert_utf8_cross_unicode_1() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert_utf8(0, "你好").unwrap(); + text.insert_utf8(3, "World").unwrap(); + assert_eq!( + text.get_richtext_value().to_json_value(), + json!([{"insert":"你World好"}]) + ) +} + +#[test] +fn test_insert_utf8_cross_unicode_2() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert_utf8(0, "你好").unwrap(); + text.insert_utf8(6, "World").unwrap(); + assert_eq!( + text.get_richtext_value().to_json_value(), + json!([{"insert":"你好World"}]) + ) +} + +#[test] +fn 
test_insert_utf8_detached() { + let text = TextHandler::new_detached(); + text.insert_utf8(0, "Hello ").unwrap(); + text.insert_utf8(6, "World").unwrap(); + assert_eq!( + text.get_richtext_value().to_json_value(), + json!([{"insert":"Hello World"}]) + ) +} + +#[test] +#[should_panic] +fn test_insert_utf8_panic_cross_unicode() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert_utf8(0, "你好").unwrap(); + text.insert_utf8(1, "World").unwrap(); +} + +#[test] +#[should_panic] +fn test_insert_utf8_panic_out_bound() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert_utf8(0, "Hello ").unwrap(); + text.insert_utf8(7, "World").unwrap(); +} + +// println!("{}", text.get_richtext_value().to_json_value().to_string()); + +#[test] +fn test_delete_utf8() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert_utf8(0, "Hello").unwrap(); + text.delete_utf8(1, 3).unwrap(); + assert_eq!( + text.get_richtext_value().to_json_value(), + json!([{"insert":"Ho"}]) + ) +} + +#[test] +fn test_delete_utf8_with_zero_len() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert_utf8(0, "Hello").unwrap(); + text.delete_utf8(1, 0).unwrap(); + assert_eq!( + text.get_richtext_value().to_json_value(), + json!([{"insert":"Hello"}]) + ) +} + +#[test] +fn test_delete_utf8_cross_unicode() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert_utf8(0, "你好").unwrap(); + text.delete_utf8(0, 3).unwrap(); + assert_eq!( + text.get_richtext_value().to_json_value(), + json!([{"insert":"好"}]) + ) +} + +#[test] +fn test_delete_utf8_detached() { + let text = TextHandler::new_detached(); + text.insert_utf8(0, "Hello").unwrap(); + text.delete_utf8(1, 3).unwrap(); + assert_eq!( + text.get_richtext_value().to_json_value(), + json!([{"insert":"Ho"}]) + ) +} + +// WARNING: +// Due to the current inability to report an error on +// 
get_offset_and_found on BTree, this test won't be ok. +// #[test] +// #[should_panic] +// fn test_delete_utf8_panic_cross_unicode() { +// let doc = LoroDoc::new_auto_commit(); +// let text = doc.get_text("text"); +// text.insert_utf8(0, "你好").unwrap(); +// text.delete_utf8(0, 2).unwrap(); +// } + +#[test] +#[should_panic] +fn test_delete_utf8_panic_out_bound_pos() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert(0, "Hello").unwrap(); + text.delete_utf8(10, 1).unwrap(); +} + +#[test] +#[should_panic] +fn test_delete_utf8_panic_out_bound_len() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert(0, "Hello").unwrap(); + text.delete_utf8(1, 10).unwrap(); +} diff --git a/crates/loro-wasm/src/lib.rs b/crates/loro-wasm/src/lib.rs index 3b9bfb2f..7d77919b 100644 --- a/crates/loro-wasm/src/lib.rs +++ b/crates/loro-wasm/src/lib.rs @@ -1515,6 +1515,22 @@ impl LoroText { Ok(()) } + /// Insert some string at utf-8 index. 
+ /// + /// @example + /// ```ts + /// import { Loro } from "loro-crdt"; + /// + /// const doc = new Loro(); + /// const text = doc.getText("text"); + /// text.insertUtf8(0, "Hello"); + /// ``` + #[wasm_bindgen(js_name = "insertUtf8")] + pub fn insert_utf8(&mut self, index: usize, content: &str) -> JsResult<()> { + self.handler.insert_utf8(index, content)?; + Ok(()) + } + /// Delete elements from index to index + len /// /// @example @@ -1533,6 +1549,25 @@ impl LoroText { Ok(()) } + /// Delete the utf-8 byte range from index to index + len + /// + /// @example + /// ```ts + /// import { Loro } from "loro-crdt"; + /// + /// const doc = new Loro(); + /// const text = doc.getText("text"); + /// text.insertUtf8(0, "Hello"); + /// text.deleteUtf8(1, 3); + /// const s = text.toString(); + /// console.log(s); // "Ho" + /// ``` + #[wasm_bindgen(js_name = "deleteUtf8")] + pub fn delete_utf8(&mut self, index: usize, len: usize) -> JsResult<()> { + self.handler.delete_utf8(index, len)?; + Ok(()) + } + /// Mark a range of text with a key and a value. /// /// > You should call `configTextStyle` before using `mark` and `unmark`. diff --git a/crates/loro/src/lib.rs b/crates/loro/src/lib.rs index ee7def81..c4c209c7 100644 --- a/crates/loro/src/lib.rs +++ b/crates/loro/src/lib.rs @@ -983,11 +983,21 @@ impl LoroText { self.handler.insert(pos, s) } + /// Insert a string at the given utf-8 position. + pub fn insert_utf8(&self, pos: usize, s: &str) -> LoroResult<()> { + self.handler.insert_utf8(pos, s) + } + /// Delete a range of text at the given unicode position with unicode length. pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> { self.handler.delete(pos, len) } + /// Delete a range of text at the given utf-8 position with utf-8 length. + pub fn delete_utf8(&self, pos: usize, len: usize) -> LoroResult<()> { + self.handler.delete_utf8(pos, len) + } + /// Whether the text container is empty. 
pub fn is_empty(&self) -> bool { self.handler.is_empty() diff --git a/loro-js/tests/richtext.test.ts b/loro-js/tests/richtext.test.ts index a0cbf3e4..085495eb 100644 --- a/loro-js/tests/richtext.test.ts +++ b/loro-js/tests/richtext.test.ts @@ -286,4 +286,20 @@ describe("richtext", () => { const text = doc.getText("text"); text.insert(0, `“aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`); }); + + it("Insert/delete by utf8 index", () => { + const doc = new Loro(); + const text = doc.getText('t'); + text.insert(0, "你好"); + text.insertUtf8(3, "a"); + text.insertUtf8(7, "b"); + expect(text.toDelta()).toStrictEqual([ + { insert: "你a好b" }, + ]); + text.deleteUtf8(3, 4); + expect(text.toDelta()).toStrictEqual([ + { insert: "你b"}, + ]); + + }); });