feat: add insert_utf8 and delete_utf8 for Rust Text API (#396)

* feat: add insert_utf8

* chore: merge insert functions

* fix: use utf8_to_unicode_index to resolve index

* fix: add bound-check and use unicode PosType

* feat: add delete_utf8

* perf: O(LogN) insert_utf8

* feat: add utf-16 cross unicode check

* perf: O(LogN) delete_utf8

* chore: add api

* chore: remove unused function

* fix: api name and bindgen name

* test: add utf8 js test

---------

Co-authored-by: Zixuan Chen <remch183@outlook.com>
This commit is contained in:
东灯 2024-07-10 10:20:08 +08:00 committed by GitHub
parent 9eaaaeada9
commit 86c760abd0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 545 additions and 81 deletions

View file

@ -1,4 +1,3 @@
use serde_columnar::ColumnarError;
use thiserror::Error;
@ -69,6 +68,10 @@ pub enum LoroError {
UndoWithDifferentPeerId { expected: PeerID, actual: PeerID },
#[error("The input JSON schema is invalid")]
InvalidJsonSchema,
#[error("Cannot insert or delete utf-8 in the middle of the codepoint in Unicode.")]
UTF8InUnicodeCodePoint { pos: usize },
#[error("Cannot insert or delete utf-16 in the middle of the codepoint in Unicode.")]
UTF16InUnicodeCodePoint { pos: usize },
}
#[derive(Error, Debug)]

View file

@ -4,7 +4,10 @@ use generic_btree::{
rle::{CanRemove, HasLength, Mergeable, Sliceable, TryInsert},
BTree, BTreeTrait, Cursor,
};
use loro_common::{Counter, IdFull, IdLpSpan, IdSpan, Lamport, LoroValue, ID};
use loro_common::{
Counter, IdFull, IdLpSpan, IdSpan, Lamport, LoroError, LoroResult, LoroValue, ID,
};
use query::{ByteQuery, ByteQueryT};
use serde::{ser::SerializeStruct, Serialize};
use std::{
fmt::{Display, Formatter},
@ -118,6 +121,11 @@ mod text_chunk {
self.unicode_len
}
/// Length of this chunk's underlying byte buffer, returned as `i32`.
///
/// NOTE(review): the `as i32` cast would wrap for buffers longer than `i32::MAX`
/// bytes — presumably chunks never grow that large; confirm.
#[inline]
pub fn utf8_len(&self) -> i32 {
self.bytes.len() as i32
}
#[inline]
pub fn unicode_len(&self) -> i32 {
self.unicode_len
@ -636,8 +644,7 @@ pub(crate) fn utf16_to_unicode_index(s: &str, utf16_index: usize) -> Result<usiz
let mut current_utf16_index = 0;
let mut current_unicode_index = 0;
for (i, c) in s.chars().enumerate() {
let len = c.len_utf16();
current_utf16_index += len;
current_utf16_index += c.len_utf16();
if current_utf16_index == utf16_index {
return Ok(i + 1);
}
@ -652,9 +659,38 @@ pub(crate) fn utf16_to_unicode_index(s: &str, utf16_index: usize) -> Result<usiz
Err(current_unicode_index)
}
/// Convert a UTF-8 byte offset inside `s` into a Unicode scalar (char) index.
///
/// Returns `Ok(index)` when `utf8_index` sits on a char boundary (including the
/// very end of `s`). Otherwise returns `Err(index)`:
///
/// - if the offset falls inside a multi-byte char, `index` is the index of that char;
/// - if the offset is past the end of `s`, `index` is the number of chars in `s`.
pub(crate) fn utf8_to_unicode_index(s: &str, utf8_index: usize) -> Result<usize, usize> {
    if utf8_index == 0 {
        return Ok(0);
    }

    // Number of chars fully walked so far; only used once the loop runs to the end.
    let mut chars_seen = 0;
    for (unicode_index, (byte_start, ch)) in s.char_indices().enumerate() {
        if byte_start == utf8_index {
            return Ok(unicode_index);
        }

        // `byte_start < utf8_index` holds here, so landing before the end of this
        // char means the offset splits its encoding.
        if utf8_index < byte_start + ch.len_utf8() {
            tracing::info!("WARNING: UTF-8 index is in the middle of a codepoint!");
            return Err(unicode_index);
        }

        chars_seen = unicode_index + 1;
    }

    // After the loop only two cases remain: exactly at the end, or out of range.
    if utf8_index == s.len() {
        Ok(chars_seen)
    } else {
        Err(chars_seen)
    }
}
fn pos_to_unicode_index(s: &str, pos: usize, kind: PosType) -> Option<usize> {
match kind {
PosType::Bytes => todo!(),
PosType::Bytes => utf8_to_unicode_index(s, pos).ok(),
PosType::Unicode => Some(pos),
PosType::Utf16 => utf16_to_unicode_index(s, pos).ok(),
PosType::Entity => Some(pos),
@ -910,6 +946,7 @@ mod query {
// Allow left to not at the correct utf16 boundary. If so fallback to the last position.
// TODO: if we remove the use of query(pos-1), we won't need this fallback behavior
// WARNING: Unable to report error!!!
let offset = utf16_to_unicode_index(s.as_str(), left).unwrap_or_else(|e| e);
(offset, true)
}
@ -963,13 +1000,55 @@ mod query {
cache.entity_len as usize
}
}
pub(super) struct ByteQueryT;
pub(super) type ByteQuery = IndexQuery<ByteQueryT, RichtextTreeTrait>;
/// Length and offset rules used when a position is expressed as a UTF-8 byte index.
impl QueryByLen<RichtextTreeTrait> for ByteQueryT {
    /// Byte length stored in a node cache.
    fn get_cache_len(cache: &<RichtextTreeTrait as BTreeTrait>::Cache) -> usize {
        cache.bytes as usize
    }

    /// Byte length of a single element; style chunks contribute no bytes.
    fn get_elem_len(elem: &<RichtextTreeTrait as BTreeTrait>::Elem) -> usize {
        if let RichtextStateChunk::Text(text) = elem {
            text.utf8_len() as usize
        } else {
            0
        }
    }

    /// Translate the remaining byte offset `left` inside `elem` into an offset
    /// within the element, plus whether the position was found in it.
    fn get_offset_and_found(
        left: usize,
        elem: &<RichtextTreeTrait as BTreeTrait>::Elem,
    ) -> (usize, bool) {
        let RichtextStateChunk::Text(text) = elem else {
            return (1, false);
        };

        if left == 0 {
            return (0, true);
        }

        // `left` may not sit on a UTF-8 char boundary. In that case fall back to the
        // index carried by the error instead of failing.
        // TODO: if we remove the use of query(pos-1), we won't need this fallback behavior
        // WARNING: Unable to report error!!!
        let offset = match utf8_to_unicode_index(text.as_str(), left) {
            Ok(index) | Err(index) => index,
        };
        (offset, true)
    }

    /// Entity length stored in a node cache.
    fn get_cache_entity_len(cache: &<RichtextTreeTrait as BTreeTrait>::Cache) -> usize {
        cache.entity_len as usize
    }
}
}
mod cursor_cache {
use std::sync::atomic::AtomicUsize;
use super::{pos_to_unicode_index, unicode_to_utf16_index, PosType, RichtextTreeTrait};
use super::{
pos_to_unicode_index, unicode_to_utf16_index, unicode_to_utf8_index, PosType,
RichtextTreeTrait,
};
use generic_btree::{rle::HasLength, BTree, Cursor, LeafIndex};
use loro_common::LoroError;
#[derive(Debug, Clone)]
struct CursorCacheItem {
@ -1038,9 +1117,34 @@ mod cursor_cache {
entity_index: usize,
cursor: Cursor,
tree: &BTree<RichtextTreeTrait>,
) {
) -> Result<(), usize> {
match kind {
PosType::Bytes => todo!(),
PosType::Bytes => {
if cursor.offset == 0 {
self.entity = Some(EntityIndexCacheItem {
pos,
pos_type: kind,
entity_index,
leaf: cursor.leaf,
});
} else {
let elem = tree.get_elem(cursor.leaf).unwrap();
let Some(s) = elem.as_str() else {
return Ok(());
};
let utf8offset = unicode_to_utf8_index(s, cursor.offset).unwrap();
if pos < utf8offset {
return Err(pos);
}
self.entity = Some(EntityIndexCacheItem {
pos: pos - utf8offset,
pos_type: kind,
entity_index: entity_index - cursor.offset,
leaf: cursor.leaf,
});
}
Ok(())
}
PosType::Unicode | PosType::Entity => {
self.entity = Some(EntityIndexCacheItem {
pos: pos - cursor.offset,
@ -1048,6 +1152,7 @@ mod cursor_cache {
entity_index: entity_index - cursor.offset,
leaf: cursor.leaf,
});
Ok(())
}
PosType::Event if cfg!(not(feature = "wasm")) => {
self.entity = Some(EntityIndexCacheItem {
@ -1056,6 +1161,7 @@ mod cursor_cache {
entity_index: entity_index - cursor.offset,
leaf: cursor.leaf,
});
Ok(())
}
_ => {
// utf16
@ -1068,8 +1174,13 @@ mod cursor_cache {
});
} else {
let elem = tree.get_elem(cursor.leaf).unwrap();
let Some(s) = elem.as_str() else { return };
let Some(s) = elem.as_str() else {
return Ok(());
};
let utf16offset = unicode_to_utf16_index(s, cursor.offset).unwrap();
if pos < utf16offset {
return Err(pos);
}
self.entity = Some(EntityIndexCacheItem {
pos: pos - utf16offset,
pos_type: kind,
@ -1077,6 +1188,7 @@ mod cursor_cache {
leaf: cursor.leaf,
});
}
Ok(())
}
}
}
@ -1196,9 +1308,9 @@ impl RichtextState {
&mut self,
pos: usize,
pos_type: PosType,
) -> usize {
) -> Result<usize, LoroError> {
if self.tree.is_empty() {
return 0;
return Ok(0);
}
if let Some(pos) =
@ -1211,11 +1323,11 @@ impl RichtextState {
&self.tree,
&self.cursor_cache
);
return pos;
return Ok(pos);
}
let (c, entity_index) = match pos_type {
PosType::Bytes => todo!(),
PosType::Bytes => self.find_best_insert_pos::<ByteQueryT>(pos),
PosType::Unicode => self.find_best_insert_pos::<UnicodeQueryT>(pos),
PosType::Utf16 => self.find_best_insert_pos::<Utf16QueryT>(pos),
PosType::Entity => self.find_best_insert_pos::<EntityQueryT>(pos),
@ -1227,12 +1339,23 @@ impl RichtextState {
self.cursor_cache
.record_cursor(entity_index, PosType::Entity, c, &self.tree);
if !self.has_styles() {
self.cursor_cache
.record_entity_index(pos, pos_type, entity_index, c, &self.tree);
if let Err(pos) = self.cursor_cache.record_entity_index(
pos,
pos_type,
entity_index,
c,
&self.tree,
) {
return match pos_type {
PosType::Bytes => Err(LoroError::UTF8InUnicodeCodePoint { pos: pos }),
PosType::Utf16 => Err(LoroError::UTF16InUnicodeCodePoint { pos: pos }),
_ => unreachable!(),
};
}
}
}
entity_index
Ok(entity_index)
}
fn has_styles(&self) -> bool {
@ -1251,8 +1374,12 @@ impl RichtextState {
return (0..0, None);
}
let start = self.get_entity_index_for_text_insert(range.start, pos_type);
let end = self.get_entity_index_for_text_insert(range.end, pos_type);
let start = self
.get_entity_index_for_text_insert(range.start, pos_type)
.unwrap();
let end = self
.get_entity_index_for_text_insert(range.end, pos_type)
.unwrap();
if self.has_styles() {
(
start..end,
@ -1656,22 +1783,25 @@ impl RichtextState {
pos: usize,
len: usize,
pos_type: PosType,
) -> Vec<EntityRangeInfo> {
) -> LoroResult<Vec<EntityRangeInfo>> {
if self.tree.is_empty() {
return Vec::new();
return Ok(Vec::new());
}
if len == 0 {
return Vec::new();
return Ok(Vec::new());
}
if pos + len > self.len(pos_type) {
return Vec::new();
return Ok(Vec::new());
}
let mut ans: Vec<EntityRangeInfo> = Vec::new();
let (start, end) = match pos_type {
PosType::Bytes => todo!(),
PosType::Bytes => (
self.tree.query::<ByteQuery>(&pos).unwrap().cursor,
self.tree.query::<ByteQuery>(&(pos + len)).unwrap().cursor,
),
PosType::Unicode => (
self.tree.query::<UnicodeQuery>(&pos).unwrap().cursor,
self.tree
@ -1735,7 +1865,7 @@ impl RichtextState {
}
}
ans
Ok(ans)
}
// PERF: can be splitted into two methods. One is without cursor_to_event_index
@ -2272,7 +2402,7 @@ impl RichtextState {
pos: usize,
kind: PosType,
) -> Option<ID> {
let v = &self.get_text_entity_ranges(pos, 1, kind);
let v = &self.get_text_entity_ranges(pos, 1, kind).unwrap();
let a = v.first()?;
Some(a.id_start)
}
@ -2395,7 +2525,9 @@ mod test {
{
let state = &mut self.state;
let text = self.bytes.slice(start..);
let entity_index = state.get_entity_index_for_text_insert(pos, PosType::Unicode);
let entity_index = state
.get_entity_index_for_text_insert(pos, PosType::Unicode)
.unwrap();
state.insert_at_entity_index(entity_index, text, IdFull::new(0, 0, 0));
};
}
@ -2403,7 +2535,8 @@ mod test {
fn delete(&mut self, pos: usize, len: usize) {
let ranges = self
.state
.get_text_entity_ranges(pos, len, PosType::Unicode);
.get_text_entity_ranges(pos, len, PosType::Unicode)
.unwrap();
for range in ranges.into_iter().rev() {
self.state.drain_by_entity_index(
range.entity_start,
@ -2416,10 +2549,12 @@ mod test {
fn mark(&mut self, range: Range<usize>, style: Arc<StyleOp>) {
let start = self
.state
.get_entity_index_for_text_insert(range.start, PosType::Unicode);
.get_entity_index_for_text_insert(range.start, PosType::Unicode)
.unwrap();
let end = self
.state
.get_entity_index_for_text_insert(range.end, PosType::Unicode);
.get_entity_index_for_text_insert(range.end, PosType::Unicode)
.unwrap();
self.state.mark_with_entity_index(start..end, style);
}
}

View file

@ -7,7 +7,7 @@ use crate::{
richtext::{richtext_state::PosType, RichtextState, StyleOp, TextStyleInfoFlag},
},
cursor::{Cursor, Side},
delta::{DeltaItem, StyleMeta, TreeExternalDiff},
delta::{DeltaItem, Meta, StyleMeta, TreeExternalDiff},
event::{Diff, TextDiffItem},
op::ListSlice,
state::{ContainerState, IndexType, State},
@ -16,7 +16,7 @@ use crate::{
};
use append_only_bytes::BytesSlice;
use enum_as_inner::EnumAsInner;
use fxhash::{FxHashMap, FxHashSet};
use fxhash::FxHashMap;
use generic_btree::rle::HasLength;
use loro_common::{
ContainerID, ContainerType, IdFull, InternalString, LoroError, LoroResult, LoroValue, TreeID,
@ -31,7 +31,8 @@ use std::{
ops::Deref,
sync::{Arc, Mutex, Weak},
};
use tracing::{debug, error, info, instrument, trace};
use tracing::{debug, error, info, instrument, Event};
mod tree;
pub use tree::TreeHandler;
@ -1337,7 +1338,8 @@ impl TextHandler {
let mut t = t.try_lock().unwrap();
let index = t
.value
.get_entity_index_for_text_insert(pos, PosType::Event);
.get_entity_index_for_text_insert(pos, PosType::Event)
.unwrap();
t.value.insert_at_entity_index(
index,
BytesSlice::from_bytes(s.as_bytes()),
@ -1349,16 +1351,89 @@ impl TextHandler {
}
}
/// Insert `s` at position `pos`, where `pos` is a UTF-8 byte offset.
///
/// For a detached handler the text is inserted directly into the local state;
/// for an attached handler the insertion goes through a transaction.
///
/// # Errors
///
/// Returns the error produced while resolving `pos` (for example when it cannot
/// be mapped to an insert position) or any error reported by the transaction.
pub fn insert_utf8(&self, pos: usize, s: &str) -> LoroResult<()> {
    match &self.inner {
        MaybeDetached::Detached(t) => {
            let mut t = t.try_lock().unwrap();
            // `pos` comes from the caller, so a failure to resolve it is reported
            // as an error instead of panicking.
            let index = t
                .value
                .get_entity_index_for_text_insert(pos, PosType::Bytes)?;
            t.value.insert_at_entity_index(
                index,
                BytesSlice::from_bytes(s.as_bytes()),
                IdFull::NONE_ID,
            );
            Ok(())
        }
        MaybeDetached::Attached(a) => a.with_txn(|txn| self.insert_with_txn_utf8(txn, pos, s)),
    }
}
/// `pos` is a Event Index:
///
/// - if feature="wasm", pos is a UTF-16 index
/// - if feature!="wasm", pos is a Unicode index
pub fn insert_with_txn(&self, txn: &mut Transaction, pos: usize, s: &str) -> LoroResult<()> {
self.insert_with_txn_and_attr(txn, pos, s, None)?;
self.insert_with_txn_and_attr(txn, pos, s, None, PosType::Event)?;
Ok(())
}
/// Insert `s` at UTF-8 byte offset `pos` inside the given transaction.
///
/// No attributes are attached to the inserted text; the style overrides computed
/// by the shared insert routine are discarded.
pub fn insert_with_txn_utf8(
    &self,
    txn: &mut Transaction,
    pos: usize,
    s: &str,
) -> LoroResult<()> {
    self.insert_with_txn_and_attr(txn, pos, s, None, PosType::Bytes)
        .map(|_override_styles| ())
}
/// Delete `len` units of text starting at `pos`.
///
/// `pos` is a Event Index:
///
/// - if feature="wasm", pos is a UTF-16 index
/// - if feature!="wasm", pos is a Unicode index
///
/// This method requires auto_commit to be enabled.
///
/// # Errors
///
/// Returns any error reported while resolving the range or by the transaction.
pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> {
    match &self.inner {
        MaybeDetached::Detached(t) => {
            let mut t = t.try_lock().unwrap();
            // Propagate range-resolution errors rather than unwrapping them.
            let ranges = t
                .value
                .get_text_entity_ranges(pos, len, PosType::Event)?;
            // Drain from the back so earlier entity indexes stay valid.
            for range in ranges.iter().rev() {
                t.value
                    .drain_by_entity_index(range.entity_start, range.entity_len(), None);
            }
            Ok(())
        }
        MaybeDetached::Attached(a) => a.with_txn(|txn| self.delete_with_txn(txn, pos, len)),
    }
}
/// Delete `len` UTF-8 bytes of text starting at UTF-8 byte offset `pos`.
///
/// For a detached handler the text is removed directly from the local state;
/// for an attached handler the deletion goes through a transaction.
///
/// # Errors
///
/// Returns any error reported while resolving the byte range or by the transaction.
pub fn delete_utf8(&self, pos: usize, len: usize) -> LoroResult<()> {
    match &self.inner {
        MaybeDetached::Detached(t) => {
            let mut t = t.try_lock().unwrap();
            let ranges = t
                .value
                .get_text_entity_ranges(pos, len, PosType::Bytes)?;
            // Drain from the back so earlier entity indexes stay valid.
            for range in ranges.iter().rev() {
                t.value
                    .drain_by_entity_index(range.entity_start, range.entity_len(), None);
            }
            Ok(())
        }
        MaybeDetached::Attached(a) => {
            a.with_txn(|txn| self.delete_with_txn_utf8(txn, pos, len))
        }
    }
}
/// If attr is specified, it will be used as the attribute of the inserted text.
/// It will override the existing attribute of the text.
fn insert_with_txn_and_attr(
&self,
@ -1366,27 +1441,51 @@ impl TextHandler {
pos: usize,
s: &str,
attr: Option<&FxHashMap<String, LoroValue>>,
pos_type: PosType,
) -> Result<Vec<(InternalString, LoroValue)>, LoroError> {
if s.is_empty() {
return Ok(Vec::new());
}
if pos > self.len_event() {
return Err(LoroError::OutOfBound {
pos,
len: self.len_event(),
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
});
match pos_type {
PosType::Event => {
if pos > self.len_event() {
return Err(LoroError::OutOfBound {
pos,
len: self.len_event(),
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
});
}
}
PosType::Bytes => {
if pos > self.len_utf8() {
return Err(LoroError::OutOfBound {
pos,
len: self.len_utf8(),
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
});
}
}
_ => (),
}
let inner = self.inner.try_attached_state()?;
let (entity_index, styles) = inner.with_state(|state| {
let richtext_state = state.as_richtext_state_mut().unwrap();
let pos = richtext_state.get_entity_index_for_text_insert(pos);
let pos = richtext_state.get_entity_index_for_text_insert(pos, pos_type);
let pos = match pos {
Err(_) => return (pos, StyleMeta::empty()),
Ok(x) => x,
};
let styles = richtext_state.get_styles_at_entity_index(pos);
(pos, styles)
(Ok(pos), styles)
});
let entity_index = match entity_index {
Err(x) => return Err(x),
_ => entity_index.unwrap(),
};
let mut override_styles = Vec::new();
if let Some(attr) = attr {
// current styles
@ -1442,50 +1541,66 @@ impl TextHandler {
///
/// - if feature="wasm", pos is a UTF-16 index
/// - if feature!="wasm", pos is a Unicode index
///
/// This method requires auto_commit to be enabled.
pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> {
match &self.inner {
MaybeDetached::Detached(t) => {
let mut t = t.try_lock().unwrap();
let ranges = t.value.get_text_entity_ranges(pos, len, PosType::Event);
for range in ranges.iter().rev() {
t.value
.drain_by_entity_index(range.entity_start, range.entity_len(), None);
}
Ok(())
}
MaybeDetached::Attached(a) => a.with_txn(|txn| self.delete_with_txn(txn, pos, len)),
}
pub fn delete_with_txn(&self, txn: &mut Transaction, pos: usize, len: usize) -> LoroResult<()> {
// Event-index entry point: forwards to the shared implementation with `PosType::Event`.
self.delete_with_txn_inline(txn, pos, len, PosType::Event)
}
/// `pos` is a Event Index:
///
/// - if feature="wasm", pos is a UTF-16 index
/// - if feature!="wasm", pos is a Unicode index
pub fn delete_with_txn(&self, txn: &mut Transaction, pos: usize, len: usize) -> LoroResult<()> {
pub fn delete_with_txn_utf8(
&self,
txn: &mut Transaction,
pos: usize,
len: usize,
) -> LoroResult<()> {
// UTF-8 entry point: `pos` and `len` are passed on as `PosType::Bytes`, i.e. they
// are byte offsets/lengths rather than event indexes.
self.delete_with_txn_inline(txn, pos, len, PosType::Bytes)
}
fn delete_with_txn_inline(
&self,
txn: &mut Transaction,
pos: usize,
len: usize,
pos_type: PosType,
) -> LoroResult<()> {
if len == 0 {
return Ok(());
}
if pos + len > self.len_event() {
error!("pos={} len={} len_event={}", pos, len, self.len_event());
return Err(LoroError::OutOfBound {
pos: pos + len,
len: self.len_event(),
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
});
match pos_type {
PosType::Event => {
if pos + len > self.len_event() {
error!("pos={} len={} len_event={}", pos, len, self.len_event());
return Err(LoroError::OutOfBound {
pos: pos + len,
len: self.len_event(),
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
});
}
}
PosType::Bytes => {
if pos + len > self.len_utf8() {
error!("pos={} len={} len_event={}", pos, len, self.len_event());
return Err(LoroError::OutOfBound {
pos: pos + len,
len: self.len_event(),
info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
});
}
}
_ => (),
}
let inner = self.inner.try_attached_state()?;
let s = tracing::span!(tracing::Level::INFO, "delete", "pos={} len={}", pos, len);
let _e = s.enter();
let ranges = inner.with_state(|state| {
let ranges = match inner.with_state(|state| {
let richtext_state = state.as_richtext_state_mut().unwrap();
richtext_state.get_text_entity_ranges_in_event_index_range(pos, len)
});
richtext_state.get_text_entity_ranges_in_event_index_range(pos, len, pos_type)
}) {
Err(x) => return Err(x),
Ok(x) => x,
};
debug_assert_eq!(ranges.iter().map(|x| x.event_len).sum::<usize>(), len);
//debug_assert_eq!(ranges.iter().map(|x| x.event_len).sum::<usize>(), len);
let mut event_end = (pos + len) as isize;
for range in ranges.iter().rev() {
let event_start = event_end - range.event_len as isize;
@ -1749,6 +1864,7 @@ impl TextHandler {
index,
insert.as_str(),
Some(attributes.as_ref().unwrap_or(&Default::default())),
PosType::Event,
)?;
for (key, value) in override_styles {
@ -3558,14 +3674,14 @@ pub mod counter {
#[cfg(test)]
mod test {
use super::{HandlerTrait, TextDelta};
use crate::container::richtext::richtext_state::PosType;
use crate::loro::LoroDoc;
use crate::version::Frontiers;
use crate::{fx_map, ToJson};
use loro_common::ID;
use serde_json::json;
use super::{HandlerTrait, TextDelta};
#[test]
fn import() {
let loro = LoroDoc::new();

View file

@ -5,7 +5,7 @@ use std::{
use fxhash::{FxHashMap, FxHashSet};
use generic_btree::rle::HasLength;
use loro_common::{ContainerID, InternalString, LoroResult, LoroValue, ID};
use loro_common::{ContainerID, InternalString, LoroError, LoroResult, LoroValue, ID};
use loro_delta::DeltaRopeBuilder;
use crate::{
@ -743,10 +743,14 @@ impl RichtextState {
}
#[inline]
pub(crate) fn get_entity_index_for_text_insert(&mut self, event_index: usize) -> usize {
pub(crate) fn get_entity_index_for_text_insert(
&mut self,
event_index: usize,
pos_type: PosType,
) -> Result<usize, LoroError> {
self.state
.get_mut()
.get_entity_index_for_text_insert(event_index, PosType::Event)
.get_entity_index_for_text_insert(event_index, pos_type)
}
pub(crate) fn get_entity_range_and_styles_at_range(
@ -771,10 +775,11 @@ impl RichtextState {
&mut self,
pos: usize,
len: usize,
) -> Vec<EntityRangeInfo> {
pos_type: PosType,
) -> LoroResult<Vec<EntityRangeInfo>> {
self.state
.get_mut()
.get_text_entity_ranges(pos, len, PosType::Event)
.get_text_entity_ranges(pos, len, pos_type)
}
#[inline]

View file

@ -960,3 +960,147 @@ fn counter() {
let doc2 = LoroDoc::new_auto_commit();
doc2.import_json_updates(json).unwrap();
}
/// ASCII text: byte offset 6 is the end of "Hello ", so the second insert appends.
#[test]
fn test_insert_utf8() {
    let doc = LoroDoc::new_auto_commit();
    let handler = doc.get_text("text");
    handler.insert_utf8(0, "Hello ").unwrap();
    handler.insert_utf8(6, "World").unwrap();

    let actual = handler.get_richtext_value().to_json_value();
    let expected = json!([{"insert":"Hello World"}]);
    assert_eq!(actual, expected);
}
/// Byte offset 3 is the boundary between the two 3-byte chars of "你好".
#[test]
fn test_insert_utf8_cross_unicode_1() {
    let doc = LoroDoc::new_auto_commit();
    let handler = doc.get_text("text");
    handler.insert_utf8(0, "你好").unwrap();
    handler.insert_utf8(3, "World").unwrap();

    let actual = handler.get_richtext_value().to_json_value();
    let expected = json!([{"insert":"你World好"}]);
    assert_eq!(actual, expected);
}
/// Byte offset 6 is the end of "你好" (2 chars x 3 bytes), so the insert appends.
#[test]
fn test_insert_utf8_cross_unicode_2() {
    let doc = LoroDoc::new_auto_commit();
    let handler = doc.get_text("text");
    handler.insert_utf8(0, "你好").unwrap();
    handler.insert_utf8(6, "World").unwrap();

    let actual = handler.get_richtext_value().to_json_value();
    let expected = json!([{"insert":"你好World"}]);
    assert_eq!(actual, expected);
}
/// Same as `test_insert_utf8`, but on a handler that is not attached to a document.
#[test]
fn test_insert_utf8_detached() {
    let handler = TextHandler::new_detached();
    handler.insert_utf8(0, "Hello ").unwrap();
    handler.insert_utf8(6, "World").unwrap();

    let actual = handler.get_richtext_value().to_json_value();
    let expected = json!([{"insert":"Hello World"}]);
    assert_eq!(actual, expected);
}
/// Byte offset 1 lies inside the 3-byte encoding of "你"; unwrapping the insert
/// result is expected to panic.
#[test]
#[should_panic]
fn test_insert_utf8_panic_cross_unicode() {
    let doc = LoroDoc::new_auto_commit();
    let handler = doc.get_text("text");
    let seeded = handler.insert_utf8(0, "你好");
    seeded.unwrap();
    let split_insert = handler.insert_utf8(1, "World");
    split_insert.unwrap();
}
/// "Hello " is 6 bytes long, so byte offset 7 is out of bounds; unwrapping the
/// insert result is expected to panic.
#[test]
#[should_panic]
fn test_insert_utf8_panic_out_bound() {
    let doc = LoroDoc::new_auto_commit();
    let handler = doc.get_text("text");
    let seeded = handler.insert_utf8(0, "Hello ");
    seeded.unwrap();
    let past_end_insert = handler.insert_utf8(7, "World");
    past_end_insert.unwrap();
}
// println!("{}", text.get_richtext_value().to_json_value().to_string());
/// ASCII text: removing 3 bytes from byte offset 1 of "Hello" leaves "Ho".
#[test]
fn test_delete_utf8() {
    let doc = LoroDoc::new_auto_commit();
    let handler = doc.get_text("text");
    handler.insert_utf8(0, "Hello").unwrap();
    handler.delete_utf8(1, 3).unwrap();

    let actual = handler.get_richtext_value().to_json_value();
    let expected = json!([{"insert":"Ho"}]);
    assert_eq!(actual, expected);
}
/// A zero-length delete succeeds and leaves the text untouched.
#[test]
fn test_delete_utf8_with_zero_len() {
    let doc = LoroDoc::new_auto_commit();
    let handler = doc.get_text("text");
    handler.insert_utf8(0, "Hello").unwrap();
    handler.delete_utf8(1, 0).unwrap();

    let actual = handler.get_richtext_value().to_json_value();
    let expected = json!([{"insert":"Hello"}]);
    assert_eq!(actual, expected);
}
/// Deleting the first 3 bytes of "你好" removes exactly the first char ("你" is
/// 3 bytes in UTF-8), so the second char must remain.
#[test]
fn test_delete_utf8_cross_unicode() {
    let doc = LoroDoc::new_auto_commit();
    let text = doc.get_text("text");
    text.insert_utf8(0, "你好").unwrap();
    text.delete_utf8(0, 3).unwrap();
    assert_eq!(
        text.get_richtext_value().to_json_value(),
        json!([{"insert":"好"}])
    )
}
/// Same as `test_delete_utf8`, but on a handler that is not attached to a document.
#[test]
fn test_delete_utf8_detached() {
    let handler = TextHandler::new_detached();
    handler.insert_utf8(0, "Hello").unwrap();
    handler.delete_utf8(1, 3).unwrap();

    let actual = handler.get_richtext_value().to_json_value();
    let expected = json!([{"insert":"Ho"}]);
    assert_eq!(actual, expected);
}
// WARNING:
// Due to the current inability to report an error on
// get_offset_and_found on BTree, this test won't be ok.
// #[test]
// #[should_panic]
// fn test_delete_utf8_panic_cross_unicode() {
// let doc = LoroDoc::new_auto_commit();
// let text = doc.get_text("text");
// text.insert_utf8(0, "你好").unwrap();
// text.delete_utf8(0, 2).unwrap();
// }
/// The start offset (10) is beyond the 5 bytes of "Hello"; unwrapping the delete
/// result is expected to panic.
#[test]
#[should_panic]
fn test_delete_utf8_panic_out_bound_pos() {
    let doc = LoroDoc::new_auto_commit();
    let handler = doc.get_text("text");
    let seeded = handler.insert(0, "Hello");
    seeded.unwrap();
    let past_end_delete = handler.delete_utf8(10, 1);
    past_end_delete.unwrap();
}
/// The range 1..11 runs past the 5 bytes of "Hello"; unwrapping the delete result
/// is expected to panic.
#[test]
#[should_panic]
fn test_delete_utf8_panic_out_bound_len() {
    let doc = LoroDoc::new_auto_commit();
    let handler = doc.get_text("text");
    let seeded = handler.insert(0, "Hello");
    seeded.unwrap();
    let overlong_delete = handler.delete_utf8(1, 10);
    overlong_delete.unwrap();
}

View file

@ -1515,6 +1515,22 @@ impl LoroText {
Ok(())
}
/// Insert `content` at the UTF-8 byte offset `index`.
///
/// Any error returned by the underlying handler is propagated to the caller.
///
/// @example
/// ```ts
/// import { Loro } from "loro-crdt";
///
/// const doc = new Loro();
/// const text = doc.getText("text");
/// text.insertUtf8(0, "Hello");
/// ```
#[wasm_bindgen(js_name = "insertUtf8")]
pub fn insert_utf8(&mut self, index: usize, content: &str) -> JsResult<()> {
self.handler.insert_utf8(index, content)?;
Ok(())
}
/// Delete elements from index to index + len
///
/// @example
@ -1533,6 +1549,25 @@ impl LoroText {
Ok(())
}
/// Delete `len` UTF-8 bytes of text starting at the UTF-8 byte offset `index`.
///
/// Any error returned by the underlying handler is propagated to the caller.
///
/// @example
/// ```ts
/// import { Loro } from "loro-crdt";
///
/// const doc = new Loro();
/// const text = doc.getText("text");
/// text.insertUtf8(0, "Hello");
/// text.deleteUtf8(1, 3);
/// const s = text.toString();
/// console.log(s); // "Ho"
/// ```
#[wasm_bindgen(js_name = "deleteUtf8")]
pub fn delete_utf8(&mut self, index: usize, len: usize) -> JsResult<()> {
self.handler.delete_utf8(index, len)?;
Ok(())
}
/// Mark a range of text with a key and a value.
///
/// > You should call `configTextStyle` before using `mark` and `unmark`.

View file

@ -983,11 +983,21 @@ impl LoroText {
self.handler.insert(pos, s)
}
/// Insert a string at the given UTF-8 position.
///
/// `pos` is a byte offset into the UTF-8 encoding of the text. The result of the
/// underlying handler call is returned unchanged.
pub fn insert_utf8(&self, pos: usize, s: &str) -> LoroResult<()> {
self.handler.insert_utf8(pos, s)
}
/// Delete a range of text at the given unicode position with unicode length.
pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> {
self.handler.delete(pos, len)
}
/// Delete a range of text at the given UTF-8 position with UTF-8 length.
///
/// Both `pos` and `len` are measured in bytes of the UTF-8 encoding of the text.
/// The result of the underlying handler call is returned unchanged.
pub fn delete_utf8(&self, pos: usize, len: usize) -> LoroResult<()> {
self.handler.delete_utf8(pos, len)
}
/// Whether the text container is empty.
pub fn is_empty(&self) -> bool {
self.handler.is_empty()

View file

@ -286,4 +286,20 @@ describe("richtext", () => {
const text = doc.getText("text");
text.insert(0, `“aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`);
});
it("Insert/delete by utf8 index", () => {
// Each of the two CJK characters below takes 3 bytes in UTF-8.
const doc = new Loro();
const text = doc.getText('t');
text.insert(0, "你好");
// Byte 3 is the boundary between "你" and "好".
text.insertUtf8(3, "a");
// "你a好" is 3 + 1 + 3 = 7 bytes, so byte 7 is the end of the text.
text.insertUtf8(7, "b");
expect(text.toDelta()).toStrictEqual([
{ insert: "你a好b" },
]);
// Removes bytes 3..7: "a" (1 byte) followed by "好" (3 bytes).
text.deleteUtf8(3, 4);
expect(text.toDelta()).toStrictEqual([
{ insert: "你b"},
]);
});
});