feat: add insert_utf8 and delete_utf8 for Rust Text API (#396)

* feat: add insert_utf8 * chore: merge insert functions * fix: use utf8_to_unicode_index to resolve index * fix: add bound-check and use unicode PosType * feat: add delete_utf8 * perf: O(LogN) insert_utf8 * feat: add utf-16 cross unicode check * perf: O(LogN) delete_utf8 * chore: add api * chore: remove unused function * fix: api name and bindgen name * test: add utf8 js test --------- Co-authored-by: Zixuan Chen <remch183@outlook.com>
2024-11-28 09:25:36 +00:00 · 2024-07-10 10:20:08 +08:00 · 2024-07-10 10:20:08 +08:00 · 86c760abd0
commit 86c760abd0
parent 9eaaaeada9
8 changed files with 545 additions and 81 deletions
--- a/crates/loro-common/src/error.rs
+++ b/crates/loro-common/src/error.rs
@ -1,4 +1,3 @@
-
 use serde_columnar::ColumnarError;
 use thiserror::Error;

@ -69,6 +68,10 @@ pub enum LoroError {
    UndoWithDifferentPeerId { expected: PeerID, actual: PeerID },
    #[error("The input JSON schema is invalid")]
    InvalidJsonSchema,
+    #[error("Cannot insert or delete utf-8 in the middle of the codepoint in Unicode.")]
+    UTF8InUnicodeCodePoint { pos: usize },
+    #[error("Cannot insert or delete utf-16 in the middle of the codepoint in Unicode.")]
+    UTF16InUnicodeCodePoint { pos: usize },
 }

 #[derive(Error, Debug)]
--- a/crates/loro-internal/src/container/richtext/richtext_state.rs
+++ b/crates/loro-internal/src/container/richtext/richtext_state.rs
@ -4,7 +4,10 @@ use generic_btree::{
    rle::{CanRemove, HasLength, Mergeable, Sliceable, TryInsert},
    BTree, BTreeTrait, Cursor,
 };
-use loro_common::{Counter, IdFull, IdLpSpan, IdSpan, Lamport, LoroValue, ID};
+use loro_common::{
+    Counter, IdFull, IdLpSpan, IdSpan, Lamport, LoroError, LoroResult, LoroValue, ID,
+};
+use query::{ByteQuery, ByteQueryT};
 use serde::{ser::SerializeStruct, Serialize};
 use std::{
    fmt::{Display, Formatter},
@ -118,6 +121,11 @@ mod text_chunk {
            self.unicode_len
        }

+        /// Length of this chunk's text in UTF-8 bytes, i.e. the length of `self.bytes`
+        /// cast to `i32`.
+        #[inline]
+        pub fn utf8_len(&self) -> i32 {
+            self.bytes.len() as i32
+        }
+
        #[inline]
        pub fn unicode_len(&self) -> i32 {
            self.unicode_len
@ -636,8 +644,7 @@ pub(crate) fn utf16_to_unicode_index(s: &str, utf16_index: usize) -> Result<usiz
    let mut current_utf16_index = 0;
    let mut current_unicode_index = 0;
    for (i, c) in s.chars().enumerate() {
-        let len = c.len_utf16();
-        current_utf16_index += len;
+        current_utf16_index += c.len_utf16();
        if current_utf16_index == utf16_index {
            return Ok(i + 1);
        }
@ -652,9 +659,38 @@ pub(crate) fn utf16_to_unicode_index(s: &str, utf16_index: usize) -> Result<usiz
    Err(current_unicode_index)
 }

+/// Converts a UTF-8 byte offset into `s` to a Unicode (code point) index.
+///
+/// - `Ok(i)`: `utf8_index` sits exactly on the start of the `i`-th code point, or at the
+///   very end of `s` (then `i` is the number of code points in `s`).
+/// - `Err(i)`: `utf8_index` points into the middle of the `i`-th code point, or lies past
+///   the end of `s` (then `i` is the number of code points in `s`).
+pub(crate) fn utf8_to_unicode_index(s: &str, utf8_index: usize) -> Result<usize, usize> {
+    let mut chars_seen = 0;
+    for (byte_start, ch) in s.char_indices() {
+        if byte_start == utf8_index {
+            return Ok(chars_seen);
+        }
+
+        // Offsets before `byte_start` were already handled by earlier iterations, so a
+        // value below this code point's end must fall strictly inside it.
+        if utf8_index < byte_start + ch.len_utf8() {
+            tracing::info!("WARNING: UTF-8 index is in the middle of a codepoint!");
+            return Err(chars_seen);
+        }
+
+        chars_seen += 1;
+    }
+
+    // Only the end of the string is still a valid boundary at this point.
+    if utf8_index == s.len() {
+        Ok(chars_seen)
+    } else {
+        Err(chars_seen)
+    }
+}
+
 fn pos_to_unicode_index(s: &str, pos: usize, kind: PosType) -> Option<usize> {
    match kind {
-        PosType::Bytes => todo!(),
+        PosType::Bytes => utf8_to_unicode_index(s, pos).ok(),
        PosType::Unicode => Some(pos),
        PosType::Utf16 => utf16_to_unicode_index(s, pos).ok(),
        PosType::Entity => Some(pos),
@ -910,6 +946,7 @@ mod query {

                    // Allow left to not at the correct utf16 boundary. If so fallback to the last position.
                    // TODO: if we remove the use of query(pos-1), we won't need this fallback behavior
+                    // WARNING: Unable to report error!!!
                    let offset = utf16_to_unicode_index(s.as_str(), left).unwrap_or_else(|e| e);
                    (offset, true)
                }
@ -963,13 +1000,55 @@ mod query {
            cache.entity_len as usize
        }
    }
+
+    /// Marker type selecting UTF-8 byte offsets as the unit for tree queries.
+    pub(super) struct ByteQueryT;
+    /// Index query over the richtext tree whose positions are UTF-8 byte offsets.
+    pub(super) type ByteQuery = IndexQuery<ByteQueryT, RichtextTreeTrait>;
+    impl QueryByLen<RichtextTreeTrait> for ByteQueryT {
+        /// Length of a subtree, taken from the cached `bytes` count.
+        fn get_cache_len(cache: &<RichtextTreeTrait as BTreeTrait>::Cache) -> usize {
+            cache.bytes as usize
+        }
+        /// Length of a single element: the UTF-8 length of a text chunk, 0 for a style anchor.
+        fn get_elem_len(elem: &<RichtextTreeTrait as BTreeTrait>::Elem) -> usize {
+            match elem {
+                RichtextStateChunk::Text(s) => s.utf8_len() as usize,
+                RichtextStateChunk::Style { .. } => 0,
+            }
+        }
+
+        /// Maps the remaining byte offset `left` inside `elem` to a Unicode offset in that
+        /// element, together with a "found" flag (`true` for text, `false` for style anchors).
+        fn get_offset_and_found(
+            left: usize,
+            elem: &<RichtextTreeTrait as BTreeTrait>::Elem,
+        ) -> (usize, bool) {
+            match elem {
+                RichtextStateChunk::Text(s) => {
+                    if left == 0 {
+                        return (0, true);
+                    }
+
+                    // Allow `left` to not be at a correct UTF-8 code point boundary. If so,
+                    // fall back to the index carried by the `Err` of `utf8_to_unicode_index`.
+                    // TODO: if we remove the use of query(pos-1), we won't need this fallback behavior
+                    // WARNING: Unable to report error!!!
+                    let offset = utf8_to_unicode_index(s.as_str(), left).unwrap_or_else(|e| e);
+                    (offset, true)
+                }
+                RichtextStateChunk::Style { .. } => (1, false),
+            }
+        }
+
+        /// Entity length of a subtree, taken from the cached `entity_len`.
+        fn get_cache_entity_len(cache: &<RichtextTreeTrait as BTreeTrait>::Cache) -> usize {
+            cache.entity_len as usize
+        }
+    }
 }

 mod cursor_cache {
    use std::sync::atomic::AtomicUsize;

-    use super::{pos_to_unicode_index, unicode_to_utf16_index, PosType, RichtextTreeTrait};
+    use super::{
+        pos_to_unicode_index, unicode_to_utf16_index, unicode_to_utf8_index, PosType,
+        RichtextTreeTrait,
+    };
    use generic_btree::{rle::HasLength, BTree, Cursor, LeafIndex};
+    use loro_common::LoroError;

    #[derive(Debug, Clone)]
    struct CursorCacheItem {
@ -1038,9 +1117,34 @@ mod cursor_cache {
            entity_index: usize,
            cursor: Cursor,
            tree: &BTree<RichtextTreeTrait>,
-        ) {
+        ) -> Result<(), usize> {
            match kind {
-                PosType::Bytes => todo!(),
+                PosType::Bytes => {
+                    if cursor.offset == 0 {
+                        self.entity = Some(EntityIndexCacheItem {
+                            pos,
+                            pos_type: kind,
+                            entity_index,
+                            leaf: cursor.leaf,
+                        });
+                    } else {
+                        let elem = tree.get_elem(cursor.leaf).unwrap();
+                        let Some(s) = elem.as_str() else {
+                            return Ok(());
+                        };
+                        let utf8offset = unicode_to_utf8_index(s, cursor.offset).unwrap();
+                        if pos < utf8offset {
+                            return Err(pos);
+                        }
+                        self.entity = Some(EntityIndexCacheItem {
+                            pos: pos - utf8offset,
+                            pos_type: kind,
+                            entity_index: entity_index - cursor.offset,
+                            leaf: cursor.leaf,
+                        });
+                    }
+                    Ok(())
+                }
                PosType::Unicode | PosType::Entity => {
                    self.entity = Some(EntityIndexCacheItem {
                        pos: pos - cursor.offset,
@ -1048,6 +1152,7 @@ mod cursor_cache {
                        entity_index: entity_index - cursor.offset,
                        leaf: cursor.leaf,
                    });
+                    Ok(())
                }
                PosType::Event if cfg!(not(feature = "wasm")) => {
                    self.entity = Some(EntityIndexCacheItem {
@ -1056,6 +1161,7 @@ mod cursor_cache {
                        entity_index: entity_index - cursor.offset,
                        leaf: cursor.leaf,
                    });
+                    Ok(())
                }
                _ => {
                    // utf16
@ -1068,8 +1174,13 @@ mod cursor_cache {
                        });
                    } else {
                        let elem = tree.get_elem(cursor.leaf).unwrap();
-                        let Some(s) = elem.as_str() else { return };
+                        let Some(s) = elem.as_str() else {
+                            return Ok(());
+                        };
                        let utf16offset = unicode_to_utf16_index(s, cursor.offset).unwrap();
+                        if pos < utf16offset {
+                            return Err(pos);
+                        }
                        self.entity = Some(EntityIndexCacheItem {
                            pos: pos - utf16offset,
                            pos_type: kind,
@ -1077,6 +1188,7 @@ mod cursor_cache {
                            leaf: cursor.leaf,
                        });
                    }
+                    Ok(())
                }
            }
        }
@ -1196,9 +1308,9 @@ impl RichtextState {
        &mut self,
        pos: usize,
        pos_type: PosType,
-    ) -> usize {
+    ) -> Result<usize, LoroError> {
        if self.tree.is_empty() {
-            return 0;
+            return Ok(0);
        }

        if let Some(pos) =
@ -1211,11 +1323,11 @@ impl RichtextState {
                &self.tree,
                &self.cursor_cache
            );
-            return pos;
+            return Ok(pos);
        }

        let (c, entity_index) = match pos_type {
-            PosType::Bytes => todo!(),
+            PosType::Bytes => self.find_best_insert_pos::<ByteQueryT>(pos),
            PosType::Unicode => self.find_best_insert_pos::<UnicodeQueryT>(pos),
            PosType::Utf16 => self.find_best_insert_pos::<Utf16QueryT>(pos),
            PosType::Entity => self.find_best_insert_pos::<EntityQueryT>(pos),
@ -1227,12 +1339,23 @@ impl RichtextState {
            self.cursor_cache
                .record_cursor(entity_index, PosType::Entity, c, &self.tree);
            if !self.has_styles() {
-                self.cursor_cache
-                    .record_entity_index(pos, pos_type, entity_index, c, &self.tree);
+                if let Err(pos) = self.cursor_cache.record_entity_index(
+                    pos,
+                    pos_type,
+                    entity_index,
+                    c,
+                    &self.tree,
+                ) {
+                    return match pos_type {
+                        PosType::Bytes => Err(LoroError::UTF8InUnicodeCodePoint { pos: pos }),
+                        PosType::Utf16 => Err(LoroError::UTF16InUnicodeCodePoint { pos: pos }),
+                        _ => unreachable!(),
+                    };
+                }
            }
        }

-        entity_index
+        Ok(entity_index)
    }

    fn has_styles(&self) -> bool {
@ -1251,8 +1374,12 @@ impl RichtextState {
            return (0..0, None);
        }

-        let start = self.get_entity_index_for_text_insert(range.start, pos_type);
-        let end = self.get_entity_index_for_text_insert(range.end, pos_type);
+        let start = self
+            .get_entity_index_for_text_insert(range.start, pos_type)
+            .unwrap();
+        let end = self
+            .get_entity_index_for_text_insert(range.end, pos_type)
+            .unwrap();
        if self.has_styles() {
            (
                start..end,
@ -1656,22 +1783,25 @@ impl RichtextState {
        pos: usize,
        len: usize,
        pos_type: PosType,
-    ) -> Vec<EntityRangeInfo> {
+    ) -> LoroResult<Vec<EntityRangeInfo>> {
        if self.tree.is_empty() {
-            return Vec::new();
+            return Ok(Vec::new());
        }

        if len == 0 {
-            return Vec::new();
+            return Ok(Vec::new());
        }

        if pos + len > self.len(pos_type) {
-            return Vec::new();
+            return Ok(Vec::new());
        }

        let mut ans: Vec<EntityRangeInfo> = Vec::new();
        let (start, end) = match pos_type {
-            PosType::Bytes => todo!(),
+            PosType::Bytes => (
+                self.tree.query::<ByteQuery>(&pos).unwrap().cursor,
+                self.tree.query::<ByteQuery>(&(pos + len)).unwrap().cursor,
+            ),
            PosType::Unicode => (
                self.tree.query::<UnicodeQuery>(&pos).unwrap().cursor,
                self.tree
@ -1735,7 +1865,7 @@ impl RichtextState {
            }
        }

-        ans
+        Ok(ans)
    }

    // PERF: can be splitted into two methods. One is without cursor_to_event_index
@ -2272,7 +2402,7 @@ impl RichtextState {
        pos: usize,
        kind: PosType,
    ) -> Option<ID> {
-        let v = &self.get_text_entity_ranges(pos, 1, kind);
+        let v = &self.get_text_entity_ranges(pos, 1, kind).unwrap();
        let a = v.first()?;
        Some(a.id_start)
    }
@ -2395,7 +2525,9 @@ mod test {
            {
                let state = &mut self.state;
                let text = self.bytes.slice(start..);
-                let entity_index = state.get_entity_index_for_text_insert(pos, PosType::Unicode);
+                let entity_index = state
+                    .get_entity_index_for_text_insert(pos, PosType::Unicode)
+                    .unwrap();
                state.insert_at_entity_index(entity_index, text, IdFull::new(0, 0, 0));
            };
        }
@ -2403,7 +2535,8 @@ mod test {
        fn delete(&mut self, pos: usize, len: usize) {
            let ranges = self
                .state
-                .get_text_entity_ranges(pos, len, PosType::Unicode);
+                .get_text_entity_ranges(pos, len, PosType::Unicode)
+                .unwrap();
            for range in ranges.into_iter().rev() {
                self.state.drain_by_entity_index(
                    range.entity_start,
@ -2416,10 +2549,12 @@ mod test {
        fn mark(&mut self, range: Range<usize>, style: Arc<StyleOp>) {
            let start = self
                .state
-                .get_entity_index_for_text_insert(range.start, PosType::Unicode);
+                .get_entity_index_for_text_insert(range.start, PosType::Unicode)
+                .unwrap();
            let end = self
                .state
-                .get_entity_index_for_text_insert(range.end, PosType::Unicode);
+                .get_entity_index_for_text_insert(range.end, PosType::Unicode)
+                .unwrap();
            self.state.mark_with_entity_index(start..end, style);
        }
    }
--- a/crates/loro-internal/src/handler.rs
+++ b/crates/loro-internal/src/handler.rs
@ -7,7 +7,7 @@ use crate::{
        richtext::{richtext_state::PosType, RichtextState, StyleOp, TextStyleInfoFlag},
    },
    cursor::{Cursor, Side},
-    delta::{DeltaItem, StyleMeta, TreeExternalDiff},
+    delta::{DeltaItem, Meta, StyleMeta, TreeExternalDiff},
    event::{Diff, TextDiffItem},
    op::ListSlice,
    state::{ContainerState, IndexType, State},
@ -16,7 +16,7 @@ use crate::{
 };
 use append_only_bytes::BytesSlice;
 use enum_as_inner::EnumAsInner;
-use fxhash::{FxHashMap, FxHashSet};
+use fxhash::FxHashMap;
 use generic_btree::rle::HasLength;
 use loro_common::{
    ContainerID, ContainerType, IdFull, InternalString, LoroError, LoroResult, LoroValue, TreeID,
@ -31,7 +31,8 @@ use std::{
    ops::Deref,
    sync::{Arc, Mutex, Weak},
 };
-use tracing::{debug, error, info, instrument, trace};
+
+use tracing::{debug, error, info, instrument, Event};

 mod tree;
 pub use tree::TreeHandler;
@ -1337,7 +1338,8 @@ impl TextHandler {
                let mut t = t.try_lock().unwrap();
                let index = t
                    .value
-                    .get_entity_index_for_text_insert(pos, PosType::Event);
+                    .get_entity_index_for_text_insert(pos, PosType::Event)
+                    .unwrap();
                t.value.insert_at_entity_index(
                    index,
                    BytesSlice::from_bytes(s.as_bytes()),
@ -1349,16 +1351,89 @@ impl TextHandler {
        }
    }

+    /// Inserts `s` at the UTF-8 byte offset `pos`.
+    ///
+    /// For a detached handler the byte offset is resolved against the local value and the
+    /// text is inserted directly; for an attached handler the insertion runs inside a
+    /// transaction via `insert_with_txn_utf8`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error produced while resolving `pos` to an entity index, or the error
+    /// returned by the transactional insert.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the detached state's lock cannot be acquired.
+    pub fn insert_utf8(&self, pos: usize, s: &str) -> LoroResult<()> {
+        match &self.inner {
+            MaybeDetached::Detached(t) => {
+                let mut t = t.try_lock().unwrap();
+                // Propagate an invalid position to the caller instead of panicking.
+                let index = t
+                    .value
+                    .get_entity_index_for_text_insert(pos, PosType::Bytes)?;
+                t.value.insert_at_entity_index(
+                    index,
+                    BytesSlice::from_bytes(s.as_bytes()),
+                    IdFull::NONE_ID,
+                );
+                Ok(())
+            }
+            MaybeDetached::Attached(a) => a.with_txn(|txn| self.insert_with_txn_utf8(txn, pos, s)),
+        }
+    }
+
    /// `pos` is a Event Index:
    ///
    /// - if feature="wasm", pos is a UTF-16 index
    /// - if feature!="wasm", pos is a Unicode index
    pub fn insert_with_txn(&self, txn: &mut Transaction, pos: usize, s: &str) -> LoroResult<()> {
-        self.insert_with_txn_and_attr(txn, pos, s, None)?;
+        self.insert_with_txn_and_attr(txn, pos, s, None, PosType::Event)?;
        Ok(())
    }

-    /// If attr is specified, it will be used as the attribute of the inserted text.
+    /// Inserts `s` at the UTF-8 byte offset `pos` as part of `txn`.
+    ///
+    /// Delegates to `insert_with_txn_and_attr` with no attributes and `PosType::Bytes`,
+    /// discarding the returned style overrides.
+    pub fn insert_with_txn_utf8(
+        &self,
+        txn: &mut Transaction,
+        pos: usize,
+        s: &str,
+    ) -> LoroResult<()> {
+        self.insert_with_txn_and_attr(txn, pos, s, None, PosType::Bytes)
+            .map(|_| ())
+    }
+
+    /// Deletes `len` units of text starting at `pos`.
+    ///
+    /// `pos` is a Event Index:
+    ///
+    /// - if feature="wasm", pos is a UTF-16 index
+    /// - if feature!="wasm", pos is a Unicode index
+    ///
+    /// This method requires auto_commit to be enabled.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error produced while resolving the range to entity ranges, or the error
+    /// returned by the transactional delete.
+    pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> {
+        match &self.inner {
+            MaybeDetached::Detached(t) => {
+                let mut t = t.try_lock().unwrap();
+                // Propagate range-resolution failures, matching `delete_utf8`.
+                let ranges = t.value.get_text_entity_ranges(pos, len, PosType::Event)?;
+                // Drain from the back so earlier entity indexes stay valid.
+                for range in ranges.iter().rev() {
+                    t.value
+                        .drain_by_entity_index(range.entity_start, range.entity_len(), None);
+                }
+                Ok(())
+            }
+            MaybeDetached::Attached(a) => a.with_txn(|txn| self.delete_with_txn(txn, pos, len)),
+        }
+    }
+
+    /// Deletes `len` UTF-8 bytes of text starting at the UTF-8 byte offset `pos`.
+    ///
+    /// For a detached handler the range is resolved against the local value and drained
+    /// directly; for an attached handler the deletion runs inside a transaction via
+    /// `delete_with_txn_utf8`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error produced while resolving the range to entity ranges, or the error
+    /// returned by the transactional delete.
+    pub fn delete_utf8(&self, pos: usize, len: usize) -> LoroResult<()> {
+        match &self.inner {
+            MaybeDetached::Detached(t) => {
+                let mut t = t.try_lock().unwrap();
+                let ranges = t.value.get_text_entity_ranges(pos, len, PosType::Bytes)?;
+                // Drain from the back so earlier entity indexes stay valid.
+                for range in ranges.iter().rev() {
+                    t.value
+                        .drain_by_entity_index(range.entity_start, range.entity_len(), None);
+                }
+                Ok(())
+            }
+            MaybeDetached::Attached(a) => {
+                a.with_txn(|txn| self.delete_with_txn_utf8(txn, pos, len))
+            }
+        }
+    }
+
+    /// If attr is specified, it will be used as the attribute of the inserted text.
    /// It will override the existing attribute of the text.
    fn insert_with_txn_and_attr(
        &self,
@ -1366,27 +1441,51 @@ impl TextHandler {
        pos: usize,
        s: &str,
        attr: Option<&FxHashMap<String, LoroValue>>,
+        pos_type: PosType,
    ) -> Result<Vec<(InternalString, LoroValue)>, LoroError> {
        if s.is_empty() {
            return Ok(Vec::new());
        }

-        if pos > self.len_event() {
-            return Err(LoroError::OutOfBound {
-                pos,
-                len: self.len_event(),
-                info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
-            });
+        match pos_type {
+            PosType::Event => {
+                if pos > self.len_event() {
+                    return Err(LoroError::OutOfBound {
+                        pos,
+                        len: self.len_event(),
+                        info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
+                    });
+                }
+            }
+            PosType::Bytes => {
+                if pos > self.len_utf8() {
+                    return Err(LoroError::OutOfBound {
+                        pos,
+                        len: self.len_utf8(),
+                        info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
+                    });
+                }
+            }
+            _ => (),
        }

        let inner = self.inner.try_attached_state()?;
        let (entity_index, styles) = inner.with_state(|state| {
            let richtext_state = state.as_richtext_state_mut().unwrap();
-            let pos = richtext_state.get_entity_index_for_text_insert(pos);
+            let pos = richtext_state.get_entity_index_for_text_insert(pos, pos_type);
+            let pos = match pos {
+                Err(_) => return (pos, StyleMeta::empty()),
+                Ok(x) => x,
+            };
            let styles = richtext_state.get_styles_at_entity_index(pos);
-            (pos, styles)
+            (Ok(pos), styles)
        });

+        let entity_index = match entity_index {
+            Err(x) => return Err(x),
+            _ => entity_index.unwrap(),
+        };
+
        let mut override_styles = Vec::new();
        if let Some(attr) = attr {
            // current styles
@ -1442,50 +1541,66 @@ impl TextHandler {
    ///
    /// - if feature="wasm", pos is a UTF-16 index
    /// - if feature!="wasm", pos is a Unicode index
-    ///
-    /// This method requires auto_commit to be enabled.
-    pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> {
-        match &self.inner {
-            MaybeDetached::Detached(t) => {
-                let mut t = t.try_lock().unwrap();
-                let ranges = t.value.get_text_entity_ranges(pos, len, PosType::Event);
-                for range in ranges.iter().rev() {
-                    t.value
-                        .drain_by_entity_index(range.entity_start, range.entity_len(), None);
-                }
-                Ok(())
-            }
-            MaybeDetached::Attached(a) => a.with_txn(|txn| self.delete_with_txn(txn, pos, len)),
-        }
+    pub fn delete_with_txn(&self, txn: &mut Transaction, pos: usize, len: usize) -> LoroResult<()> {
+        self.delete_with_txn_inline(txn, pos, len, PosType::Event)
    }

-    /// `pos` is a Event Index:
-    ///
-    /// - if feature="wasm", pos is a UTF-16 index
-    /// - if feature!="wasm", pos is a Unicode index
-    pub fn delete_with_txn(&self, txn: &mut Transaction, pos: usize, len: usize) -> LoroResult<()> {
+    /// Deletes `len` UTF-8 bytes starting at the UTF-8 byte offset `pos` as part of `txn`.
+    ///
+    /// Delegates to `delete_with_txn_inline` with `PosType::Bytes`.
+    pub fn delete_with_txn_utf8(
+        &self,
+        txn: &mut Transaction,
+        pos: usize,
+        len: usize,
+    ) -> LoroResult<()> {
+        self.delete_with_txn_inline(txn, pos, len, PosType::Bytes)
+    }
+
+    fn delete_with_txn_inline(
+        &self,
+        txn: &mut Transaction,
+        pos: usize,
+        len: usize,
+        pos_type: PosType,
+    ) -> LoroResult<()> {
        if len == 0 {
            return Ok(());
        }

-        if pos + len > self.len_event() {
-            error!("pos={} len={} len_event={}", pos, len, self.len_event());
-            return Err(LoroError::OutOfBound {
-                pos: pos + len,
-                len: self.len_event(),
-                info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
-            });
+        // Reject ranges that run past the end of the text. The length is measured in the
+        // same unit as `pos`/`len`, and that same length is what gets logged and reported.
+        match pos_type {
+            PosType::Event => {
+                if pos + len > self.len_event() {
+                    error!("pos={} len={} len_event={}", pos, len, self.len_event());
+                    return Err(LoroError::OutOfBound {
+                        pos: pos + len,
+                        len: self.len_event(),
+                        info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
+                    });
+                }
+            }
+            PosType::Bytes => {
+                if pos + len > self.len_utf8() {
+                    error!("pos={} len={} len_utf8={}", pos, len, self.len_utf8());
+                    return Err(LoroError::OutOfBound {
+                        pos: pos + len,
+                        len: self.len_utf8(),
+                        info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(),
+                    });
+                }
+            }
+            // Other position types are not bounds-checked here.
+            _ => (),
+        }

        let inner = self.inner.try_attached_state()?;
        let s = tracing::span!(tracing::Level::INFO, "delete", "pos={} len={}", pos, len);
        let _e = s.enter();
-        let ranges = inner.with_state(|state| {
+        let ranges = match inner.with_state(|state| {
            let richtext_state = state.as_richtext_state_mut().unwrap();
-            richtext_state.get_text_entity_ranges_in_event_index_range(pos, len)
-        });
+            richtext_state.get_text_entity_ranges_in_event_index_range(pos, len, pos_type)
+        }) {
+            Err(x) => return Err(x),
+            Ok(x) => x,
+        };

-        debug_assert_eq!(ranges.iter().map(|x| x.event_len).sum::<usize>(), len);
+        //debug_assert_eq!(ranges.iter().map(|x| x.event_len).sum::<usize>(), len);
        let mut event_end = (pos + len) as isize;
        for range in ranges.iter().rev() {
            let event_start = event_end - range.event_len as isize;
@ -1749,6 +1864,7 @@ impl TextHandler {
                        index,
                        insert.as_str(),
                        Some(attributes.as_ref().unwrap_or(&Default::default())),
+                        PosType::Event,
                    )?;

                    for (key, value) in override_styles {
@ -3558,14 +3674,14 @@ pub mod counter {
 #[cfg(test)]
 mod test {

+    use super::{HandlerTrait, TextDelta};
+    use crate::container::richtext::richtext_state::PosType;
    use crate::loro::LoroDoc;
    use crate::version::Frontiers;
    use crate::{fx_map, ToJson};
    use loro_common::ID;
    use serde_json::json;

-    use super::{HandlerTrait, TextDelta};
-
    #[test]
    fn import() {
        let loro = LoroDoc::new();
--- a/crates/loro-internal/src/state/richtext_state.rs
+++ b/crates/loro-internal/src/state/richtext_state.rs
@ -5,7 +5,7 @@ use std::{

 use fxhash::{FxHashMap, FxHashSet};
 use generic_btree::rle::HasLength;
-use loro_common::{ContainerID, InternalString, LoroResult, LoroValue, ID};
+use loro_common::{ContainerID, InternalString, LoroError, LoroResult, LoroValue, ID};
 use loro_delta::DeltaRopeBuilder;

 use crate::{
@ -743,10 +743,14 @@ impl RichtextState {
    }

    #[inline]
-    pub(crate) fn get_entity_index_for_text_insert(&mut self, event_index: usize) -> usize {
+    pub(crate) fn get_entity_index_for_text_insert(
+        &mut self,
+        event_index: usize,
+        pos_type: PosType,
+    ) -> Result<usize, LoroError> {
        self.state
            .get_mut()
-            .get_entity_index_for_text_insert(event_index, PosType::Event)
+            .get_entity_index_for_text_insert(event_index, pos_type)
    }

    pub(crate) fn get_entity_range_and_styles_at_range(
@ -771,10 +775,11 @@ impl RichtextState {
        &mut self,
        pos: usize,
        len: usize,
-    ) -> Vec<EntityRangeInfo> {
+        pos_type: PosType,
+    ) -> LoroResult<Vec<EntityRangeInfo>> {
        self.state
            .get_mut()
-            .get_text_entity_ranges(pos, len, PosType::Event)
+            .get_text_entity_ranges(pos, len, pos_type)
    }

    #[inline]
--- a/crates/loro-internal/tests/test.rs
+++ b/crates/loro-internal/tests/test.rs
@ -960,3 +960,147 @@ fn counter() {
    let doc2 = LoroDoc::new_auto_commit();
    doc2.import_json_updates(json).unwrap();
 }
+
+#[test]
+fn test_insert_utf8() {
+    let doc = LoroDoc::new_auto_commit();
+    let text = doc.get_text("text");
+    text.insert_utf8(0, "Hello ").unwrap();
+    text.insert_utf8(6, "World").unwrap();
+    assert_eq!(
+        text.get_richtext_value().to_json_value(),
+        json!([{"insert":"Hello World"}])
+    )
+}
+
+#[test]
+fn test_insert_utf8_cross_unicode_1() {
+    let doc = LoroDoc::new_auto_commit();
+    let text = doc.get_text("text");
+    text.insert_utf8(0, "你好").unwrap();
+    // Each of the two characters takes 3 bytes in UTF-8, so byte offset 3 is the
+    // boundary between them.
+    text.insert_utf8(3, "World").unwrap();
+    assert_eq!(
+        text.get_richtext_value().to_json_value(),
+        json!([{"insert":"你World好"}])
+    )
+}
+
+#[test]
+fn test_insert_utf8_cross_unicode_2() {
+    let doc = LoroDoc::new_auto_commit();
+    let text = doc.get_text("text");
+    text.insert_utf8(0, "你好").unwrap();
+    // The two 3-byte characters occupy 6 bytes, so byte offset 6 is the end of the text.
+    text.insert_utf8(6, "World").unwrap();
+    assert_eq!(
+        text.get_richtext_value().to_json_value(),
+        json!([{"insert":"你好World"}])
+    )
+}
+
+#[test]
+fn test_insert_utf8_detached() {
+    let text = TextHandler::new_detached();
+    text.insert_utf8(0, "Hello ").unwrap();
+    text.insert_utf8(6, "World").unwrap();
+    assert_eq!(
+        text.get_richtext_value().to_json_value(),
+        json!([{"insert":"Hello World"}])
+    )
+}
+
+#[test]
+#[should_panic]
+fn test_insert_utf8_panic_cross_unicode() {
+    let doc = LoroDoc::new_auto_commit();
+    let text = doc.get_text("text");
+    text.insert_utf8(0, "你好").unwrap();
+    // Byte offset 1 falls inside the 3-byte encoding of the first character, so this
+    // insert must not succeed; the test passes when the call or its `unwrap` panics.
+    text.insert_utf8(1, "World").unwrap();
+}
+
+#[test]
+#[should_panic]
+fn test_insert_utf8_panic_out_bound() {
+    let doc = LoroDoc::new_auto_commit();
+    let text = doc.get_text("text");
+    text.insert_utf8(0, "Hello ").unwrap();
+    text.insert_utf8(7, "World").unwrap();
+}
+
+//    println!("{}", text.get_richtext_value().to_json_value().to_string());
+
+#[test]
+fn test_delete_utf8() {
+    let doc = LoroDoc::new_auto_commit();
+    let text = doc.get_text("text");
+    text.insert_utf8(0, "Hello").unwrap();
+    text.delete_utf8(1, 3).unwrap();
+    assert_eq!(
+        text.get_richtext_value().to_json_value(),
+        json!([{"insert":"Ho"}])
+    )
+}
+
+#[test]
+fn test_delete_utf8_with_zero_len() {
+    let doc = LoroDoc::new_auto_commit();
+    let text = doc.get_text("text");
+    text.insert_utf8(0, "Hello").unwrap();
+    text.delete_utf8(1, 0).unwrap();
+    assert_eq!(
+        text.get_richtext_value().to_json_value(),
+        json!([{"insert":"Hello"}])
+    )
+}
+
+#[test]
+fn test_delete_utf8_cross_unicode() {
+    let doc = LoroDoc::new_auto_commit();
+    let text = doc.get_text("text");
+    text.insert_utf8(0, "你好").unwrap();
+    // Deleting 3 bytes from offset 0 removes exactly the first 3-byte character.
+    text.delete_utf8(0, 3).unwrap();
+    assert_eq!(
+        text.get_richtext_value().to_json_value(),
+        json!([{"insert":"好"}])
+    )
+}
+
+#[test]
+fn test_delete_utf8_detached() {
+    let text = TextHandler::new_detached();
+    text.insert_utf8(0, "Hello").unwrap();
+    text.delete_utf8(1, 3).unwrap();
+    assert_eq!(
+        text.get_richtext_value().to_json_value(),
+        json!([{"insert":"Ho"}])
+    )
+}
+
+// WARNING:
+// Due to the current inability to report an error on
+// get_offset_and_found on BTree, this test won't be ok.
+// #[test]
+// #[should_panic]
+// fn test_delete_utf8_panic_cross_unicode() {
+//     let doc = LoroDoc::new_auto_commit();
+//     let text = doc.get_text("text");
+//     text.insert_utf8(0, "你好").unwrap();
+//     text.delete_utf8(0, 2).unwrap();
+// }
+
+#[test]
+#[should_panic]
+fn test_delete_utf8_panic_out_bound_pos() {
+    let doc = LoroDoc::new_auto_commit();
+    let text = doc.get_text("text");
+    text.insert(0, "Hello").unwrap();
+    text.delete_utf8(10, 1).unwrap();
+}
+
+#[test]
+#[should_panic]
+fn test_delete_utf8_panic_out_bound_len() {
+    let doc = LoroDoc::new_auto_commit();
+    let text = doc.get_text("text");
+    text.insert(0, "Hello").unwrap();
+    text.delete_utf8(1, 10).unwrap();
+}
--- a/crates/loro-wasm/src/lib.rs
+++ b/crates/loro-wasm/src/lib.rs
@ -1515,6 +1515,22 @@ impl LoroText {
        Ok(())
    }

+    /// Insert some string at utf-8 index.
+    ///
+    /// @example
+    /// ```ts
+    /// import { Loro } from "loro-crdt";
+    ///
+    /// const doc = new Loro();
+    /// const text = doc.getText("text");
+    /// text.insertUtf8(0, "Hello");
+    /// ```
+    #[wasm_bindgen(js_name = "insertUtf8")]
+    pub fn insert_utf8(&mut self, index: usize, content: &str) -> JsResult<()> {
+        self.handler.insert_utf8(index, content)?;
+        Ok(())
+    }
+
    /// Delete elements from index to index + len
    ///
    /// @example
@ -1533,6 +1549,25 @@ impl LoroText {
        Ok(())
    }

+    /// Delete the text in the range from utf-8 `index` to utf-8 `index + len`.
+    ///
+    /// @example
+    /// ```ts
+    /// import { Loro } from "loro-crdt";
+    ///
+    /// const doc = new Loro();
+    /// const text = doc.getText("text");
+    /// text.insertUtf8(0, "Hello");
+    /// text.deleteUtf8(1, 3);
+    /// const s = text.toString();
+    /// console.log(s); // "Ho"
+    /// ```
+    #[wasm_bindgen(js_name = "deleteUtf8")]
+    pub fn delete_utf8(&mut self, index: usize, len: usize) -> JsResult<()> {
+        self.handler.delete_utf8(index, len)?;
+        Ok(())
+    }
+
    /// Mark a range of text with a key and a value.
    ///
    /// > You should call `configTextStyle` before using `mark` and `unmark`.
--- a/crates/loro/src/lib.rs
+++ b/crates/loro/src/lib.rs
@ -983,11 +983,21 @@ impl LoroText {
        self.handler.insert(pos, s)
    }

+    /// Insert a string at the given utf-8 position.
+    pub fn insert_utf8(&self, pos: usize, s: &str) -> LoroResult<()> {
+        self.handler.insert_utf8(pos, s)
+    }
+
    /// Delete a range of text at the given unicode position with unicode length.
    pub fn delete(&self, pos: usize, len: usize) -> LoroResult<()> {
        self.handler.delete(pos, len)
    }

+    /// Delete a range of text at the given utf-8 position with utf-8 length.
+    pub fn delete_utf8(&self, pos: usize, len: usize) -> LoroResult<()> {
+        self.handler.delete_utf8(pos, len)
+    }
+
    /// Whether the text container is empty.
    pub fn is_empty(&self) -> bool {
        self.handler.is_empty()
--- a/loro-js/tests/richtext.test.ts
+++ b/loro-js/tests/richtext.test.ts
@ -286,4 +286,20 @@ describe("richtext", () => {
    const text = doc.getText("text");
    text.insert(0, `“aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`);
  });
+
+  it("Insert/delete by utf8 index", () => {
+    const doc = new Loro();
+    const text = doc.getText('t');
+    text.insert(0, "你好");
+    text.insertUtf8(3, "a");
+    text.insertUtf8(7, "b");
+    expect(text.toDelta()).toStrictEqual([
+      { insert: "你a好b" },
+    ]);
+    text.deleteUtf8(3, 4);
+    expect(text.toDelta()).toStrictEqual([
+      { insert: "你b"},
+    ]);
+
+  });
 });