diff --git a/README.md b/README.md index 48c4c7fc..2a6cc838 100644 --- a/README.md +++ b/README.md @@ -40,9 +40,6 @@ https://github.com/loro-dev/loro/assets/18425020/fe246c47-a120-44b3-91d4-1e7232a5b4ac - -> ⚠️ **Notice**: The current API and encoding schema of Loro are **experimental** and **subject to change**. You should not use it in production. - Loro is a [CRDTs(Conflict-free Replicated Data Types)](https://crdt.tech/) library that makes building [local-first apps][local-first] easier. It is currently available for JavaScript (via WASM) and Rust developers. Explore our vision in our blog: [**✨ Reimagine State Management with CRDTs**](https://loro.dev/blog/loro-now-open-source). diff --git a/crates/loro-common/src/error.rs b/crates/loro-common/src/error.rs index cd838639..f7e843bb 100644 --- a/crates/loro-common/src/error.rs +++ b/crates/loro-common/src/error.rs @@ -68,10 +68,12 @@ pub enum LoroError { UndoWithDifferentPeerId { expected: PeerID, actual: PeerID }, #[error("The input JSON schema is invalid")] InvalidJsonSchema, - #[error("Cannot insert or delete utf-8 in the middle of the codepoint in Unicode.")] + #[error("Cannot insert or delete utf-8 in the middle of the codepoint in Unicode")] UTF8InUnicodeCodePoint { pos: usize }, - #[error("Cannot insert or delete utf-16 in the middle of the codepoint in Unicode.")] + #[error("Cannot insert or delete utf-16 in the middle of the codepoint in Unicode")] UTF16InUnicodeCodePoint { pos: usize }, + #[error("The end index cannot be less than the start index")] + EndIndexLessThanStartIndex { start: usize, end: usize }, } #[derive(Error, Debug)] diff --git a/crates/loro-internal/src/container/richtext/richtext_state.rs b/crates/loro-internal/src/container/richtext/richtext_state.rs index 9550dad2..9302a2c1 100644 --- a/crates/loro-internal/src/container/richtext/richtext_state.rs +++ b/crates/loro-internal/src/container/richtext/richtext_state.rs @@ -582,6 +582,17 @@ impl CanRemove for RichtextStateChunk { } } 
+//TODO: start/end can be scanned in one loop, but now it takes twice the time +fn unicode_slice(s: &str, start_index: usize, end_index: usize) -> Result<&str, ()> { + let (Some(start), Some(end)) = ( + unicode_to_utf8_index(s, start_index), + unicode_to_utf8_index(s, end_index), + ) else { + return Err(()); + }; + Ok(&s[start..end]) +} + pub(crate) fn unicode_to_utf8_index(s: &str, unicode_index: usize) -> Option { let mut current_unicode_index = 0; for (byte_index, _) in s.char_indices() { @@ -1626,6 +1637,27 @@ impl RichtextState { self.style_ranges.as_mut().unwrap() } + pub(crate) fn get_char_by_event_index(&self, pos: usize) -> Result { + let cursor = self.tree.query::(&pos).unwrap().cursor; + let Some(str) = &self.tree.get_elem(cursor.leaf) else { + return Err(()); + }; + if cfg!(not(feature = "wasm")) { + let mut char_iter = str.as_str().unwrap().chars(); + match &mut char_iter.nth(cursor.offset) { + Some(c) => Ok(*c), + None => Err(()), + } + } else { + let s = str.as_str().unwrap(); + let utf16offset = unicode_to_utf16_index(s, cursor.offset).unwrap(); + match s.encode_utf16().nth(utf16offset) { + Some(c) => std::char::from_u32(c as u32).ok_or(()), + None => Err(()), + } + } + } + /// Find the best insert position based on algorithm similar to Peritext. /// The result is only different from `query` when there are style anchors around the insert pos. /// Returns the right neighbor of the insert pos and the entity index. 
@@ -1874,6 +1906,54 @@ impl RichtextState { Ok(ans) } + pub(crate) fn get_text_slice_by_event_index( + &self, + pos: usize, + len: usize, + ) -> LoroResult { + if self.tree.is_empty() { + return Ok(String::new()); + } + + if len == 0 { + return Ok(String::new()); + } + + if pos + len > self.len_event() { + return Err(LoroError::OutOfBound { + pos: pos + len, + len: self.len_event(), + info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(), + }); + } + + let mut ans = String::new(); + let (start, end) = ( + self.tree.query::(&pos).unwrap().cursor, + self.tree + .query::(&(pos + len)) + .unwrap() + .cursor, + ); + + for span in self.tree.iter_range(start..end) { + let start = span.start.unwrap_or(0); + let end = span.end.unwrap_or(span.elem.rle_len()); + if end == 0 { + break; + } + + if let RichtextStateChunk::Text(s) = span.elem { + match unicode_slice(s.as_str(), start, end) { + Ok(x) => ans.push_str(x), + Err(()) => return Err(LoroError::UTF16InUnicodeCodePoint { pos: pos + len }), + } + } + } + + Ok(ans) + } + // PERF: can be splitted into two methods. 
One is without cursor_to_event_index // PERF: can be speed up a lot by detecting whether the range is in a single leaf first /// This is used to accept changes from DiffCalculator diff --git a/crates/loro-internal/src/handler.rs b/crates/loro-internal/src/handler.rs index 5ea0a0b0..8966a305 100644 --- a/crates/loro-internal/src/handler.rs +++ b/crates/loro-internal/src/handler.rs @@ -1363,6 +1363,80 @@ impl TextHandler { } } + /// Get the character at `pos`, which is an Event Index: + /// + /// - if feature="wasm", pos is a UTF-16 index + /// - if feature!="wasm", pos is a Unicode index + pub fn char_at(&self, pos: usize) -> LoroResult { + if pos >= self.len_event() { + return Err(LoroError::OutOfBound { + pos, + len: self.len_event(), + info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(), + }); + } + if let Ok(c) = match &self.inner { + MaybeDetached::Detached(t) => { + let t = t.try_lock().unwrap(); + t.value.get_char_by_event_index(pos) + } + MaybeDetached::Attached(a) => a.with_state(|state| { + state + .as_richtext_state_mut() + .unwrap() + .get_char_by_event_index(pos) + }), + } { + Ok(c) + } else { + Err(LoroError::OutOfBound { + pos, + len: self.len_event(), + info: format!("Position: {}:{}", file!(), line!()).into_boxed_str(), + }) + } + } + + /// Get the text in `start_index..end_index`; both are Event Indexes: + /// + /// - if feature="wasm", they are UTF-16 indexes + /// - if feature!="wasm", they are Unicode indexes + /// + pub fn slice(&self, start_index: usize, end_index: usize) -> LoroResult { + if end_index < start_index { + return Err(LoroError::EndIndexLessThanStartIndex { + start: start_index, + end: end_index, + }); + } + match &self.inner { + MaybeDetached::Detached(t) => { + let t = t.try_lock().unwrap(); + t.value + .get_text_slice_by_event_index(start_index, end_index - start_index) + } + MaybeDetached::Attached(a) => a.with_state(|state| { + state + .as_richtext_state_mut() + .unwrap() + .get_text_slice_by_event_index(start_index, end_index - start_index) + }), + } 
+ } + + /// `pos` is a Event Index: + /// + /// - if feature="wasm", pos is a UTF-16 index + /// - if feature!="wasm", pos is a Unicode index + /// + /// This method requires auto_commit to be enabled. + pub fn splice(&self, pos: usize, len: usize, s: &str) -> LoroResult { + let x = self.slice(pos, pos + len)?; + self.delete(pos, len)?; + self.insert(pos, s)?; + Ok(x) + } + /// `pos` is a Event Index: /// /// - if feature="wasm", pos is a UTF-16 index @@ -3541,7 +3615,14 @@ impl MapHandler { pub fn len(&self) -> usize { match &self.inner { MaybeDetached::Detached(m) => m.try_lock().unwrap().value.len(), - MaybeDetached::Attached(a) => a.with_state(|state| state.as_map_state().unwrap().len()), + MaybeDetached::Attached(a) => a.with_state(|state| { + state + .as_map_state() + .unwrap() + .iter() + .filter(|&(_, v)| v.value.is_some()) + .count() + }), } } diff --git a/crates/loro-internal/src/state/richtext_state.rs b/crates/loro-internal/src/state/richtext_state.rs index d381c280..bfbbac0e 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -96,7 +96,19 @@ impl RichtextState { } } - pub(crate) fn iter(&mut self, mut callback: impl FnMut(&str) -> bool) { + pub(crate) fn get_text_slice_by_event_index( + &mut self, + pos: usize, + len: usize, + ) -> LoroResult { + self.state.get_mut().get_text_slice_by_event_index(pos, len) + } + + pub(crate) fn get_char_by_event_index(&mut self, pos: usize) -> Result { + self.state.get_mut().get_char_by_event_index(pos) + } + + pub(crate) fn iter(&mut self, mut callback: impl FnMut(&str) -> bool) -> () { for span in self.state.get_mut().iter() { if !callback(span.text.as_str()) { return; diff --git a/crates/loro-internal/tests/test.rs b/crates/loro-internal/tests/test.rs index b1e2e642..8078fef1 100644 --- a/crates/loro-internal/tests/test.rs +++ b/crates/loro-internal/tests/test.rs @@ -1106,6 +1106,104 @@ fn test_delete_utf8_panic_out_bound_len() { } #[test] +fn 
test_char_at() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert(0, "Herld").unwrap(); + text.insert(2, "llo Wo").unwrap(); + assert_eq!(text.char_at(0).unwrap(), 'H'); + assert_eq!(text.char_at(1).unwrap(), 'e'); + assert_eq!(text.char_at(2).unwrap(), 'l'); + assert_eq!(text.char_at(3).unwrap(), 'l'); + let err = text.char_at(15).unwrap_err(); + assert!(matches!(err, loro_common::LoroError::OutOfBound { .. })) +} + +#[test] +fn test_char_at_detached() { + let text = TextHandler::new_detached(); + text.insert(0, "Herld").unwrap(); + text.insert(2, "llo Wo").unwrap(); + assert_eq!(text.char_at(0).unwrap(), 'H'); + assert_eq!(text.char_at(1).unwrap(), 'e'); + assert_eq!(text.char_at(2).unwrap(), 'l'); + assert_eq!(text.char_at(3).unwrap(), 'l'); + let err = text.char_at(15).unwrap_err(); + assert!(matches!(err, loro_common::LoroError::OutOfBound { .. })) +} + +#[test] +fn test_char_at_wchar() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert(0, "你好").unwrap(); + text.insert(1, "世界").unwrap(); + assert_eq!(text.char_at(0).unwrap(), '你'); + assert_eq!(text.char_at(1).unwrap(), '世'); + assert_eq!(text.char_at(2).unwrap(), '界'); + assert_eq!(text.char_at(3).unwrap(), '好'); + let err = text.char_at(5).unwrap_err(); + assert!(matches!(err, loro_common::LoroError::OutOfBound { .. 
})) +} + +#[test] +fn test_text_slice() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert(0, "Hello").unwrap(); + text.insert(1, "World").unwrap(); + assert_eq!(text.slice(0, 4).unwrap(), "HWor"); + assert_eq!(text.slice(0, 1).unwrap(), "H"); +} + +#[test] +fn test_text_slice_detached() { + let text = TextHandler::new_detached(); + text.insert(0, "Herld").unwrap(); + text.insert(2, "llo Wo").unwrap(); + assert_eq!(text.slice(0, 4).unwrap(), "Hell"); + assert_eq!(text.slice(0, 1).unwrap(), "H"); +} + +#[test] +fn test_text_slice_wchar() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert(0, "你好").unwrap(); + text.insert(1, "世界").unwrap(); + assert_eq!(text.slice(0, 3).unwrap(), "你世界"); +} + +#[test] +#[should_panic] +fn test_text_slice_end_index_less_than_start() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert(0, "你好").unwrap(); + text.insert(1, "世界").unwrap(); + text.slice(2, 1).unwrap(); +} + +#[test] +#[should_panic] +fn test_text_slice_out_of_bound() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert(0, "你好").unwrap(); + text.insert(1, "世界").unwrap(); + text.slice(1, 10).unwrap(); +} + +#[test] +fn test_text_splice() { + let doc = LoroDoc::new_auto_commit(); + let text = doc.get_text("text"); + text.insert(0, "你好").unwrap(); + assert_eq!(text.splice(1, 1, "世界").unwrap(), "好"); + assert_eq!(text.to_string(), "你世界"); +} + +#[test] fn test_text_iter() { let mut str = String::new(); let doc = LoroDoc::new_auto_commit(); diff --git a/crates/loro-wasm/CHANGELOG.md b/crates/loro-wasm/CHANGELOG.md index c51bc932..e7c766dc 100644 --- a/crates/loro-wasm/CHANGELOG.md +++ b/crates/loro-wasm/CHANGELOG.md @@ -1,5 +1,34 @@ # Changelog +## 0.16.7 + +### Patch Changes + +- 45c98d5: Better text APIs and bug fixes + + ### 🚀 Features + + - Add insert_utf8 and delete_utf8 for Rust Text API (#396) + - Add text iter (#400) + - Add 
more text api (#398) + + ### 🐛 Bug Fixes + + - Tree undo when processing deleted node (#399) + - Tree diff calc children should be sorted by idlp (#401) + - When computing the len of the map, do not count elements that are None (#402) + + ### 📚 Documentation + + - Update wasm docs + - Rm experimental warning + + ### ⚙️ Miscellaneous Tasks + + - Update fuzz config + - Pnpm + - Rename position to fractional_index (#381) + ## 0.16.6 ### Patch Changes diff --git a/crates/loro-wasm/package.json b/crates/loro-wasm/package.json index 67c620b7..5579f4f9 100644 --- a/crates/loro-wasm/package.json +++ b/crates/loro-wasm/package.json @@ -1,6 +1,6 @@ { "name": "loro-wasm", - "version": "0.16.6", + "version": "0.16.7", "description": "Loro CRDTs is a high-performance CRDT framework that makes your app state synchronized, collaborative and maintainable effortlessly.", "keywords": [ "crdt", diff --git a/crates/loro-wasm/src/lib.rs b/crates/loro-wasm/src/lib.rs index 49fbf9ed..8ef23d0c 100644 --- a/crates/loro-wasm/src/lib.rs +++ b/crates/loro-wasm/src/lib.rs @@ -1541,6 +1541,61 @@ impl LoroText { Ok(()) } + /// Get a string slice. + /// + /// @example + /// ```ts + /// import { Loro } from "loro-crdt"; + /// + /// const doc = new Loro(); + /// const text = doc.getText("text"); + /// text.insert(0, "Hello"); + /// text.slice(0, 2); // "He" + /// ``` + pub fn slice(&mut self, start_index: usize, end_index: usize) -> JsResult { + match self.handler.slice(start_index, end_index) { + Ok(x) => Ok(x), + Err(x) => Err(x.into()), + } + } + + /// Get the character at the given position. 
+ /// + /// @example + /// ```ts + /// import { Loro } from "loro-crdt"; + /// + /// const doc = new Loro(); + /// const text = doc.getText("text"); + /// text.insert(0, "Hello"); + /// text.charAt(0); // "H" + /// ``` + #[wasm_bindgen(js_name = "charAt")] + pub fn char_at(&mut self, pos: usize) -> JsResult { + match self.handler.char_at(pos) { + Ok(x) => Ok(x), + Err(x) => Err(x.into()), + } + } + + /// Delete and return the string at the given range and insert a string at the same position. + /// + /// @example + /// ```ts + /// import { Loro } from "loro-crdt"; + /// + /// const doc = new Loro(); + /// const text = doc.getText("text"); + /// text.insert(0, "Hello"); + /// text.splice(2, 3, "llo"); // "llo" + /// ``` + pub fn splice(&mut self, pos: usize, len: usize, s: &str) -> JsResult { + match self.handler.splice(pos, len, s) { + Ok(x) => Ok(x), + Err(x) => Err(x.into()), + } + } + /// Insert some string at utf-8 index. /// /// @example diff --git a/crates/loro/src/lib.rs b/crates/loro/src/lib.rs index 9e40d245..64f6c7bb 100644 --- a/crates/loro/src/lib.rs +++ b/crates/loro/src/lib.rs @@ -1006,6 +1006,21 @@ impl LoroText { self.handler.delete_utf8(pos, len) } + /// Get the text in the Unicode range `start_index..end_index`. + pub fn slice(&self, start_index: usize, end_index: usize) -> LoroResult { + self.handler.slice(start_index, end_index) + } + + /// Get the character at the given Unicode position. + pub fn char_at(&self, pos: usize) -> LoroResult { + self.handler.char_at(pos) + } + + /// Delete `len` characters starting at the Unicode position `pos`, insert `s` there, and return the deleted text. + pub fn splice(&self, pos: usize, len: usize, s: &str) -> LoroResult { + self.handler.splice(pos, len, s) + } + /// Whether the text container is empty. 
pub fn is_empty(&self) -> bool { self.handler.is_empty() diff --git a/crates/loro/tests/loro_rust_test.rs b/crates/loro/tests/loro_rust_test.rs index acff0778..b07ffd5d 100644 --- a/crates/loro/tests/loro_rust_test.rs +++ b/crates/loro/tests/loro_rust_test.rs @@ -853,3 +853,20 @@ fn awareness() { ); assert_eq!(b.get_all_states().get(&2).map(|x| x.state.clone()), None); } + +#[test] +// https://github.com/loro-dev/loro/issues/397 +fn len_and_is_empty_inconsistency() { + let doc = LoroDoc::new(); + let map = doc.get_map("map"); + println!("{:#?}", map); + assert!(map.is_empty()); + map.insert("leaf", 42i64).unwrap(); + println!("{:#?}", map.get("leaf")); + + assert_eq!(map.len(), 1); + map.delete("leaf").unwrap(); + println!("{:#?}", map.get("leaf")); + assert_eq!(map.len(), 0); + assert!(map.is_empty()); +} diff --git a/loro-js/CHANGELOG.md b/loro-js/CHANGELOG.md index b3aee710..04882384 100644 --- a/loro-js/CHANGELOG.md +++ b/loro-js/CHANGELOG.md @@ -1,5 +1,37 @@ # Changelog +## 0.16.7 + +### Patch Changes + +- 45c98d5: Better text APIs and bug fixes + + ### 🚀 Features + + - Add insert_utf8 and delete_utf8 for Rust Text API (#396) + - Add text iter (#400) + - Add more text api (#398) + + ### 🐛 Bug Fixes + + - Tree undo when processing deleted node (#399) + - Tree diff calc children should be sorted by idlp (#401) + - When computing the len of the map, do not count elements that are None (#402) + + ### 📚 Documentation + + - Update wasm docs + - Rm experimental warning + + ### ⚙️ Miscellaneous Tasks + + - Update fuzz config + - Pnpm + - Rename position to fractional_index (#381) + +- Updated dependencies [45c98d5] + - loro-wasm@0.16.7 + ## 0.16.6 ### Patch Changes diff --git a/loro-js/package.json b/loro-js/package.json index 5b587db9..cedc187d 100644 --- a/loro-js/package.json +++ b/loro-js/package.json @@ -1,6 +1,6 @@ { "name": "loro-crdt", - "version": "0.16.6", + "version": "0.16.7", "description": "Loro CRDTs is a high-performance CRDT framework that makes your 
app state synchronized, collaborative and maintainable effortlessly.", "keywords": [ "crdt", diff --git a/loro-js/tests/richtext.test.ts b/loro-js/tests/richtext.test.ts index 9cfeb450..acdbbd5f 100644 --- a/loro-js/tests/richtext.test.ts +++ b/loro-js/tests/richtext.test.ts @@ -302,6 +302,35 @@ describe("richtext", () => { ]); }); + it("Slice", () => { + const doc = new Loro(); + const text = doc.getText('t'); + text.insert(0, "你好"); + expect(text.slice(0, 1)).toStrictEqual("你"); + }); + + it("Slice emoji", () => { + const doc = new Loro(); + const text = doc.getText('t'); + text.insert(0, "😡😡😡"); + expect(text.slice(0, 2)).toStrictEqual("😡"); + }); + + it("CharAt", () => { + const doc = new Loro(); + const text = doc.getText('t'); + text.insert(0, "你好"); + expect(text.charAt(1)).toStrictEqual("好"); + }); + + it("Splice", () => { + const doc = new Loro(); + const text = doc.getText('t'); + text.insert(0, "你好"); + expect(text.splice(1, 1, "我")).toStrictEqual("好"); + expect(text.toString()).toStrictEqual("你我"); + }); + it("Text iter", () => { const doc = new Loro(); const text = doc.getText('t');