fix: utf16 query err (#151)

This commit is contained in:
Zixuan Chen 2023-11-04 12:24:05 +08:00 committed by GitHub
parent 5b65963104
commit 5a9baebba0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 9 deletions

View file

@ -312,32 +312,41 @@ pub(crate) fn utf16_to_utf8_index(s: &str, utf16_index: usize) -> Option<usize>
None
}
pub(crate) fn utf16_to_unicode_index(s: &str, utf16_index: usize) -> Option<usize> {
/// Returns the unicode index of the character at the given utf16 index.
///
/// If the given utf16 index is not at the correct boundary, returns the unicode index of the
/// character before the given utf16 index.
pub(crate) fn utf16_to_unicode_index(s: &str, utf16_index: usize) -> Result<usize, usize> {
if utf16_index == 0 {
return Some(0);
return Ok(0);
}
let mut current_utf16_index = 0;
let mut current_unicode_index = 0;
for (i, c) in s.chars().enumerate() {
let len = c.len_utf16();
current_utf16_index += len;
if current_utf16_index == utf16_index {
return Some(i + 1);
return Ok(i + 1);
}
if current_utf16_index > utf16_index {
return Err(i);
}
current_unicode_index = i + 1;
}
None
Err(current_unicode_index)
}
fn pos_to_unicode_index(s: &str, pos: usize, kind: PosType) -> Option<usize> {
match kind {
PosType::Bytes => todo!(),
PosType::Unicode => Some(pos),
PosType::Utf16 => utf16_to_unicode_index(s, pos),
PosType::Utf16 => utf16_to_unicode_index(s, pos).ok(),
PosType::Entity => Some(pos),
PosType::Event => {
if cfg!(feature = "wasm") {
utf16_to_unicode_index(s, pos)
utf16_to_unicode_index(s, pos).ok()
} else {
Some(pos)
}
@ -575,8 +584,12 @@ mod query {
return (0, true);
}
let s = std::str::from_utf8(text).unwrap();
let offset = utf16_to_unicode_index(s, left).unwrap();
// TODO: extract this behavior to a dedicated type
// SAFETY: we know that `text` is valid utf8
let s = unsafe { std::str::from_utf8_unchecked(text) };
// Allow left to not at the correct utf16 boundary. If so fallback to the last position.
// TODO: if we remove the use of query(pos-1), we won't need this fallback behaviro
let offset = utf16_to_unicode_index(s, left).unwrap_or_else(|e| e);
(offset, true)
}
RichtextStateChunk::Style { .. } => (1, false),
@ -890,7 +903,6 @@ impl RichtextState {
return pos;
}
// TODO: use cache
let (c, entity_index) = match pos_type {
PosType::Bytes => todo!(),
PosType::Unicode => self.find_best_insert_pos::<UnicodeQueryT>(pos),

View file

@ -27,6 +27,14 @@ describe("richtext", () => {
] as Delta<string>[])
})
it("insert after emoji", () => {
const doc = new Loro();
const text = doc.getText("text");
text.insert(0, "👨‍👩‍👦");
text.insert(8, "a");
expect(text.toString()).toBe("👨👩👦a")
})
it("emit event correctly", () => {
const doc = new Loro();
const text = doc.getText("text");