From 8235eaaa9b797ef28128a5df697f9c2430e5628a Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Tue, 31 Oct 2023 17:58:50 +0800 Subject: [PATCH] fix: snapshot encode err (#135) - allow import snapshot when user has created empty handlers - fix state snapshot encode err --- crates/loro-internal/src/arena.rs | 12 ++--- crates/loro-internal/src/arena/str_arena.rs | 4 +- .../src/container/richtext/tracker.rs | 1 - .../richtext/tracker/id_to_cursor.rs | 1 - .../src/encoding/encode_snapshot.rs | 46 +++++++++++++++---- crates/loro-internal/src/handler.rs | 1 - crates/loro-internal/src/loro.rs | 4 +- crates/loro-internal/src/oplog.rs | 2 +- crates/loro-internal/src/state.rs | 3 +- .../loro-internal/src/state/richtext_state.rs | 6 ++- crates/loro-internal/tests/test.rs | 42 ++++++++++++++++- crates/loro-preload/src/encode.rs | 4 +- 12 files changed, 94 insertions(+), 32 deletions(-) diff --git a/crates/loro-internal/src/arena.rs b/crates/loro-internal/src/arena.rs index 176a97b3..e20bcd46 100644 --- a/crates/loro-internal/src/arena.rs +++ b/crates/loro-internal/src/arena.rs @@ -25,7 +25,7 @@ use crate::{ use self::str_arena::StrArena; -#[derive(Default)] +#[derive(Default, Debug)] struct InnerSharedArena { // The locks should not be exposed outside this file. // It might be better to use RwLock in the future @@ -42,7 +42,7 @@ struct InnerSharedArena { /// This is shared between [OpLog] and [AppState]. /// -#[derive(Default, Clone)] +#[derive(Default, Debug, Clone)] pub struct SharedArena { inner: Arc, } @@ -390,12 +390,8 @@ impl SharedArena { self.inner_convert_op(content, peer, counter, lamport, container) } - pub fn is_empty(&self) -> bool { - self.inner.container_idx_to_id.lock().unwrap().is_empty() - && self.inner.container_id_to_idx.lock().unwrap().is_empty() - && self.inner.str.lock().unwrap().is_empty() - && self.inner.values.lock().unwrap().is_empty() - && self.inner.parents.lock().unwrap().is_empty() + pub fn can_import_snapshot(&self) -> bool { + self.inner.str.lock().unwrap().is_empty() && self.inner.values.lock().unwrap().is_empty() } fn inner_convert_op( diff --git a/crates/loro-internal/src/arena/str_arena.rs b/crates/loro-internal/src/arena/str_arena.rs index 2d40c5b5..34743f20 100644 --- a/crates/loro-internal/src/arena/str_arena.rs +++ b/crates/loro-internal/src/arena/str_arena.rs @@ -5,14 +5,14 @@ use append_only_bytes::{AppendOnlyBytes, BytesSlice}; use crate::container::richtext::richtext_state::unicode_to_utf8_index; const INDEX_INTERVAL: u32 = 128; -#[derive(Default)] +#[derive(Default, Debug)] pub(crate) struct StrArena { bytes: AppendOnlyBytes, unicode_indexes: Vec, len: Index, } -#[derive(Default, Clone, Copy)] +#[derive(Debug, Default, Clone, Copy)] struct Index { bytes: u32, utf16: u32, diff --git a/crates/loro-internal/src/container/richtext/tracker.rs b/crates/loro-internal/src/container/richtext/tracker.rs index 4ddaf7cc..1648569b 100644 --- a/crates/loro-internal/src/container/richtext/tracker.rs +++ b/crates/loro-internal/src/container/richtext/tracker.rs @@ -252,7 +252,6 @@ mod test { t.checkout(&v); assert_eq!(&t.applied_vv, &v); assert_eq!(t.rope.len(), 4); - dbg!(&t); } #[test] diff --git a/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs b/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs index e837d6ff..66b499dd 100644 --- a/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs +++ b/crates/loro-internal/src/container/richtext/tracker/id_to_cursor.rs @@ -71,7 +71,6 @@ impl IdToCursor { assert_eq!(start_counter, id_span.counter.end); } - // FIXME: delete id span can be reversed pub fn iter(&self, mut iter_id_span: IdSpan) -> impl Iterator + '_ { iter_id_span.normalize_(); let list = self.map.get(&iter_id_span.client_id).unwrap_or(&EMPTY_VEC); diff --git a/crates/loro-internal/src/encoding/encode_snapshot.rs b/crates/loro-internal/src/encoding/encode_snapshot.rs index 2be7bfaf..2ae1c19c 100644 --- a/crates/loro-internal/src/encoding/encode_snapshot.rs +++ b/crates/loro-internal/src/encoding/encode_snapshot.rs @@ -35,14 +35,14 @@ use crate::{ pub fn encode_app_snapshot(app: &LoroDoc) -> Vec { let state = app.app_state().lock().unwrap(); - let pre_encoded_state = preprocess_app_state(&state); + let pre_encoded_state = encode_app_state(&state); let f = encode_oplog(&app.oplog().lock().unwrap(), Some(pre_encoded_state)); // f.diagnose_size(); f.encode() } pub fn decode_app_snapshot(app: &LoroDoc, bytes: &[u8], with_state: bool) -> Result<(), LoroError> { - assert!(app.is_empty()); + assert!(app.can_reset_with_snapshot()); let data = FinalPhase::decode(bytes)?; if with_state { let mut app_state = app.app_state().lock().unwrap(); @@ -67,11 +67,14 @@ pub fn decode_oplog( let (arena, state_arena, common) = arena.unwrap_or_else(|| { let arena = SharedArena::default(); let state_arena = TempArena::decode_state_arena(data).unwrap(); + debug_assert!(arena.can_import_snapshot()); arena.alloc_str_fast(&state_arena.text); (arena, state_arena, CommonArena::decode(data).unwrap()) }); oplog.arena = arena.clone(); let mut extra_arena = TempArena::decode_additional_arena(data)?; + // str and values are allocated directly on the empty arena, + // so the indices don't need to be updated arena.alloc_str_fast(&extra_arena.text); arena.alloc_values(state_arena.values.into_iter()); arena.alloc_values(extra_arena.values.into_iter()); @@ -253,6 +256,9 @@ pub fn decode_state<'b>( let arena = app_state.arena.clone(); let common = CommonArena::decode(data)?; let state_arena = TempArena::decode_state_arena(data)?; + // str and values are allocated directly on the empty arena, + // so the indices don't need to be updated + debug_assert!(arena.can_import_snapshot()); arena.alloc_str_fast(&state_arena.text); let encoded_app_state = EncodedAppState::decode(data)?; let mut container_states = @@ -264,9 +270,10 @@ pub fn decode_state<'b>( .zip(encoded_app_state.parents.iter()) .zip(encoded_app_state.states.into_iter()) { + // We need to register new container, and cannot reuse the container idx. Because arena's containers fields may not be empty. let idx = arena.register_container(id); let parent_idx = - (*parent).map(|x| ContainerIdx::from_index_and_type(x, state.container_type())); + (*parent).map(|x| arena.register_container(&common.container_ids[x as usize])); arena.set_parent(idx, parent_idx); match state { loro_preload::EncodedContainerState::Map(map_data) => { @@ -300,7 +307,7 @@ pub fn decode_state<'b>( } loro_preload::EncodedContainerState::Richtext(richtext_data) => { let mut richtext = RichtextState::new(idx); - richtext.decode_snapshot(richtext_data, &state_arena, &common, &arena); + richtext.decode_snapshot(*richtext_data, &state_arena, &common, &arena); container_states.insert(idx, State::RichtextState(richtext)); } loro_preload::EncodedContainerState::Tree(tree_data) => { @@ -599,7 +606,7 @@ struct PreEncodedState<'a> { tree_id_lookup: FxHashMap<(u32, i32), usize>, } -fn preprocess_app_state(app_state: &DocState) -> PreEncodedState { +fn encode_app_state(app_state: &DocState) -> PreEncodedState { assert!(!app_state.is_in_txn()); let mut peers = Vec::new(); let mut peer_lookup = FxHashMap::default(); @@ -667,7 +674,27 @@ fn preprocess_app_state(app_state: &DocState) -> PreEncodedState { tree_ids.len() }; - for (_, state) in app_state.states.iter() { + let container_ids = app_state.arena.export_containers(); + for (i, id) in container_ids.iter().enumerate() { + let idx = ContainerIdx::from_index_and_type(i as u32, id.container_type()); + let Some(state) = app_state.states.get(&idx) else { + match id.container_type() { + loro_common::ContainerType::List => { + encoded.states.push(EncodedContainerState::List(Vec::new())) + } + loro_common::ContainerType::Map => { + encoded.states.push(EncodedContainerState::Map(Vec::new())) + } + loro_common::ContainerType::Tree => { + encoded.states.push(EncodedContainerState::Tree(Vec::new())) + } + loro_common::ContainerType::Text => encoded + .states + .push(EncodedContainerState::Richtext(Default::default())), + } + + continue; + }; match state { State::TreeState(tree) => { let v = tree @@ -714,16 +741,17 @@ fn preprocess_app_state(app_state: &DocState) -> PreEncodedState { } State::RichtextState(text) => { let result = text.encode_snapshot(&mut record_peer, &mut record_key); - encoded.states.push(EncodedContainerState::Richtext(result)); + encoded + .states + .push(EncodedContainerState::Richtext(Box::new(result))); } } } let common = CommonArena { peer_ids: peers.into(), - container_ids: app_state.arena.export_containers(), + container_ids, }; - let arena = TempArena { values, keywords, diff --git a/crates/loro-internal/src/handler.rs b/crates/loro-internal/src/handler.rs index beee4f49..729cae67 100644 --- a/crates/loro-internal/src/handler.rs +++ b/crates/loro-internal/src/handler.rs @@ -1194,7 +1194,6 @@ mod test { // assert has bold let value = handler2.get_richtext_value(); - dbg!(&value); assert_eq!(value[0]["insert"], "hello".into()); let meta = value[0]["attributes"].as_map().unwrap(); assert_eq!(meta.len(), 1); diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index 12abadcd..32e30a08 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -85,7 +85,7 @@ impl LoroDoc { /// Is the document empty? (no ops) #[inline(always)] - pub fn is_empty(&self) -> bool { + pub fn can_reset_with_snapshot(&self) -> bool { self.oplog.lock().unwrap().is_empty() && self.state.lock().unwrap().is_empty() } @@ -356,7 +356,7 @@ impl LoroDoc { debug_log::group_end!(); } EncodeMode::Snapshot => { - if self.is_empty() { + if self.can_reset_with_snapshot() { decode_app_snapshot(self, &input[1..], !self.detached)?; } else { let app = LoroDoc::new(); diff --git a/crates/loro-internal/src/oplog.rs b/crates/loro-internal/src/oplog.rs index ca9c7b56..9e54b6ed 100644 --- a/crates/loro-internal/src/oplog.rs +++ b/crates/loro-internal/src/oplog.rs @@ -185,7 +185,7 @@ impl OpLog { } pub fn is_empty(&self) -> bool { - self.dag.map.is_empty() && self.arena.is_empty() + self.dag.map.is_empty() && self.arena.can_import_snapshot() } /// Import a change. diff --git a/crates/loro-internal/src/state.rs b/crates/loro-internal/src/state.rs index b7147594..854b8fea 100644 --- a/crates/loro-internal/src/state.rs +++ b/crates/loro-internal/src/state.rs @@ -360,7 +360,6 @@ impl DocState { } if self.is_recording() { - debug_log::debug_log!("TODIFF"); let diff = self .states .iter_mut() @@ -439,7 +438,7 @@ impl DocState { } pub fn is_empty(&self) -> bool { - !self.in_txn && self.states.is_empty() && self.arena.is_empty() + !self.in_txn && self.states.is_empty() && self.arena.can_import_snapshot() } pub fn get_deep_value(&mut self) -> LoroValue { diff --git a/crates/loro-internal/src/state/richtext_state.rs b/crates/loro-internal/src/state/richtext_state.rs index ee9b8c02..f163500e 100644 --- a/crates/loro-internal/src/state/richtext_state.rs +++ b/crates/loro-internal/src/state/richtext_state.rs @@ -548,6 +548,10 @@ impl RichtextState { arena: &SharedArena, ) { assert!(self.is_empty()); + if text_bytes.is_empty() { + return; + } + let bit_len = is_style_start.len() * 8; let is_style_start = BitMap::from_vec(is_style_start, bit_len); let mut is_style_start_iter = is_style_start.iter(); @@ -667,7 +671,6 @@ impl From for InnerState { impl RichtextStateLoader { pub fn push(&mut self, elem: RichtextStateChunk) { - debug_log::debug_dbg!(&elem); if let RichtextStateChunk::Style { style, anchor_type } = &elem { if *anchor_type == AnchorType::Start { self.start_anchor_pos @@ -696,7 +699,6 @@ impl RichtextStateLoader { state.annotate_style_range(range, style); } - debug_log::debug_dbg!(&state); if cfg!(debug_assertions) { state.check(); } diff --git a/crates/loro-internal/tests/test.rs b/crates/loro-internal/tests/test.rs index 6bd975b5..dd1ce7b0 100644 --- a/crates/loro-internal/tests/test.rs +++ b/crates/loro-internal/tests/test.rs @@ -1,9 +1,49 @@ use std::sync::{Arc, Mutex}; -use loro_common::{ContainerType, LoroValue, ID}; +use loro_common::{ContainerID, ContainerType, LoroValue, ID}; use loro_internal::{version::Frontiers, ApplyDiff, LoroDoc, ToJson}; use serde_json::json; +#[test] +fn import_after_init_handlers() { + let a = LoroDoc::new_auto_commit(); + a.subscribe( + &ContainerID::new_root("text", ContainerType::Text), + Arc::new(|event| { + assert!(matches!( + event.container.diff, + loro_internal::event::Diff::Text(_) + )) + }), + ); + a.subscribe( + &ContainerID::new_root("map", ContainerType::Map), + Arc::new(|event| { + assert!(matches!( + event.container.diff, + loro_internal::event::Diff::NewMap(_) + )) + }), + ); + a.subscribe( + &ContainerID::new_root("list", ContainerType::List), + Arc::new(|event| { + assert!(matches!( + event.container.diff, + loro_internal::event::Diff::List(_) + )) + }), + ); + + let b = LoroDoc::new_auto_commit(); + b.get_list("list").insert_(0, "list".into()).unwrap(); + b.get_list("list_a").insert_(0, "list_a".into()).unwrap(); + b.get_text("text").insert_(0, "text").unwrap(); + b.get_map("map").insert_("m", "map".into()).unwrap(); + a.import(&b.export_snapshot()).unwrap(); + a.commit(); +} + #[test] fn test_pending() { let a = LoroDoc::new_auto_commit(); diff --git a/crates/loro-preload/src/encode.rs b/crates/loro-preload/src/encode.rs index a741e59b..28182365 100644 --- a/crates/loro-preload/src/encode.rs +++ b/crates/loro-preload/src/encode.rs @@ -134,11 +134,11 @@ pub enum EncodedContainerState<'a> { Map(Vec), List(Vec), #[serde(borrow)] - Richtext(EncodedRichtextState<'a>), + Richtext(Box>), Tree(Vec<(usize, Option)>), } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Default, Clone, Serialize, Deserialize)] pub struct EncodedRichtextState<'a> { /// It's composed of interleaved: ///