fix: snapshot encode err (#135)

- allow import snapshot when user has created empty handlers
- fix state snapshot encode err
This commit is contained in:
Zixuan Chen 2023-10-31 17:58:50 +08:00 committed by GitHub
parent f72f812086
commit 8235eaaa9b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 94 additions and 32 deletions

View file

@ -25,7 +25,7 @@ use crate::{
use self::str_arena::StrArena;
#[derive(Default)]
#[derive(Default, Debug)]
struct InnerSharedArena {
// The locks should not be exposed outside this file.
// It might be better to use RwLock in the future
@ -42,7 +42,7 @@ struct InnerSharedArena {
/// This is shared between [OpLog] and [AppState].
///
#[derive(Default, Clone)]
#[derive(Default, Debug, Clone)]
pub struct SharedArena {
inner: Arc<InnerSharedArena>,
}
@ -390,12 +390,8 @@ impl SharedArena {
self.inner_convert_op(content, peer, counter, lamport, container)
}
pub fn is_empty(&self) -> bool {
self.inner.container_idx_to_id.lock().unwrap().is_empty()
&& self.inner.container_id_to_idx.lock().unwrap().is_empty()
&& self.inner.str.lock().unwrap().is_empty()
&& self.inner.values.lock().unwrap().is_empty()
&& self.inner.parents.lock().unwrap().is_empty()
pub fn can_import_snapshot(&self) -> bool {
self.inner.str.lock().unwrap().is_empty() && self.inner.values.lock().unwrap().is_empty()
}
fn inner_convert_op(

View file

@ -5,14 +5,14 @@ use append_only_bytes::{AppendOnlyBytes, BytesSlice};
use crate::container::richtext::richtext_state::unicode_to_utf8_index;
const INDEX_INTERVAL: u32 = 128;
#[derive(Default)]
#[derive(Default, Debug)]
pub(crate) struct StrArena {
bytes: AppendOnlyBytes,
unicode_indexes: Vec<Index>,
len: Index,
}
#[derive(Default, Clone, Copy)]
#[derive(Debug, Default, Clone, Copy)]
struct Index {
bytes: u32,
utf16: u32,

View file

@ -252,7 +252,6 @@ mod test {
t.checkout(&v);
assert_eq!(&t.applied_vv, &v);
assert_eq!(t.rope.len(), 4);
dbg!(&t);
}
#[test]

View file

@ -71,7 +71,6 @@ impl IdToCursor {
assert_eq!(start_counter, id_span.counter.end);
}
// FIXME: delete id span can be reversed
pub fn iter(&self, mut iter_id_span: IdSpan) -> impl Iterator<Item = IterCursor> + '_ {
iter_id_span.normalize_();
let list = self.map.get(&iter_id_span.client_id).unwrap_or(&EMPTY_VEC);

View file

@ -35,14 +35,14 @@ use crate::{
pub fn encode_app_snapshot(app: &LoroDoc) -> Vec<u8> {
let state = app.app_state().lock().unwrap();
let pre_encoded_state = preprocess_app_state(&state);
let pre_encoded_state = encode_app_state(&state);
let f = encode_oplog(&app.oplog().lock().unwrap(), Some(pre_encoded_state));
// f.diagnose_size();
f.encode()
}
pub fn decode_app_snapshot(app: &LoroDoc, bytes: &[u8], with_state: bool) -> Result<(), LoroError> {
assert!(app.is_empty());
assert!(app.can_reset_with_snapshot());
let data = FinalPhase::decode(bytes)?;
if with_state {
let mut app_state = app.app_state().lock().unwrap();
@ -67,11 +67,14 @@ pub fn decode_oplog(
let (arena, state_arena, common) = arena.unwrap_or_else(|| {
let arena = SharedArena::default();
let state_arena = TempArena::decode_state_arena(data).unwrap();
debug_assert!(arena.can_import_snapshot());
arena.alloc_str_fast(&state_arena.text);
(arena, state_arena, CommonArena::decode(data).unwrap())
});
oplog.arena = arena.clone();
let mut extra_arena = TempArena::decode_additional_arena(data)?;
// str and values are allocated directly on the empty arena,
// so the indices don't need to be updated
arena.alloc_str_fast(&extra_arena.text);
arena.alloc_values(state_arena.values.into_iter());
arena.alloc_values(extra_arena.values.into_iter());
@ -253,6 +256,9 @@ pub fn decode_state<'b>(
let arena = app_state.arena.clone();
let common = CommonArena::decode(data)?;
let state_arena = TempArena::decode_state_arena(data)?;
// str and values are allocated directly on the empty arena,
// so the indices don't need to be updated
debug_assert!(arena.can_import_snapshot());
arena.alloc_str_fast(&state_arena.text);
let encoded_app_state = EncodedAppState::decode(data)?;
let mut container_states =
@ -264,9 +270,10 @@ pub fn decode_state<'b>(
.zip(encoded_app_state.parents.iter())
.zip(encoded_app_state.states.into_iter())
{
// We need to register new container, and cannot reuse the container idx. Because arena's containers fields may not be empty.
let idx = arena.register_container(id);
let parent_idx =
(*parent).map(|x| ContainerIdx::from_index_and_type(x, state.container_type()));
(*parent).map(|x| arena.register_container(&common.container_ids[x as usize]));
arena.set_parent(idx, parent_idx);
match state {
loro_preload::EncodedContainerState::Map(map_data) => {
@ -300,7 +307,7 @@ pub fn decode_state<'b>(
}
loro_preload::EncodedContainerState::Richtext(richtext_data) => {
let mut richtext = RichtextState::new(idx);
richtext.decode_snapshot(richtext_data, &state_arena, &common, &arena);
richtext.decode_snapshot(*richtext_data, &state_arena, &common, &arena);
container_states.insert(idx, State::RichtextState(richtext));
}
loro_preload::EncodedContainerState::Tree(tree_data) => {
@ -599,7 +606,7 @@ struct PreEncodedState<'a> {
tree_id_lookup: FxHashMap<(u32, i32), usize>,
}
fn preprocess_app_state(app_state: &DocState) -> PreEncodedState {
fn encode_app_state(app_state: &DocState) -> PreEncodedState {
assert!(!app_state.is_in_txn());
let mut peers = Vec::new();
let mut peer_lookup = FxHashMap::default();
@ -667,7 +674,27 @@ fn preprocess_app_state(app_state: &DocState) -> PreEncodedState {
tree_ids.len()
};
for (_, state) in app_state.states.iter() {
let container_ids = app_state.arena.export_containers();
for (i, id) in container_ids.iter().enumerate() {
let idx = ContainerIdx::from_index_and_type(i as u32, id.container_type());
let Some(state) = app_state.states.get(&idx) else {
match id.container_type() {
loro_common::ContainerType::List => {
encoded.states.push(EncodedContainerState::List(Vec::new()))
}
loro_common::ContainerType::Map => {
encoded.states.push(EncodedContainerState::Map(Vec::new()))
}
loro_common::ContainerType::Tree => {
encoded.states.push(EncodedContainerState::Tree(Vec::new()))
}
loro_common::ContainerType::Text => encoded
.states
.push(EncodedContainerState::Richtext(Default::default())),
}
continue;
};
match state {
State::TreeState(tree) => {
let v = tree
@ -714,16 +741,17 @@ fn preprocess_app_state(app_state: &DocState) -> PreEncodedState {
}
State::RichtextState(text) => {
let result = text.encode_snapshot(&mut record_peer, &mut record_key);
encoded.states.push(EncodedContainerState::Richtext(result));
encoded
.states
.push(EncodedContainerState::Richtext(Box::new(result)));
}
}
}
let common = CommonArena {
peer_ids: peers.into(),
container_ids: app_state.arena.export_containers(),
container_ids,
};
let arena = TempArena {
values,
keywords,

View file

@ -1194,7 +1194,6 @@ mod test {
// assert has bold
let value = handler2.get_richtext_value();
dbg!(&value);
assert_eq!(value[0]["insert"], "hello".into());
let meta = value[0]["attributes"].as_map().unwrap();
assert_eq!(meta.len(), 1);

View file

@ -85,7 +85,7 @@ impl LoroDoc {
/// Is the document empty? (no ops)
#[inline(always)]
pub fn is_empty(&self) -> bool {
pub fn can_reset_with_snapshot(&self) -> bool {
self.oplog.lock().unwrap().is_empty() && self.state.lock().unwrap().is_empty()
}
@ -356,7 +356,7 @@ impl LoroDoc {
debug_log::group_end!();
}
EncodeMode::Snapshot => {
if self.is_empty() {
if self.can_reset_with_snapshot() {
decode_app_snapshot(self, &input[1..], !self.detached)?;
} else {
let app = LoroDoc::new();

View file

@ -185,7 +185,7 @@ impl OpLog {
}
pub fn is_empty(&self) -> bool {
self.dag.map.is_empty() && self.arena.is_empty()
self.dag.map.is_empty() && self.arena.can_import_snapshot()
}
/// Import a change.

View file

@ -360,7 +360,6 @@ impl DocState {
}
if self.is_recording() {
debug_log::debug_log!("TODIFF");
let diff = self
.states
.iter_mut()
@ -439,7 +438,7 @@ impl DocState {
}
pub fn is_empty(&self) -> bool {
!self.in_txn && self.states.is_empty() && self.arena.is_empty()
!self.in_txn && self.states.is_empty() && self.arena.can_import_snapshot()
}
pub fn get_deep_value(&mut self) -> LoroValue {

View file

@ -548,6 +548,10 @@ impl RichtextState {
arena: &SharedArena,
) {
assert!(self.is_empty());
if text_bytes.is_empty() {
return;
}
let bit_len = is_style_start.len() * 8;
let is_style_start = BitMap::from_vec(is_style_start, bit_len);
let mut is_style_start_iter = is_style_start.iter();
@ -667,7 +671,6 @@ impl From<RichtextStateLoader> for InnerState {
impl RichtextStateLoader {
pub fn push(&mut self, elem: RichtextStateChunk) {
debug_log::debug_dbg!(&elem);
if let RichtextStateChunk::Style { style, anchor_type } = &elem {
if *anchor_type == AnchorType::Start {
self.start_anchor_pos
@ -696,7 +699,6 @@ impl RichtextStateLoader {
state.annotate_style_range(range, style);
}
debug_log::debug_dbg!(&state);
if cfg!(debug_assertions) {
state.check();
}

View file

@ -1,9 +1,49 @@
use std::sync::{Arc, Mutex};
use loro_common::{ContainerType, LoroValue, ID};
use loro_common::{ContainerID, ContainerType, LoroValue, ID};
use loro_internal::{version::Frontiers, ApplyDiff, LoroDoc, ToJson};
use serde_json::json;
#[test]
fn import_after_init_handlers() {
let a = LoroDoc::new_auto_commit();
a.subscribe(
&ContainerID::new_root("text", ContainerType::Text),
Arc::new(|event| {
assert!(matches!(
event.container.diff,
loro_internal::event::Diff::Text(_)
))
}),
);
a.subscribe(
&ContainerID::new_root("map", ContainerType::Map),
Arc::new(|event| {
assert!(matches!(
event.container.diff,
loro_internal::event::Diff::NewMap(_)
))
}),
);
a.subscribe(
&ContainerID::new_root("list", ContainerType::List),
Arc::new(|event| {
assert!(matches!(
event.container.diff,
loro_internal::event::Diff::List(_)
))
}),
);
let b = LoroDoc::new_auto_commit();
b.get_list("list").insert_(0, "list".into()).unwrap();
b.get_list("list_a").insert_(0, "list_a".into()).unwrap();
b.get_text("text").insert_(0, "text").unwrap();
b.get_map("map").insert_("m", "map".into()).unwrap();
a.import(&b.export_snapshot()).unwrap();
a.commit();
}
#[test]
fn test_pending() {
let a = LoroDoc::new_auto_commit();

View file

@ -134,11 +134,11 @@ pub enum EncodedContainerState<'a> {
Map(Vec<MapEntry>),
List(Vec<usize>),
#[serde(borrow)]
Richtext(EncodedRichtextState<'a>),
Richtext(Box<EncodedRichtextState<'a>>),
Tree(Vec<(usize, Option<usize>)>),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct EncodedRichtextState<'a> {
/// It's composed of interleaved:
///