2022-11-24 15:28:36 +00:00
|
|
|
use enum_as_inner::EnumAsInner;
|
2023-11-08 08:15:11 +00:00
|
|
|
use fxhash::FxHasher64;
|
2024-04-01 09:25:12 +00:00
|
|
|
use itertools::Itertools;
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
use loro_delta::{array_vec::ArrayVec, delta_trait::DeltaAttr, DeltaItem, DeltaRope};
|
2022-11-29 10:31:57 +00:00
|
|
|
use serde::{Deserialize, Serialize};
|
2023-03-10 02:50:05 +00:00
|
|
|
use smallvec::SmallVec;
|
2022-11-23 08:26:38 +00:00
|
|
|
|
2023-03-01 13:37:58 +00:00
|
|
|
use crate::{
|
2023-10-30 03:13:52 +00:00
|
|
|
container::richtext::richtext_state::RichtextStateChunk,
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
delta::{
|
|
|
|
Delta, MapDelta, Meta, MovableListInnerDelta, ResolvedMapDelta, StyleMeta, TreeDelta,
|
|
|
|
TreeDiff,
|
|
|
|
},
|
2024-03-08 08:40:06 +00:00
|
|
|
handler::ValueOrHandler,
|
2023-10-29 06:02:13 +00:00
|
|
|
op::SliceRanges,
|
|
|
|
utils::string_slice::StringSlice,
|
2023-12-05 03:57:41 +00:00
|
|
|
InternalString,
|
2023-03-01 13:37:58 +00:00
|
|
|
};
|
2022-11-23 17:01:40 +00:00
|
|
|
|
2023-11-08 08:15:11 +00:00
|
|
|
use std::{
|
|
|
|
borrow::Cow,
|
|
|
|
hash::{Hash, Hasher},
|
|
|
|
};
|
2023-07-31 03:49:55 +00:00
|
|
|
|
2024-04-01 09:42:02 +00:00
|
|
|
use loro_common::{ContainerID, TreeID};
|
2023-07-31 03:49:55 +00:00
|
|
|
|
|
|
|
use crate::{container::idx::ContainerIdx, version::Frontiers};
|
|
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
pub struct ContainerDiff {
|
2023-04-04 13:44:41 +00:00
|
|
|
pub id: ContainerID,
|
2023-07-31 03:49:55 +00:00
|
|
|
pub path: Vec<(ContainerID, Index)>,
|
|
|
|
pub(crate) idx: ContainerIdx,
|
2024-05-13 13:37:10 +00:00
|
|
|
pub is_unknown: bool,
|
2023-07-31 03:49:55 +00:00
|
|
|
pub diff: Diff,
|
2023-04-04 13:44:41 +00:00
|
|
|
}
|
|
|
|
|
2024-04-29 11:35:21 +00:00
|
|
|
/// Describes what caused an event to be emitted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EventTriggerKind {
    /// A local transaction produced the event.
    Local,
    /// An import produced the event.
    Import,
    /// A checkout produced the event.
    Checkout,
}
|
|
|
|
|
|
|
|
impl std::fmt::Display for EventTriggerKind {
|
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
match self {
|
|
|
|
EventTriggerKind::Local => write!(f, "local"),
|
|
|
|
EventTriggerKind::Import => write!(f, "import"),
|
|
|
|
EventTriggerKind::Checkout => write!(f, "checkout"),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl EventTriggerKind {
|
|
|
|
#[inline]
|
|
|
|
pub fn is_local(&self) -> bool {
|
|
|
|
matches!(self, EventTriggerKind::Local)
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
pub fn is_import(&self) -> bool {
|
|
|
|
matches!(self, EventTriggerKind::Import)
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
pub fn is_checkout(&self) -> bool {
|
|
|
|
matches!(self, EventTriggerKind::Checkout)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-31 03:49:55 +00:00
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
pub struct DiffEvent<'a> {
|
2024-02-08 06:03:15 +00:00
|
|
|
/// The receiver of the event.
|
|
|
|
pub current_target: Option<ContainerID>,
|
|
|
|
/// A list of events that should be received by the current target.
|
|
|
|
pub events: &'a [&'a ContainerDiff],
|
|
|
|
pub event_meta: &'a DocDiff,
|
2022-11-23 08:26:38 +00:00
|
|
|
}
|
|
|
|
|
2023-07-31 03:49:55 +00:00
|
|
|
/// It's the exposed event type.
|
|
|
|
/// It's exposed to the user. The user can use this to apply the diff to their local state.
|
|
|
|
///
|
|
|
|
/// [DocDiff] may include the diff that calculated from several transactions and imports.
|
|
|
|
/// They all should have the same origin and local flag.
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
pub struct DocDiff {
|
|
|
|
pub from: Frontiers,
|
|
|
|
pub to: Frontiers,
|
|
|
|
pub origin: InternalString,
|
2024-04-03 09:56:01 +00:00
|
|
|
pub by: EventTriggerKind,
|
2023-07-31 03:49:55 +00:00
|
|
|
pub diff: Vec<ContainerDiff>,
|
2022-11-23 08:26:38 +00:00
|
|
|
}
|
|
|
|
|
2023-11-08 08:15:11 +00:00
|
|
|
impl DocDiff {
|
|
|
|
/// Get the unique id of the diff.
|
|
|
|
pub fn id(&self) -> u64 {
|
|
|
|
let mut hasher = FxHasher64::default();
|
|
|
|
self.from.hash(&mut hasher);
|
|
|
|
self.to.hash(&mut hasher);
|
|
|
|
hasher.finish()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-31 03:49:55 +00:00
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
pub(crate) struct InternalContainerDiff {
|
|
|
|
pub(crate) idx: ContainerIdx,
|
2023-11-05 07:53:33 +00:00
|
|
|
// If true, this event is created by the container which was resurrected by another container
|
|
|
|
pub(crate) bring_back: bool,
|
2023-10-30 06:16:50 +00:00
|
|
|
pub(crate) is_container_deleted: bool,
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
pub(crate) diff: DiffVariant,
|
2023-10-29 06:02:13 +00:00
|
|
|
}
|
|
|
|
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
#[derive(Default, Debug, Clone, EnumAsInner)]
|
2023-10-29 06:02:13 +00:00
|
|
|
pub(crate) enum DiffVariant {
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
#[default]
|
|
|
|
None,
|
2023-10-29 06:02:13 +00:00
|
|
|
Internal(InternalDiff),
|
|
|
|
External(Diff),
|
2022-12-30 09:50:23 +00:00
|
|
|
}
|
|
|
|
|
2023-07-31 03:49:55 +00:00
|
|
|
/// It's used for transmitting and recording the diff internally.
|
|
|
|
///
|
|
|
|
/// It can be convert into a [DocDiff].
|
|
|
|
// Internally, we need to batch the diff then calculate the event. Because
|
|
|
|
// we need to sort the diff by containers' created time, to make sure the
|
|
|
|
// the path to each container is up-to-date.
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
pub(crate) struct InternalDocDiff<'a> {
|
|
|
|
pub(crate) origin: InternalString,
|
2024-04-03 09:56:01 +00:00
|
|
|
pub(crate) by: EventTriggerKind,
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
/// The values inside this array are in random order
|
2023-07-31 03:49:55 +00:00
|
|
|
pub(crate) diff: Cow<'a, [InternalContainerDiff]>,
|
|
|
|
pub(crate) new_version: Cow<'a, Frontiers>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> InternalDocDiff<'a> {
|
|
|
|
pub fn into_owned(self) -> InternalDocDiff<'static> {
|
|
|
|
InternalDocDiff {
|
|
|
|
origin: self.origin,
|
2024-04-03 09:56:01 +00:00
|
|
|
by: self.by,
|
2023-07-31 03:49:55 +00:00
|
|
|
diff: Cow::Owned((*self.diff).to_owned()),
|
|
|
|
new_version: Cow::Owned((*self.new_version).to_owned()),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn can_merge(&self, other: &Self) -> bool {
|
2024-04-03 09:56:01 +00:00
|
|
|
self.by == other.by
|
2023-07-31 03:49:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-03 01:29:25 +00:00
|
|
|
/// A sequence of [`Index`] segments, stored inline for up to four segments
/// before spilling to the heap.
pub type Path = SmallVec<[Index; 4]>;
|
2022-11-23 08:26:38 +00:00
|
|
|
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
/// One segment of a [`Path`].
///
/// A segment is a string key, a numeric position, or a tree node id.
/// `Debug` and `Display` are implemented by hand elsewhere in this file
/// rather than derived.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, enum_as_inner::EnumAsInner)]
|
2022-11-23 08:26:38 +00:00
|
|
|
pub enum Index {
|
|
|
|
/// A string key (presumably an entry of a map-like container; confirm against callers).
Key(InternalString),
|
2022-11-23 10:12:23 +00:00
|
|
|
/// A numeric position (presumably an offset in a sequence container; confirm against callers).
Seq(usize),
|
2023-10-30 03:13:52 +00:00
|
|
|
/// A tree node, identified by its `TreeID`.
Node(TreeID),
|
2022-11-23 08:26:38 +00:00
|
|
|
}
|
|
|
|
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
impl std::fmt::Debug for Index {
|
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
match self {
|
|
|
|
Self::Key(arg0) => write!(f, "Index::Key(\"{}\")", arg0),
|
|
|
|
Self::Seq(arg0) => write!(f, "Index::Seq({})", arg0),
|
|
|
|
Self::Node(arg0) => write!(f, "Index::Node({})", arg0),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-04-01 09:25:12 +00:00
|
|
|
impl std::fmt::Display for Index {
|
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
match self {
|
|
|
|
Index::Key(key) => write!(f, "{}", key),
|
|
|
|
Index::Seq(s) => write!(f, "{}", s),
|
|
|
|
Index::Node(id) => write!(f, "{}@{}", id.peer, id.counter),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl TryFrom<&str> for Index {
|
|
|
|
type Error = &'static str;
|
|
|
|
fn try_from(s: &str) -> Result<Self, &'static str> {
|
|
|
|
if s.is_empty() {
|
|
|
|
return Ok(Index::Key(InternalString::default()));
|
|
|
|
}
|
|
|
|
|
|
|
|
let c = s.chars().next().unwrap();
|
|
|
|
if c.is_ascii_digit() {
|
|
|
|
if let Ok(seq) = s.parse::<usize>() {
|
|
|
|
Ok(Index::Seq(seq))
|
|
|
|
} else if let Ok(id) = s.try_into() {
|
|
|
|
Ok(Index::Node(id))
|
|
|
|
} else {
|
|
|
|
Ok(Index::Key(InternalString::from(s)))
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Ok(Index::Key(InternalString::from(s)))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-29 06:02:13 +00:00
|
|
|
impl DiffVariant {
|
|
|
|
pub fn compose(self, other: Self) -> Result<Self, Self> {
|
|
|
|
match (self, other) {
|
|
|
|
(DiffVariant::Internal(a), DiffVariant::Internal(b)) => {
|
|
|
|
Ok(DiffVariant::Internal(a.compose(b)?))
|
|
|
|
}
|
|
|
|
(DiffVariant::External(a), DiffVariant::External(b)) => {
|
|
|
|
Ok(DiffVariant::External(a.compose(b)?))
|
|
|
|
}
|
|
|
|
(a, _) => Err(a),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-30 03:13:52 +00:00
|
|
|
/// Crate-internal representation of a change to a single container.
///
/// Each variant wraps the delta type for one kind of container; see the
/// wrapped types for the exact semantics.
#[non_exhaustive]
|
2024-01-30 01:54:54 +00:00
|
|
|
#[derive(Clone, Debug, EnumAsInner)]
|
2023-11-04 12:03:43 +00:00
|
|
|
pub(crate) enum InternalDiff {
|
feat: stabilizing encoding (#219)
This PR implements a new encode schema that is more extendible and more compact. It’s also simpler and takes less binary size and maintaining effort. It is inspired by the [Automerge Encoding Format](https://automerge.org/automerge-binary-format-spec/).
The main motivation is the extensibility. When we integrate a new CRDT algorithm, we don’t want to make a breaking change to the encoding or keep multiple versions of the encoding schema in the code, as it will make our WASM size much larger. We need a stable and extendible encoding schema for our v1.0 version.
This PR also exposes the ops that compose the current container state. For example, now you can make a query about which operation a certain character quickly. This behavior is required in the new snapshot encoding, so it’s included in this PR.
# Encoding Schema
## Header
The header has 22 bytes.
- (0-4 bytes) Magic Bytes: The encoding starts with `loro` as magic bytes.
- (4-20 bytes) Checksum: MD5 checksum of the encoded data, including the header starting from 20th bytes. The checksum is encoded as a 16-byte array. The `checksum` and `magic bytes` fields are trimmed when calculating the checksum.
- (20-21 bytes) Encoding Method (2 bytes, big endian): Multiple encoding methods are available for a specific encoding version.
## Encode Mode: Updates
In this approach, only ops, specifically their historical record, are encoded, while document states are excluded.
Like Automerge's format, we employ columnar encoding for operations and changes.
Previously, operations were ordered by their Operation ID (OpId) before columnar encoding. However, sorting operations based on their respective containers initially enhance compression potential.
## Encode Mode: Snapshot
This mode simultaneously captures document state and historical data. Upon importing a snapshot into a new document, initialization occurs directly from the snapshot, bypassing the need for CRDT-based recalculations.
Unlike previous snapshot encoding methods, the current binary output in snapshot mode is compatible with the updates mode. This enhances the efficiency of importing snapshots into non-empty documents, where initialization via snapshot is infeasible.
Additionally, when feasible, we leverage the sequence of operations to construct state snapshots. In CRDTs, deducing the specific ops constituting the current container state is feasible. These ops are tagged in relation to the container, facilitating direct state reconstruction from them. This approach, pioneered by Automerge, significantly improves compression efficiency.
2024-01-02 09:03:24 +00:00
|
|
|
/// List change expressed as a `Delta` over `SliceRanges`.
ListRaw(Delta<SliceRanges>),
|
2023-10-30 03:13:52 +00:00
|
|
|
/// Rich-text change expressed as a `Delta` over `RichtextStateChunk`.
/// This always uses entity indexes.
|
|
|
|
RichtextRaw(Delta<RichtextStateChunk>),
|
|
|
|
/// Map change, carried as a `MapDelta`.
Map(MapDelta),
|
|
|
|
/// Tree change, carried as a `TreeDelta`.
Tree(TreeDelta),
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
/// Movable-list change, carried as a `MovableListInnerDelta`.
MovableList(MovableListInnerDelta),
|
2024-05-13 13:37:10 +00:00
|
|
|
/// Counter change as a signed 64-bit value; only compiled in when the
/// `counter` feature is enabled.
#[cfg(feature = "counter")]
|
|
|
|
Counter(i64),
|
|
|
|
/// Diff with no payload (presumably for a container kind this build does
/// not understand; confirm against the producers of this variant).
Unknown,
|
2023-10-30 03:13:52 +00:00
|
|
|
}
|
|
|
|
|
2023-10-29 06:02:13 +00:00
|
|
|
impl From<InternalDiff> for DiffVariant {
    /// Wraps an internal diff in the `Internal` variant.
    fn from(internal: InternalDiff) -> Self {
        Self::Internal(internal)
    }
}
|
|
|
|
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
/// Metadata attached to the items of a list delta (see `ListDiffItem` and
/// `ListDiff`). The derived `Default` value has `from_move == false`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
|
|
|
|
pub struct ListDeltaMeta {
|
|
|
|
/// Whether the content of the insert is moved from
|
|
|
|
/// a deletion in the same delta and **the value is not changed**.
|
|
|
|
///
|
|
|
|
/// If true, this op must be a move op under the hood.
|
|
|
|
/// But an insert created by a move op doesn't necessarily
|
|
|
|
/// have this flag, because the insert content may not
|
|
|
|
/// be moved from a deletion in the same delta.
|
|
|
|
pub from_move: bool,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Meta for ListDeltaMeta {
|
|
|
|
fn is_empty(&self) -> bool {
|
|
|
|
!self.from_move
|
|
|
|
}
|
|
|
|
|
|
|
|
fn compose(
|
|
|
|
&mut self,
|
|
|
|
other: &Self,
|
|
|
|
type_pair: (crate::delta::DeltaType, crate::delta::DeltaType),
|
|
|
|
) {
|
|
|
|
// We can't have two Some because we don't have `move_from` for Retain.
|
|
|
|
// And this function is only called when composing a insert/retain with a retain.
|
|
|
|
if let (crate::delta::DeltaType::Insert, crate::delta::DeltaType::Insert) = type_pair {
|
|
|
|
unreachable!()
|
|
|
|
}
|
|
|
|
|
|
|
|
self.from_move = self.from_move || other.from_move;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_mergeable(&self, other: &Self) -> bool {
|
|
|
|
self.from_move == other.from_move
|
|
|
|
}
|
|
|
|
|
|
|
|
fn merge(&mut self, _other: &Self) {}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl DeltaAttr for ListDeltaMeta {
    /// Folds `other` into `self`; the flag stays set once either side has it.
    fn compose(&mut self, other: &Self) {
        self.from_move |= other.from_move;
    }

    /// The attribute is empty exactly when `from_move` is unset.
    fn attr_is_empty(&self) -> bool {
        !self.from_move
    }
}
|
|
|
|
|
2024-04-24 05:53:26 +00:00
|
|
|
/// The values carried by one insert of a list diff: a fixed-capacity array
/// holding at most eight `ValueOrHandler`s.
pub type ListDiffInsertItem = ArrayVec<ValueOrHandler, 8>;
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
/// A single item of a list diff: a `DeltaItem` whose inserted value is a
/// `ListDiffInsertItem` and whose attribute is a `ListDeltaMeta`.
pub type ListDiffItem = DeltaItem<ListDiffInsertItem, ListDeltaMeta>;
|
|
|
|
/// A whole list diff: a `DeltaRope` over `ListDiffInsertItem` values with
/// `ListDeltaMeta` attributes.
pub type ListDiff = DeltaRope<ListDiffInsertItem, ListDeltaMeta>;
|
2024-04-24 05:53:26 +00:00
|
|
|
|
|
|
|
/// A single item of a text diff: a `DeltaItem` whose inserted value is a
/// `StringSlice` and whose attribute is a `StyleMeta`.
pub type TextDiffItem = DeltaItem<StringSlice, StyleMeta>;
|
|
|
|
/// A whole text diff: a `DeltaRope` over `StringSlice` values with
/// `StyleMeta` attributes.
pub type TextDiff = DeltaRope<StringSlice, StyleMeta>;
|
|
|
|
|
2023-07-28 05:38:52 +00:00
|
|
|
/// Diff is the diff between two versions of a container.
|
|
|
|
/// It's used to describe the change of a container and the events.
|
|
|
|
///
|
|
|
|
/// # Internal
|
|
|
|
///
|
2023-10-29 06:02:13 +00:00
|
|
|
/// Text index variants:
|
2023-07-28 05:38:52 +00:00
|
|
|
///
|
|
|
|
/// - When `wasm` is enabled, it should use utf16 indexes.
|
2023-10-29 06:02:13 +00:00
|
|
|
/// - When `wasm` is disabled, it should use unicode indexes.
|
|
|
|
#[non_exhaustive]
|
2023-12-05 03:57:41 +00:00
|
|
|
#[derive(Clone, Debug, EnumAsInner)]
|
2022-11-23 08:26:38 +00:00
|
|
|
pub enum Diff {
|
2024-04-24 05:53:26 +00:00
|
|
|
/// List change, carried as a `ListDiff`.
List(ListDiff),
|
2024-01-19 14:20:27 +00:00
|
|
|
// TODO: refactor; it doesn't make much sense to use `StyleMeta` here, because sometimes the style
|
|
|
|
// doesn't have peer and lamport info
|
2023-10-29 06:02:13 +00:00
|
|
|
/// Text change, carried as a `TextDiff`. Index units:
/// - When feature `wasm` is enabled, it should use utf16 indexes.
|
|
|
|
/// - When feature `wasm` is disabled, it should use unicode indexes.
|
2024-04-24 05:53:26 +00:00
|
|
|
Text(TextDiff),
|
2023-12-05 03:57:41 +00:00
|
|
|
/// Map change, carried as a `ResolvedMapDelta`.
Map(ResolvedMapDelta),
|
2023-11-05 07:53:33 +00:00
|
|
|
/// Tree change, carried as a `TreeDiff`.
Tree(TreeDiff),
|
2024-05-13 13:37:10 +00:00
|
|
|
/// Counter change as a signed 64-bit value; only compiled in when the
/// `counter` feature is enabled.
#[cfg(feature = "counter")]
|
|
|
|
Counter(i64),
|
|
|
|
/// Diff with no payload (presumably for a container kind this build does
/// not understand; confirm against the producers of this variant).
Unknown,
|
2022-11-23 08:26:38 +00:00
|
|
|
}
|
|
|
|
|
2023-12-05 03:57:41 +00:00
|
|
|
/// Wraps a user-facing [`Diff`] in the `External` variant of [`DiffVariant`].
impl From<Diff> for DiffVariant {
    fn from(diff: Diff) -> Self {
        Self::External(diff)
    }
}
|
|
|
|
|
2023-10-29 06:02:13 +00:00
|
|
|
impl InternalDiff {
|
2023-10-30 06:16:50 +00:00
|
|
|
pub(crate) fn is_empty(&self) -> bool {
|
|
|
|
match self {
|
feat: stabilizing encoding (#219)
This PR implements a new encode schema that is more extendible and more compact. It’s also simpler and takes less binary size and maintaining effort. It is inspired by the [Automerge Encoding Format](https://automerge.org/automerge-binary-format-spec/).
The main motivation is the extensibility. When we integrate a new CRDT algorithm, we don’t want to make a breaking change to the encoding or keep multiple versions of the encoding schema in the code, as it will make our WASM size much larger. We need a stable and extendible encoding schema for our v1.0 version.
This PR also exposes the ops that compose the current container state. For example, now you can make a query about which operation a certain character quickly. This behavior is required in the new snapshot encoding, so it’s included in this PR.
# Encoding Schema
## Header
The header has 22 bytes.
- (0-4 bytes) Magic Bytes: The encoding starts with `loro` as magic bytes.
- (4-20 bytes) Checksum: MD5 checksum of the encoded data, including the header starting from 20th bytes. The checksum is encoded as a 16-byte array. The `checksum` and `magic bytes` fields are trimmed when calculating the checksum.
- (20-21 bytes) Encoding Method (2 bytes, big endian): Multiple encoding methods are available for a specific encoding version.
## Encode Mode: Updates
In this approach, only ops, specifically their historical record, are encoded, while document states are excluded.
Like Automerge's format, we employ columnar encoding for operations and changes.
Previously, operations were ordered by their Operation ID (OpId) before columnar encoding. However, sorting operations based on their respective containers initially enhance compression potential.
## Encode Mode: Snapshot
This mode simultaneously captures document state and historical data. Upon importing a snapshot into a new document, initialization occurs directly from the snapshot, bypassing the need for CRDT-based recalculations.
Unlike previous snapshot encoding methods, the current binary output in snapshot mode is compatible with the updates mode. This enhances the efficiency of importing snapshots into non-empty documents, where initialization via snapshot is infeasible.
Additionally, when feasible, we leverage the sequence of operations to construct state snapshots. In CRDTs, deducing the specific ops constituting the current container state is feasible. These ops are tagged in relation to the container, facilitating direct state reconstruction from them. This approach, pioneered by Automerge, significantly improves compression efficiency.
2024-01-02 09:03:24 +00:00
|
|
|
InternalDiff::ListRaw(s) => s.is_empty(),
|
2023-10-30 06:16:50 +00:00
|
|
|
InternalDiff::RichtextRaw(t) => t.is_empty(),
|
|
|
|
InternalDiff::Map(m) => m.updated.is_empty(),
|
2023-11-05 07:53:33 +00:00
|
|
|
InternalDiff::Tree(t) => t.is_empty(),
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
InternalDiff::MovableList(t) => t.is_empty(),
|
2024-05-13 13:37:10 +00:00
|
|
|
#[cfg(feature = "counter")]
|
|
|
|
InternalDiff::Counter(c) => *c == 0,
|
|
|
|
InternalDiff::Unknown => true,
|
2023-10-30 06:16:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-29 06:02:13 +00:00
|
|
|
pub(crate) fn compose(self, diff: InternalDiff) -> Result<Self, Self> {
|
|
|
|
// PERF: avoid clone
|
|
|
|
match (self, diff) {
|
feat: stabilizing encoding (#219)
This PR implements a new encode schema that is more extendible and more compact. It’s also simpler and takes less binary size and maintaining effort. It is inspired by the [Automerge Encoding Format](https://automerge.org/automerge-binary-format-spec/).
The main motivation is the extensibility. When we integrate a new CRDT algorithm, we don’t want to make a breaking change to the encoding or keep multiple versions of the encoding schema in the code, as it will make our WASM size much larger. We need a stable and extendible encoding schema for our v1.0 version.
This PR also exposes the ops that compose the current container state. For example, now you can make a query about which operation a certain character quickly. This behavior is required in the new snapshot encoding, so it’s included in this PR.
# Encoding Schema
## Header
The header has 22 bytes.
- (0-4 bytes) Magic Bytes: The encoding starts with `loro` as magic bytes.
- (4-20 bytes) Checksum: MD5 checksum of the encoded data, including the header starting from 20th bytes. The checksum is encoded as a 16-byte array. The `checksum` and `magic bytes` fields are trimmed when calculating the checksum.
- (20-21 bytes) Encoding Method (2 bytes, big endian): Multiple encoding methods are available for a specific encoding version.
## Encode Mode: Updates
In this approach, only ops, specifically their historical record, are encoded, while document states are excluded.
Like Automerge's format, we employ columnar encoding for operations and changes.
Previously, operations were ordered by their Operation ID (OpId) before columnar encoding. However, sorting operations based on their respective containers initially enhance compression potential.
## Encode Mode: Snapshot
This mode simultaneously captures document state and historical data. Upon importing a snapshot into a new document, initialization occurs directly from the snapshot, bypassing the need for CRDT-based recalculations.
Unlike previous snapshot encoding methods, the current binary output in snapshot mode is compatible with the updates mode. This enhances the efficiency of importing snapshots into non-empty documents, where initialization via snapshot is infeasible.
Additionally, when feasible, we leverage the sequence of operations to construct state snapshots. In CRDTs, deducing the specific ops constituting the current container state is feasible. These ops are tagged in relation to the container, facilitating direct state reconstruction from them. This approach, pioneered by Automerge, significantly improves compression efficiency.
2024-01-02 09:03:24 +00:00
|
|
|
(InternalDiff::ListRaw(a), InternalDiff::ListRaw(b)) => {
|
|
|
|
Ok(InternalDiff::ListRaw(a.compose(b)))
|
2023-10-29 06:02:13 +00:00
|
|
|
}
|
|
|
|
(InternalDiff::RichtextRaw(a), InternalDiff::RichtextRaw(b)) => {
|
|
|
|
Ok(InternalDiff::RichtextRaw(a.compose(b)))
|
|
|
|
}
|
|
|
|
(InternalDiff::Map(a), InternalDiff::Map(b)) => Ok(InternalDiff::Map(a.compose(b))),
|
2023-10-30 03:13:52 +00:00
|
|
|
(InternalDiff::Tree(a), InternalDiff::Tree(b)) => Ok(InternalDiff::Tree(a.compose(b))),
|
2023-10-29 06:02:13 +00:00
|
|
|
(a, _) => Err(a),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-22 11:02:22 +00:00
|
|
|
impl Diff {
|
2024-05-20 22:14:49 +00:00
|
|
|
pub(crate) fn compose_ref(&mut self, diff: &Diff) {
|
|
|
|
// PERF: avoid clone
|
|
|
|
match (self, diff) {
|
|
|
|
(Diff::List(a), Diff::List(b)) => {
|
|
|
|
a.compose(b);
|
|
|
|
}
|
|
|
|
(Diff::Text(a), Diff::Text(b)) => {
|
|
|
|
a.compose(b);
|
|
|
|
}
|
|
|
|
(Diff::Map(a), Diff::Map(b)) => {
|
|
|
|
*a = a.clone().compose(b.clone());
|
|
|
|
}
|
|
|
|
(Diff::Tree(a), Diff::Tree(b)) => {
|
|
|
|
*a = a.clone().compose(b.clone());
|
|
|
|
}
|
|
|
|
#[cfg(feature = "counter")]
|
|
|
|
(Diff::Counter(a), Diff::Counter(b)) => *a += b,
|
|
|
|
(_, _) => unreachable!(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-12-05 03:57:41 +00:00
|
|
|
pub(crate) fn compose(self, diff: Diff) -> Result<Self, Self> {
|
2023-07-22 11:02:22 +00:00
|
|
|
// PERF: avoid clone
|
|
|
|
match (self, diff) {
|
2024-04-24 05:53:26 +00:00
|
|
|
(Diff::List(mut a), Diff::List(b)) => {
|
|
|
|
a.compose(&b);
|
|
|
|
Ok(Diff::List(a))
|
|
|
|
}
|
|
|
|
(Diff::Text(mut a), Diff::Text(b)) => {
|
|
|
|
a.compose(&b);
|
|
|
|
Ok(Diff::Text(a))
|
|
|
|
}
|
2023-12-05 03:57:41 +00:00
|
|
|
(Diff::Map(a), Diff::Map(b)) => Ok(Diff::Map(a.compose(b))),
|
2023-10-30 03:13:52 +00:00
|
|
|
|
|
|
|
(Diff::Tree(a), Diff::Tree(b)) => Ok(Diff::Tree(a.compose(b))),
|
2024-05-13 13:37:10 +00:00
|
|
|
#[cfg(feature = "counter")]
|
|
|
|
(Diff::Counter(a), Diff::Counter(b)) => Ok(Diff::Counter(a + b)),
|
2023-07-22 11:02:22 +00:00
|
|
|
(a, _) => Err(a),
|
|
|
|
}
|
|
|
|
}
|
2023-11-05 07:53:33 +00:00
|
|
|
|
2024-05-20 22:14:49 +00:00
|
|
|
// Transform this diff based on the other diff
|
|
|
|
pub(crate) fn transform(&mut self, other: &Self, left_prior: bool) {
|
|
|
|
match (self, other) {
|
|
|
|
(Diff::List(a), Diff::List(b)) => a.transform_(b, left_prior),
|
|
|
|
(Diff::Text(a), Diff::Text(b)) => a.transform_(b, left_prior),
|
|
|
|
(Diff::Map(a), Diff::Map(b)) => a.transform(b, left_prior),
|
|
|
|
(Diff::Tree(a), Diff::Tree(b)) => a.transform(b, left_prior),
|
|
|
|
#[cfg(feature = "counter")]
|
|
|
|
(Diff::Counter(a), Diff::Counter(b)) => {
|
|
|
|
if left_prior {
|
|
|
|
*a += b;
|
|
|
|
} else {
|
|
|
|
*a -= b;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => {}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[allow(unused)]
|
2023-11-05 07:53:33 +00:00
|
|
|
pub(crate) fn is_empty(&self) -> bool {
|
|
|
|
match self {
|
|
|
|
Diff::List(s) => s.is_empty(),
|
|
|
|
Diff::Text(t) => t.is_empty(),
|
2023-12-05 03:57:41 +00:00
|
|
|
Diff::Map(m) => m.updated.is_empty(),
|
2023-11-05 07:53:33 +00:00
|
|
|
Diff::Tree(t) => t.diff.is_empty(),
|
2024-05-13 13:37:10 +00:00
|
|
|
#[cfg(feature = "counter")]
|
|
|
|
Diff::Counter(c) => *c == 0,
|
|
|
|
Diff::Unknown => true,
|
2023-11-05 07:53:33 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Movable List (#293)
* bk: add move op content
* bk: add inner_movable_list diff and related stuff
* perf: high perf state
* fix: update old list item cache
* fix: should use id in del
* feat: two kinds of len for movable list state
* bk: add op index to movable list
* bk: make basic handler test pass
* refactor: add move_from to list event
* fix: make all existing tests pass
* bk: list move event hint into event
* bk: convert inner event into user event
Co-authored-by: Leon <leeeon233@gmail.com>
* fix: convert issue when inserting new value
* feat: add op group for movable list
* feat: diff calc
* feat: add mov support to tracker
* fix: when applying diff, state should be force update
* feat: encoded op
* feat: snapshot encode
* fix: pass basic sync
* fix: snapshot encode/decode
* fix: warnings
* feat: expose mov list to loro crate
* test: fuzz movable list
* test: fix fuzz integration
* fix: movable list basic move sync
* fix: movable list events
* fix: movable event err
* fix: register child container on movable list
* fix: should not return child index if the value is already overwritten
* fix: local event err in movable list
* fix: get elem at pos
* refactor: extract mut op that could break invariants
* fix: event err
* fix: child container to elem err
* fix: bringback event issue
* fix: event err
* fix: event emit
* fix: id to cursor iter issue
* chore: fix a few warnings
* fix: warnings
* fix: fix move in tracker
* test: add consistency check
* test: fix tracker
* refactor: simplify event conversion in docstate
* refactor: refine move event
* refactor: simplify the maintain of parent child links
* fix: revive err
* fix: warnings
* fix: it's possible that pos change but cannot find the respective list item
* fix: elem may be dropped after snapshot
* fix: warnings
* fix: richtext time travel issue
* fix: move op used wrong delete id on tracker
* fix: handle events created by concurrent moves correctly
* fix: event hint error, used op index for list event
* fix: move_from flag err
* fix: id to cursor get err
* test: add mov fuzz target
* fix: the pos of inserting new container
* fix: used wrong event hint index
* fix: del event hint
* fix: warnings
* fix: internal diff to event err
* fix: event's move flag error
This "move" flag does not actually mean that the insertion
is caused by the move op.
就算是 move 造成的它不一定就能是 true
它得是下游真的能在“前一个版本的 array 里找到“,才能是 true
* fix: remove redundant elements from the movable list
The Movable List is currently flawed; an element may not exist on the movable list state, yet there are operations that revive its corresponding list item. In such cases, the diff calculation does not send back the corresponding element state (this occurs when tracing back, which fuzz testing currently does not cover. It might only be exposed by randomly switching to a version and then checking for consistency; otherwise, as long as all elements are in memory, this problem does not arise).
Moreover, there is no need to store elements in the state that do not have a corresponding list item. They will be deleted during the Snapshot, and relying on "them still being in the state" is incorrect behavior. Such adjustments also eliminate the need to maintain the `pending_elements` field.
By allowing the opgroup to record the mapping from pos id to state id, we can ensure that the events sent to the movable list state will include the corresponding state.
Movable List 现在是有错的,elem 可能不存在 movable list state 上,但是又有操作把它对应的 list item 复活了,此时 diff calc 不会把对应 element 状态发送回来(往前回溯的时候会出现,fuzz 现在没覆盖到。得有随意切换一个版本然后 check consistency 才可能会暴露;否则现在大家 elements 都在内存,就没这个问题)
而且我们没有必要在状态中存储没有对应 list item 的 element。在 Snapshot 的时候它们都会被删掉,如果依赖了“它们还会在 state 内”就是错误的行为。这样的调整也让我们不需要去维护 pending_elements 这个 field 了
通过让 opgroup 记录了 pos id → state id 的映射,可以保证发给 movable list state 的事件中会带上对应的 state
* test: make fuzzer stricter
* test: test expectation error
* refactor: rename stable pos to cursor
* tests: chore list bench init
* test: add bench
* bench: add mov & set bench
* feat(wasm): movable list js api
* fix: make movablelist able to attach even if it's already attached & refine the type of subscribe
* fix: remove the loro doc param in .unsub
* refactor: refine ts types and export setContainer api
* chore: fix warnings
* chore: rm debug logs
* perf: reduce mem usage of opgroup
* bench: add list criterion bench
---------
Co-authored-by: Leon <leeeon233@gmail.com>
2024-04-26 04:08:53 +00:00
|
|
|
#[allow(unused)]
|
2023-11-05 07:53:33 +00:00
|
|
|
pub(crate) fn concat(self, diff: Diff) -> Diff {
|
|
|
|
match (self, diff) {
|
2024-04-24 05:53:26 +00:00
|
|
|
(Diff::List(mut a), Diff::List(b)) => {
|
|
|
|
a.compose(&b);
|
|
|
|
Diff::List(a)
|
|
|
|
}
|
|
|
|
(Diff::Text(mut a), Diff::Text(b)) => {
|
|
|
|
a.compose(&b);
|
|
|
|
Diff::Text(a)
|
|
|
|
}
|
2023-12-05 03:57:41 +00:00
|
|
|
(Diff::Map(a), Diff::Map(b)) => {
|
2023-11-05 07:53:33 +00:00
|
|
|
let mut a = a;
|
|
|
|
for (k, v) in b.updated {
|
|
|
|
a = a.with_entry(k, v);
|
|
|
|
}
|
2023-12-05 03:57:41 +00:00
|
|
|
Diff::Map(a)
|
2023-11-05 07:53:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
(Diff::Tree(a), Diff::Tree(b)) => Diff::Tree(a.extend(b.diff)),
|
2024-05-13 13:37:10 +00:00
|
|
|
#[cfg(feature = "counter")]
|
|
|
|
(Diff::Counter(a), Diff::Counter(b)) => Diff::Counter(a + b),
|
2023-11-05 07:53:33 +00:00
|
|
|
_ => unreachable!(),
|
|
|
|
}
|
|
|
|
}
|
2024-05-23 02:19:08 +00:00
|
|
|
|
|
|
|
/// Transform the cursor based on this diff
|
|
|
|
pub(crate) fn transform_cursor(&self, pos: usize, left_prior: bool) -> usize {
|
2024-06-06 08:54:52 +00:00
|
|
|
match self {
|
2024-05-23 02:19:08 +00:00
|
|
|
Diff::List(list) => list.transform_pos(pos, left_prior),
|
|
|
|
Diff::Text(text) => text.transform_pos(pos, left_prior),
|
|
|
|
_ => pos,
|
2024-06-06 08:54:52 +00:00
|
|
|
}
|
2024-05-23 02:19:08 +00:00
|
|
|
}
|
2023-07-22 11:02:22 +00:00
|
|
|
}
|
2023-12-05 03:57:41 +00:00
|
|
|
|
2024-04-01 09:25:12 +00:00
|
|
|
/// Parses a `/`-separated string into a path of [`Index`] segments.
///
/// Each segment is converted with `try_into`; the result is `None` as soon as any segment
/// fails to convert.
pub fn str_to_path(s: &str) -> Option<Vec<Index>> {
    let parsed = s
        .split('/')
        .map(|segment| segment.try_into())
        .collect::<Result<Vec<Index>, _>>();
    parsed.ok()
}
|
|
|
|
|
|
|
|
pub fn path_to_str(path: &[Index]) -> String {
|
|
|
|
path.iter().map(|x| x.to_string()).join("/")
|
|
|
|
}
|
|
|
|
|
2023-12-05 03:57:41 +00:00
|
|
|
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use itertools::Itertools;
    use loro_common::LoroValue;

    use crate::{ApplyDiff, LoroDoc};

    /// Applies the diffs emitted for a committed text transaction to an empty string value and
    /// checks that the result equals the text that was written.
    #[test]
    fn test_text_event() {
        let doc = LoroDoc::new();
        doc.subscribe_root(Arc::new(|event| {
            let diffs = event
                .events
                .iter()
                .map(|container_event| container_event.diff.clone())
                .collect_vec();
            let mut replayed = LoroValue::String(Default::default());
            replayed.apply_diff(&diffs);
            assert_eq!(replayed, "h223ello".into());
        }));

        let mut txn = doc.txn().unwrap();
        let text = doc.get_text("id");
        // Two inserts in one transaction: "hello", then "223" after the first character.
        text.insert_with_txn(&mut txn, 0, "hello").unwrap();
        text.insert_with_txn(&mut txn, 1, "223").unwrap();
        txn.commit().unwrap();
    }
}
|