loro/crates/loro-internal/src/event.rs

311 lines
9.3 KiB
Rust
Raw Normal View History

use enum_as_inner::EnumAsInner;
use fxhash::FxHasher64;
use itertools::Itertools;
2022-11-29 10:31:57 +00:00
use serde::{Deserialize, Serialize};
2023-03-10 02:50:05 +00:00
use smallvec::SmallVec;
2022-11-23 08:26:38 +00:00
2023-03-01 13:37:58 +00:00
use crate::{
feat: movable tree support (#120) * feat: tree state * feat: tree value * feat: tree handler * fix: tree diff * test: fuzz tree * feat: tree snapshot * fix: tree default value * fix: test new node * fix: tree diff * fix: tree unresolved value * fix: tree fuzz * fix: tree fuzz move * fix: sort by tree id * fix: tree diff sorted by lamport * fix: sort roots before tree converted to string * fix: rebase main * fix: tree fuzz * fix: delete undo * fix: tree to json children sorted * fix: diff calculate * fix: diff cycle move * fix: tree old parent cache * feat: cache * fix: local op add tree cache * fix: don't add same tree move to cache * fix: need update cache * feat: new cache * bench: add checkout bench * chore: clean * fix: apply node uncheck * perf: lamport bound * fix: calc old parent * feat: tree wasm * fix: change tree diff * fix: tree diff retreat * fix: tree diff should not apply when add node * feat: new tree loro value * chore: typo * fix: tree deep value * fix: snapshot tree index -1 * fix: decode tree snapshot use state * fix: release state lock when emit event * fix: tree node meta container * fix: need set map container when covert to local tree op * fix: tree value add deleted * fix: more then one op in a change * fix: tree fuzz deleted equal * fix: tree calc min lamport * feat: tree encoding v2 * doc: movable tree * fix: test tree meta * test: remove import bytes check * refactor: diff of text and map * refactor: del span * perf: tree state use deleted cache * fix: some details * fix: loro js tree create * feat: add un exist tree node * bench: tree depth * fix: check out should emit event * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * fix: fix merge * fix: return err apply op * fix: fix merge * fix: get map container as tree meta
2023-10-30 03:13:52 +00:00
container::richtext::richtext_state::RichtextStateChunk,
delta::{Delta, MapDelta, ResolvedMapDelta, StyleMeta, TreeDelta, TreeDiff},
handler::ValueOrHandler,
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
op::SliceRanges,
utils::string_slice::StringSlice,
InternalString,
2023-03-01 13:37:58 +00:00
};
2022-11-23 17:01:40 +00:00
use std::{
borrow::Cow,
hash::{Hash, Hasher},
};
2024-04-01 09:42:02 +00:00
use loro_common::{ContainerID, TreeID};
use crate::{container::idx::ContainerIdx, version::Frontiers};
#[derive(Debug, Clone)]
pub struct ContainerDiff {
pub id: ContainerID,
pub path: Vec<(ContainerID, Index)>,
pub(crate) idx: ContainerIdx,
pub diff: Diff,
}
#[derive(Debug, Clone)]
pub struct DiffEvent<'a> {
/// The receiver of the event.
pub current_target: Option<ContainerID>,
/// A list of events that should be received by the current target.
pub events: &'a [&'a ContainerDiff],
pub event_meta: &'a DocDiff,
2022-11-23 08:26:38 +00:00
}
/// It's the exposed event type.
/// It's exposed to the user. The user can use this to apply the diff to their local state.
///
/// [DocDiff] may include the diff that calculated from several transactions and imports.
/// They all should have the same origin and local flag.
#[derive(Debug, Clone)]
pub struct DocDiff {
pub from: Frontiers,
pub to: Frontiers,
pub origin: InternalString,
pub local: bool,
/// Whether the diff is created from the checkout operation.
pub from_checkout: bool,
pub diff: Vec<ContainerDiff>,
2022-11-23 08:26:38 +00:00
}
impl DocDiff {
/// Get the unique id of the diff.
pub fn id(&self) -> u64 {
let mut hasher = FxHasher64::default();
self.from.hash(&mut hasher);
self.to.hash(&mut hasher);
hasher.finish()
}
}
#[derive(Debug, Clone)]
pub(crate) struct InternalContainerDiff {
pub(crate) idx: ContainerIdx,
// If true, this event is created by the container which was resurrected by another container
pub(crate) bring_back: bool,
pub(crate) is_container_deleted: bool,
pub(crate) diff: Option<DiffVariant>,
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
}
#[derive(Debug, Clone, EnumAsInner)]
pub(crate) enum DiffVariant {
Internal(InternalDiff),
External(Diff),
2022-12-30 09:50:23 +00:00
}
/// It's used for transmitting and recording the diff internally.
///
/// It can be convert into a [DocDiff].
// Internally, we need to batch the diff then calculate the event. Because
// we need to sort the diff by containers' created time, to make sure the
// the path to each container is up-to-date.
#[derive(Debug, Clone)]
pub(crate) struct InternalDocDiff<'a> {
pub(crate) origin: InternalString,
pub(crate) local: bool,
pub(crate) from_checkout: bool,
pub(crate) diff: Cow<'a, [InternalContainerDiff]>,
pub(crate) new_version: Cow<'a, Frontiers>,
}
impl<'a> InternalDocDiff<'a> {
pub fn into_owned(self) -> InternalDocDiff<'static> {
InternalDocDiff {
origin: self.origin,
local: self.local,
from_checkout: self.from_checkout,
diff: Cow::Owned((*self.diff).to_owned()),
new_version: Cow::Owned((*self.new_version).to_owned()),
}
}
pub fn can_merge(&self, other: &Self) -> bool {
self.origin == other.origin && self.local == other.local
}
}
pub type Path = SmallVec<[Index; 4]>;
2022-11-23 08:26:38 +00:00
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, enum_as_inner::EnumAsInner)]
2022-11-23 08:26:38 +00:00
pub enum Index {
Key(InternalString),
2022-11-23 10:12:23 +00:00
Seq(usize),
feat: movable tree support (#120) * feat: tree state * feat: tree value * feat: tree handler * fix: tree diff * test: fuzz tree * feat: tree snapshot * fix: tree default value * fix: test new node * fix: tree diff * fix: tree unresolved value * fix: tree fuzz * fix: tree fuzz move * fix: sort by tree id * fix: tree diff sorted by lamport * fix: sort roots before tree converted to string * fix: rebase main * fix: tree fuzz * fix: delete undo * fix: tree to json children sorted * fix: diff calculate * fix: diff cycle move * fix: tree old parent cache * feat: cache * fix: local op add tree cache * fix: don't add same tree move to cache * fix: need update cache * feat: new cache * bench: add checkout bench * chore: clean * fix: apply node uncheck * perf: lamport bound * fix: calc old parent * feat: tree wasm * fix: change tree diff * fix: tree diff retreat * fix: tree diff should not apply when add node * feat: new tree loro value * chore: typo * fix: tree deep value * fix: snapshot tree index -1 * fix: decode tree snapshot use state * fix: release state lock when emit event * fix: tree node meta container * fix: need set map container when covert to local tree op * fix: tree value add deleted * fix: more then one op in a change * fix: tree fuzz deleted equal * fix: tree calc min lamport * feat: tree encoding v2 * doc: movable tree * fix: test tree meta * test: remove import bytes check * refactor: diff of text and map * refactor: del span * perf: tree state use deleted cache * fix: some details * fix: loro js tree create * feat: add un exist tree node * bench: tree depth * fix: check out should emit event * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * fix: fix merge * fix: return err apply op * fix: fix merge * fix: get map container as tree meta
2023-10-30 03:13:52 +00:00
Node(TreeID),
2022-11-23 08:26:38 +00:00
}
impl std::fmt::Display for Index {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Index::Key(key) => write!(f, "{}", key),
Index::Seq(s) => write!(f, "{}", s),
Index::Node(id) => write!(f, "{}@{}", id.peer, id.counter),
}
}
}
impl TryFrom<&str> for Index {
type Error = &'static str;
fn try_from(s: &str) -> Result<Self, &'static str> {
if s.is_empty() {
return Ok(Index::Key(InternalString::default()));
}
let c = s.chars().next().unwrap();
if c.is_ascii_digit() {
if let Ok(seq) = s.parse::<usize>() {
Ok(Index::Seq(seq))
} else if let Ok(id) = s.try_into() {
Ok(Index::Node(id))
} else {
Ok(Index::Key(InternalString::from(s)))
}
} else {
Ok(Index::Key(InternalString::from(s)))
}
}
}
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
impl DiffVariant {
pub fn compose(self, other: Self) -> Result<Self, Self> {
match (self, other) {
(DiffVariant::Internal(a), DiffVariant::Internal(b)) => {
Ok(DiffVariant::Internal(a.compose(b)?))
}
(DiffVariant::External(a), DiffVariant::External(b)) => {
Ok(DiffVariant::External(a.compose(b)?))
}
(a, _) => Err(a),
}
}
}
feat: movable tree support (#120) * feat: tree state * feat: tree value * feat: tree handler * fix: tree diff * test: fuzz tree * feat: tree snapshot * fix: tree default value * fix: test new node * fix: tree diff * fix: tree unresolved value * fix: tree fuzz * fix: tree fuzz move * fix: sort by tree id * fix: tree diff sorted by lamport * fix: sort roots before tree converted to string * fix: rebase main * fix: tree fuzz * fix: delete undo * fix: tree to json children sorted * fix: diff calculate * fix: diff cycle move * fix: tree old parent cache * feat: cache * fix: local op add tree cache * fix: don't add same tree move to cache * fix: need update cache * feat: new cache * bench: add checkout bench * chore: clean * fix: apply node uncheck * perf: lamport bound * fix: calc old parent * feat: tree wasm * fix: change tree diff * fix: tree diff retreat * fix: tree diff should not apply when add node * feat: new tree loro value * chore: typo * fix: tree deep value * fix: snapshot tree index -1 * fix: decode tree snapshot use state * fix: release state lock when emit event * fix: tree node meta container * fix: need set map container when covert to local tree op * fix: tree value add deleted * fix: more then one op in a change * fix: tree fuzz deleted equal * fix: tree calc min lamport * feat: tree encoding v2 * doc: movable tree * fix: test tree meta * test: remove import bytes check * refactor: diff of text and map * refactor: del span * perf: tree state use deleted cache * fix: some details * fix: loro js tree create * feat: add un exist tree node * bench: tree depth * fix: check out should emit event * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * fix: fix merge * fix: return err apply op * fix: fix merge * fix: get map container as tree meta
2023-10-30 03:13:52 +00:00
#[non_exhaustive]
#[derive(Clone, Debug, EnumAsInner)]
2023-11-04 12:03:43 +00:00
pub(crate) enum InternalDiff {
feat: stabilizing encoding (#219) This PR implements a new encode schema that is more extendible and more compact. It’s also simpler and takes less binary size and maintaining effort. It is inspired by the [Automerge Encoding Format](https://automerge.org/automerge-binary-format-spec/). The main motivation is the extensibility. When we integrate a new CRDT algorithm, we don’t want to make a breaking change to the encoding or keep multiple versions of the encoding schema in the code, as it will make our WASM size much larger. We need a stable and extendible encoding schema for our v1.0 version. This PR also exposes the ops that compose the current container state. For example, now you can make a query about which operation a certain character quickly. This behavior is required in the new snapshot encoding, so it’s included in this PR. # Encoding Schema ## Header The header has 22 bytes. - (0-4 bytes) Magic Bytes: The encoding starts with `loro` as magic bytes. - (4-20 bytes) Checksum: MD5 checksum of the encoded data, including the header starting from 20th bytes. The checksum is encoded as a 16-byte array. The `checksum` and `magic bytes` fields are trimmed when calculating the checksum. - (20-21 bytes) Encoding Method (2 bytes, big endian): Multiple encoding methods are available for a specific encoding version. ## Encode Mode: Updates In this approach, only ops, specifically their historical record, are encoded, while document states are excluded. Like Automerge's format, we employ columnar encoding for operations and changes. Previously, operations were ordered by their Operation ID (OpId) before columnar encoding. However, sorting operations based on their respective containers initially enhance compression potential. ## Encode Mode: Snapshot This mode simultaneously captures document state and historical data. Upon importing a snapshot into a new document, initialization occurs directly from the snapshot, bypassing the need for CRDT-based recalculations. Unlike previous snapshot encoding methods, the current binary output in snapshot mode is compatible with the updates mode. This enhances the efficiency of importing snapshots into non-empty documents, where initialization via snapshot is infeasible. Additionally, when feasible, we leverage the sequence of operations to construct state snapshots. In CRDTs, deducing the specific ops constituting the current container state is feasible. These ops are tagged in relation to the container, facilitating direct state reconstruction from them. This approach, pioneered by Automerge, significantly improves compression efficiency.
2024-01-02 09:03:24 +00:00
ListRaw(Delta<SliceRanges>),
feat: movable tree support (#120) * feat: tree state * feat: tree value * feat: tree handler * fix: tree diff * test: fuzz tree * feat: tree snapshot * fix: tree default value * fix: test new node * fix: tree diff * fix: tree unresolved value * fix: tree fuzz * fix: tree fuzz move * fix: sort by tree id * fix: tree diff sorted by lamport * fix: sort roots before tree converted to string * fix: rebase main * fix: tree fuzz * fix: delete undo * fix: tree to json children sorted * fix: diff calculate * fix: diff cycle move * fix: tree old parent cache * feat: cache * fix: local op add tree cache * fix: don't add same tree move to cache * fix: need update cache * feat: new cache * bench: add checkout bench * chore: clean * fix: apply node uncheck * perf: lamport bound * fix: calc old parent * feat: tree wasm * fix: change tree diff * fix: tree diff retreat * fix: tree diff should not apply when add node * feat: new tree loro value * chore: typo * fix: tree deep value * fix: snapshot tree index -1 * fix: decode tree snapshot use state * fix: release state lock when emit event * fix: tree node meta container * fix: need set map container when covert to local tree op * fix: tree value add deleted * fix: more then one op in a change * fix: tree fuzz deleted equal * fix: tree calc min lamport * feat: tree encoding v2 * doc: movable tree * fix: test tree meta * test: remove import bytes check * refactor: diff of text and map * refactor: del span * perf: tree state use deleted cache * fix: some details * fix: loro js tree create * feat: add un exist tree node * bench: tree depth * fix: check out should emit event * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * fix: fix merge * fix: return err apply op * fix: fix merge * fix: get map container as tree meta
2023-10-30 03:13:52 +00:00
/// This always uses entity indexes.
RichtextRaw(Delta<RichtextStateChunk>),
Map(MapDelta),
Tree(TreeDelta),
}
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
impl From<InternalDiff> for DiffVariant {
fn from(diff: InternalDiff) -> Self {
DiffVariant::Internal(diff)
}
}
2023-07-28 05:38:52 +00:00
/// Diff is the diff between two versions of a container.
/// It's used to describe the change of a container and the events.
///
/// # Internal
///
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
/// Text index variants:
2023-07-28 05:38:52 +00:00
///
/// - When `wasm` is enabled, it should use utf16 indexes.
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
/// - When `wasm` is disabled, it should use unicode indexes.
#[non_exhaustive]
#[derive(Clone, Debug, EnumAsInner)]
2022-11-23 08:26:38 +00:00
pub enum Diff {
List(Delta<Vec<ValueOrHandler>>),
// TODO: refactor, doesn't make much sense to use `StyleMeta` here, because sometime style
// don't have peer and lamport info
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
/// - When feature `wasm` is enabled, it should use utf16 indexes.
/// - When feature `wasm` is disabled, it should use unicode indexes.
Text(Delta<StringSlice, StyleMeta>),
Map(ResolvedMapDelta),
Tree(TreeDiff),
2022-11-23 08:26:38 +00:00
}
impl From<Diff> for DiffVariant {
fn from(diff: Diff) -> Self {
DiffVariant::External(diff)
}
}
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
impl InternalDiff {
pub(crate) fn is_empty(&self) -> bool {
match self {
feat: stabilizing encoding (#219) This PR implements a new encode schema that is more extendible and more compact. It’s also simpler and takes less binary size and maintaining effort. It is inspired by the [Automerge Encoding Format](https://automerge.org/automerge-binary-format-spec/). The main motivation is the extensibility. When we integrate a new CRDT algorithm, we don’t want to make a breaking change to the encoding or keep multiple versions of the encoding schema in the code, as it will make our WASM size much larger. We need a stable and extendible encoding schema for our v1.0 version. This PR also exposes the ops that compose the current container state. For example, now you can make a query about which operation a certain character quickly. This behavior is required in the new snapshot encoding, so it’s included in this PR. # Encoding Schema ## Header The header has 22 bytes. - (0-4 bytes) Magic Bytes: The encoding starts with `loro` as magic bytes. - (4-20 bytes) Checksum: MD5 checksum of the encoded data, including the header starting from 20th bytes. The checksum is encoded as a 16-byte array. The `checksum` and `magic bytes` fields are trimmed when calculating the checksum. - (20-21 bytes) Encoding Method (2 bytes, big endian): Multiple encoding methods are available for a specific encoding version. ## Encode Mode: Updates In this approach, only ops, specifically their historical record, are encoded, while document states are excluded. Like Automerge's format, we employ columnar encoding for operations and changes. Previously, operations were ordered by their Operation ID (OpId) before columnar encoding. However, sorting operations based on their respective containers initially enhance compression potential. ## Encode Mode: Snapshot This mode simultaneously captures document state and historical data. Upon importing a snapshot into a new document, initialization occurs directly from the snapshot, bypassing the need for CRDT-based recalculations. Unlike previous snapshot encoding methods, the current binary output in snapshot mode is compatible with the updates mode. This enhances the efficiency of importing snapshots into non-empty documents, where initialization via snapshot is infeasible. Additionally, when feasible, we leverage the sequence of operations to construct state snapshots. In CRDTs, deducing the specific ops constituting the current container state is feasible. These ops are tagged in relation to the container, facilitating direct state reconstruction from them. This approach, pioneered by Automerge, significantly improves compression efficiency.
2024-01-02 09:03:24 +00:00
InternalDiff::ListRaw(s) => s.is_empty(),
InternalDiff::RichtextRaw(t) => t.is_empty(),
InternalDiff::Map(m) => m.updated.is_empty(),
InternalDiff::Tree(t) => t.is_empty(),
}
}
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
pub(crate) fn compose(self, diff: InternalDiff) -> Result<Self, Self> {
// PERF: avoid clone
match (self, diff) {
feat: stabilizing encoding (#219) This PR implements a new encode schema that is more extendible and more compact. It’s also simpler and takes less binary size and maintaining effort. It is inspired by the [Automerge Encoding Format](https://automerge.org/automerge-binary-format-spec/). The main motivation is the extensibility. When we integrate a new CRDT algorithm, we don’t want to make a breaking change to the encoding or keep multiple versions of the encoding schema in the code, as it will make our WASM size much larger. We need a stable and extendible encoding schema for our v1.0 version. This PR also exposes the ops that compose the current container state. For example, now you can make a query about which operation a certain character quickly. This behavior is required in the new snapshot encoding, so it’s included in this PR. # Encoding Schema ## Header The header has 22 bytes. - (0-4 bytes) Magic Bytes: The encoding starts with `loro` as magic bytes. - (4-20 bytes) Checksum: MD5 checksum of the encoded data, including the header starting from 20th bytes. The checksum is encoded as a 16-byte array. The `checksum` and `magic bytes` fields are trimmed when calculating the checksum. - (20-21 bytes) Encoding Method (2 bytes, big endian): Multiple encoding methods are available for a specific encoding version. ## Encode Mode: Updates In this approach, only ops, specifically their historical record, are encoded, while document states are excluded. Like Automerge's format, we employ columnar encoding for operations and changes. Previously, operations were ordered by their Operation ID (OpId) before columnar encoding. However, sorting operations based on their respective containers initially enhance compression potential. ## Encode Mode: Snapshot This mode simultaneously captures document state and historical data. Upon importing a snapshot into a new document, initialization occurs directly from the snapshot, bypassing the need for CRDT-based recalculations. Unlike previous snapshot encoding methods, the current binary output in snapshot mode is compatible with the updates mode. This enhances the efficiency of importing snapshots into non-empty documents, where initialization via snapshot is infeasible. Additionally, when feasible, we leverage the sequence of operations to construct state snapshots. In CRDTs, deducing the specific ops constituting the current container state is feasible. These ops are tagged in relation to the container, facilitating direct state reconstruction from them. This approach, pioneered by Automerge, significantly improves compression efficiency.
2024-01-02 09:03:24 +00:00
(InternalDiff::ListRaw(a), InternalDiff::ListRaw(b)) => {
Ok(InternalDiff::ListRaw(a.compose(b)))
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
}
(InternalDiff::RichtextRaw(a), InternalDiff::RichtextRaw(b)) => {
Ok(InternalDiff::RichtextRaw(a.compose(b)))
}
(InternalDiff::Map(a), InternalDiff::Map(b)) => Ok(InternalDiff::Map(a.compose(b))),
feat: movable tree support (#120) * feat: tree state * feat: tree value * feat: tree handler * fix: tree diff * test: fuzz tree * feat: tree snapshot * fix: tree default value * fix: test new node * fix: tree diff * fix: tree unresolved value * fix: tree fuzz * fix: tree fuzz move * fix: sort by tree id * fix: tree diff sorted by lamport * fix: sort roots before tree converted to string * fix: rebase main * fix: tree fuzz * fix: delete undo * fix: tree to json children sorted * fix: diff calculate * fix: diff cycle move * fix: tree old parent cache * feat: cache * fix: local op add tree cache * fix: don't add same tree move to cache * fix: need update cache * feat: new cache * bench: add checkout bench * chore: clean * fix: apply node uncheck * perf: lamport bound * fix: calc old parent * feat: tree wasm * fix: change tree diff * fix: tree diff retreat * fix: tree diff should not apply when add node * feat: new tree loro value * chore: typo * fix: tree deep value * fix: snapshot tree index -1 * fix: decode tree snapshot use state * fix: release state lock when emit event * fix: tree node meta container * fix: need set map container when covert to local tree op * fix: tree value add deleted * fix: more then one op in a change * fix: tree fuzz deleted equal * fix: tree calc min lamport * feat: tree encoding v2 * doc: movable tree * fix: test tree meta * test: remove import bytes check * refactor: diff of text and map * refactor: del span * perf: tree state use deleted cache * fix: some details * fix: loro js tree create * feat: add un exist tree node * bench: tree depth * fix: check out should emit event * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * fix: fix merge * fix: return err apply op * fix: fix merge * fix: get map container as tree meta
2023-10-30 03:13:52 +00:00
(InternalDiff::Tree(a), InternalDiff::Tree(b)) => Ok(InternalDiff::Tree(a.compose(b))),
Feat: Peritext-like rich text support (#123) * feat: richtext wip * feat: add insert to style range map wip * feat: richtext state * fix: fix style state inserting and style map * fix: tiny vec merge err * fix: comment err * refactor: use new generic-btree & refine impl * feat: fugue tracker * feat: tracker * feat: tracker * fix: fix a few err in impl * feat: init richtext content state * feat: refactor arena * feat: extract anchor_type info out of style flag * refactor: state apply op more efficiently we can now reuse the repr in state and op * fix: new clippy errors * refactor: use state chunk as delta item * refactor: use two op to insert style start and style end * feat: diff calc * feat: handler * fix: tracker checkout err * fix: pass basic richtext handler tests * fix: pass handler basic marking tests * fix: pass all peritext criteria * feat: snapshot encoding for richtext init * refactor: replace Text with Richtext * refacotr: rm text code * fix: richtext checkout err * refactor: diff of text and map * refactor: del span * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * perf: speedup text insert / del * fix: cursor cache * perf: reduce conversion by introducing InsertText * perf: speed up by refined cursor cache * chore: update gbtree dep * refactor(wasm): use quill delta format * chore: fix warnings
2023-10-29 06:02:13 +00:00
(a, _) => Err(a),
}
}
}
2023-07-22 11:02:22 +00:00
impl Diff {
pub(crate) fn compose(self, diff: Diff) -> Result<Self, Self> {
2023-07-22 11:02:22 +00:00
// PERF: avoid clone
match (self, diff) {
(Diff::List(a), Diff::List(b)) => Ok(Diff::List(a.compose(b))),
(Diff::Text(a), Diff::Text(b)) => Ok(Diff::Text(a.compose(b))),
(Diff::Map(a), Diff::Map(b)) => Ok(Diff::Map(a.compose(b))),
feat: movable tree support (#120) * feat: tree state * feat: tree value * feat: tree handler * fix: tree diff * test: fuzz tree * feat: tree snapshot * fix: tree default value * fix: test new node * fix: tree diff * fix: tree unresolved value * fix: tree fuzz * fix: tree fuzz move * fix: sort by tree id * fix: tree diff sorted by lamport * fix: sort roots before tree converted to string * fix: rebase main * fix: tree fuzz * fix: delete undo * fix: tree to json children sorted * fix: diff calculate * fix: diff cycle move * fix: tree old parent cache * feat: cache * fix: local op add tree cache * fix: don't add same tree move to cache * fix: need update cache * feat: new cache * bench: add checkout bench * chore: clean * fix: apply node uncheck * perf: lamport bound * fix: calc old parent * feat: tree wasm * fix: change tree diff * fix: tree diff retreat * fix: tree diff should not apply when add node * feat: new tree loro value * chore: typo * fix: tree deep value * fix: snapshot tree index -1 * fix: decode tree snapshot use state * fix: release state lock when emit event * fix: tree node meta container * fix: need set map container when covert to local tree op * fix: tree value add deleted * fix: more then one op in a change * fix: tree fuzz deleted equal * fix: tree calc min lamport * feat: tree encoding v2 * doc: movable tree * fix: test tree meta * test: remove import bytes check * refactor: diff of text and map * refactor: del span * perf: tree state use deleted cache * fix: some details * fix: loro js tree create * feat: add un exist tree node * bench: tree depth * fix: check out should emit event * refactor: event * fix: fuzz err * fix: pass all tests * fix: fuzz err * fix: list child cache err * chore: rm debug code * fix: encode enhanced err * fix: encode enchanced * fix: fix several richtext issue * fix: richtext anchor err * chore: rm debug code * fix: richtext fuzz err * feat: speedup text snapshot decode * perf: optimize snapshot encoding * perf: speed up decode & insert * fix: fugue span merge err * perf: speedup delete & id cursor map * fix: fugue merge err * chore: update utils * fix: fix merge * fix: return err apply op * fix: fix merge * fix: get map container as tree meta
2023-10-30 03:13:52 +00:00
(Diff::Tree(a), Diff::Tree(b)) => Ok(Diff::Tree(a.compose(b))),
2023-07-22 11:02:22 +00:00
(a, _) => Err(a),
}
}
pub(crate) fn is_empty(&self) -> bool {
match self {
Diff::List(s) => s.is_empty(),
Diff::Text(t) => t.is_empty(),
Diff::Map(m) => m.updated.is_empty(),
Diff::Tree(t) => t.diff.is_empty(),
}
}
pub(crate) fn concat(self, diff: Diff) -> Diff {
match (self, diff) {
(Diff::List(a), Diff::List(b)) => Diff::List(a.compose(b)),
(Diff::Text(a), Diff::Text(b)) => Diff::Text(a.compose(b)),
(Diff::Map(a), Diff::Map(b)) => {
let mut a = a;
for (k, v) in b.updated {
a = a.with_entry(k, v);
}
Diff::Map(a)
}
(Diff::Tree(a), Diff::Tree(b)) => Diff::Tree(a.extend(b.diff)),
_ => unreachable!(),
}
}
2023-07-22 11:02:22 +00:00
}
pub fn str_to_path(s: &str) -> Option<Vec<Index>> {
s.split('/').map(|x| x.try_into()).try_collect().ok()
}
pub fn path_to_str(path: &[Index]) -> String {
path.iter().map(|x| x.to_string()).join("/")
}
#[cfg(test)]
mod test {
use std::sync::Arc;
use itertools::Itertools;
use loro_common::LoroValue;
use crate::{ApplyDiff, LoroDoc};
#[test]
fn test_text_event() {
let loro = LoroDoc::new();
loro.subscribe_root(Arc::new(|event| {
let mut value = LoroValue::String(Default::default());
value.apply_diff(&event.events.iter().map(|x| x.diff.clone()).collect_vec());
assert_eq!(value, "h223ello".into());
}));
let mut txn = loro.txn().unwrap();
let text = loro.get_text("id");
text.insert_with_txn(&mut txn, 0, "hello").unwrap();
text.insert_with_txn(&mut txn, 1, "223").unwrap();
txn.commit().unwrap();
}
}