From 9544e27be4f9b769330a0fdba8f297e78d651c72 Mon Sep 17 00:00:00 2001 From: leeeon233 Date: Wed, 1 Mar 2023 14:12:05 +0800 Subject: [PATCH] feat: add delta compose --- .vscode/settings.json | 1 + .../src/container/list/list_container.rs | 30 +- .../src/container/text/text_container.rs | 22 +- crates/loro-internal/src/delta.rs | 387 +++++++++++++++++- crates/loro-internal/src/transaction.rs | 2 +- crates/loro-internal/src/transaction/op.rs | 1 + crates/rle/src/rle_trait.rs | 12 + crates/rle/src/rle_vec_old.rs | 6 - 8 files changed, 406 insertions(+), 55 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 24c094cc..55afda20 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -10,6 +10,7 @@ "LOGSTORE", "napi", "nextest", + "peekable", "Peritext", "RUSTFLAGS", "smstring", diff --git a/crates/loro-internal/src/container/list/list_container.rs b/crates/loro-internal/src/container/list/list_container.rs index 386dd5b5..fb6a54c6 100644 --- a/crates/loro-internal/src/container/list/list_container.rs +++ b/crates/loro-internal/src/container/list/list_container.rs @@ -473,10 +473,8 @@ impl Container for ListContainer { InnerContent::List(op) => match op { InnerListOp::Insert { slice, pos } => { if should_notify { - let mut delta = Delta::new(); let delta_vec = self.raw_data.slice(&slice.0).to_vec(); - delta.retain(*pos); - delta.insert(delta_vec); + let delta = Delta::new().retain(*pos).insert(delta_vec); context.push_diff(&self.id, Diff::List(delta)); } @@ -485,9 +483,9 @@ impl Container for ListContainer { } InnerListOp::Delete(span) => { if should_notify { - let mut delta = Delta::new(); - delta.retain(span.start() as usize); - delta.delete(span.atom_len()); + let delta = Delta::new() + .retain(span.start() as usize) + .delete(span.atom_len()); context.push_diff(&self.id, Diff::List(delta)); } @@ -543,9 +541,7 @@ impl Container for ListContainer { match effect { Effect::Del { pos, len } => { if should_notify { - let mut delta = Delta::new(); - delta.retain(pos); - delta.delete(len); + let delta = Delta::new().retain(pos).delete(len); diff.push(Diff::List(delta)); } @@ -568,15 +564,12 @@ impl Container for ListContainer { } Effect::Ins { pos, content } => { if should_notify { - let mut delta = Delta::new(); - delta.retain(pos); let s = if content.is_unknown() { (0..content.atom_len()).map(|_| LoroValue::Null).collect() } else { self.raw_data.slice(&content.0).to_vec() }; - - delta.insert(s); + let delta = Delta::new().retain(pos).insert(s); diff.push(Diff::List(delta)); } if !content.is_unknown() { @@ -665,10 +658,9 @@ impl Container for ListContainer { // notify let should_notify = hierarchy.should_notify(&self.id); if should_notify { - let mut delta = Delta::new(); let delta_vec = self.raw_data.slice(&(0..state_len)).to_vec(); - delta.retain(0); - delta.insert(delta_vec); + let delta = Delta::new().retain(0).insert(delta_vec); + ctx.push_diff(&self.id, Diff::List(delta)); } } else { @@ -850,9 +842,9 @@ mod test { let mut loro = LoroCore::default(); let mut list = loro.get_list("id"); { - let mut txn = loro.transact(); - list.insert(&mut txn, 0, 123).unwrap(); - list.insert(&mut txn, 1, 123).unwrap(); + let txn = loro.transact(); + list.insert(&txn, 0, 123).unwrap(); + list.insert(&txn, 1, 123).unwrap(); } assert_eq!(list.get(0), Some(123.into())); assert_eq!(list.get(1), Some(123.into())); diff --git a/crates/loro-internal/src/container/text/text_container.rs b/crates/loro-internal/src/container/text/text_container.rs index d4b36d5f..5d16b525 100644 --- a/crates/loro-internal/src/container/text/text_container.rs +++ b/crates/loro-internal/src/container/text/text_container.rs @@ -367,9 +367,7 @@ impl Container for TextContainer { } else { self.raw_str.slice(&slice.0).to_owned() }; - let mut delta = Delta::new(); - delta.retain(*pos); - delta.insert(s); + let delta = Delta::new().retain(*pos).insert(s); ctx.push_diff(&self.id, Diff::Text(delta)); } self.state.insert( @@ -379,9 +377,9 @@ impl Container for TextContainer { } InnerListOp::Delete(span) => { if should_notify { - let mut delta = Delta::new(); - delta.retain(span.start() as usize); - delta.delete(span.atom_len()); + let delta = Delta::new() + .retain(span.start() as usize) + .delete(span.atom_len()); ctx.push_diff(&self.id, Diff::Text(delta)); } @@ -433,9 +431,7 @@ impl Container for TextContainer { match effect { Effect::Del { pos, len } => { if should_notify { - let mut delta = Delta::new(); - delta.retain(pos); - delta.delete(len); + let delta = Delta::new().retain(pos).delete(len); diff.push(Diff::Text(delta)); } @@ -449,9 +445,7 @@ impl Container for TextContainer { } else { self.raw_str.slice(&content.0).to_owned() }; - let mut delta = Delta::new(); - delta.retain(pos); - delta.insert(s); + let delta = Delta::new().retain(pos).insert(s); diff.push(Diff::Text(delta)); } @@ -544,9 +538,7 @@ impl Container for TextContainer { let should_notify = hierarchy.should_notify(&self.id); if should_notify { let s = self.raw_str.slice(&(0..state_len)).to_owned(); - let mut delta = Delta::new(); - delta.retain(0); - delta.insert(s); + let delta = Delta::new().retain(0).insert(s); ctx.push_diff(&self.id, Diff::Text(delta)); } } else { diff --git a/crates/loro-internal/src/delta.rs b/crates/loro-internal/src/delta.rs index a2df548a..9acc48b6 100644 --- a/crates/loro-internal/src/delta.rs +++ b/crates/loro-internal/src/delta.rs @@ -1,10 +1,14 @@ use enum_as_inner::EnumAsInner; -use rle::{HasLength, Mergable, RleVec}; +use rle::{HasLength, Mergable, RleVec, Sliceable}; use serde::Serialize; +use smallvec::{smallvec, IntoIter, SmallVec}; +use std::fmt::Debug; + +use crate::LoroValue; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Delta { - vec: RleVec<[DeltaItem; 1]>, + vec: SmallVec<[DeltaItem; 2]>, } impl Serialize for Delta { @@ -23,6 +27,42 @@ pub enum DeltaItem { Delete(usize), } +pub trait Meta: Debug + Default + Clone + PartialEq { + fn is_empty(&self) -> bool; +} + +impl Meta for () { + fn is_empty(&self) -> bool { + true + } +} + +pub trait DeltaValue: Debug + HasLength + Sliceable + Clone + PartialEq { + fn merge(&mut self, other: Self); +} + +impl DeltaItem { + pub fn meta(&self) -> Option<&M> { + match self { + DeltaItem::Insert { value, meta } => Some(meta), + DeltaItem::Retain { len, meta } => Some(meta), + _ => None, + } + } + + pub fn is_retain(&self) -> bool { + matches!(self, Self::Retain { .. }) + } + + pub fn is_insert(&self) -> bool { + matches!(self, Self::Insert { .. }) + } + + pub fn is_delete(&self) -> bool { + matches!(self, Self::Delete(_)) + } +} + impl HasLength for DeltaItem { fn content_len(&self) -> usize { match self { @@ -35,31 +75,220 @@ impl HasLength for DeltaItem { impl Mergable for DeltaItem {} -impl Delta { +pub struct DeltaIterator { + ops: SmallVec<[DeltaItem; 2]>, + index: usize, + offset: usize, +} + +impl DeltaIterator { + fn new(ops: SmallVec<[DeltaItem; 2]>) -> Self { + Self { + ops, + index: 0, + offset: 0, + } + } + + fn next>>(&mut self, len: L) -> DeltaItem { + self.next_impl(len.into()) + } + + fn next_impl(&mut self, mut len: Option) -> DeltaItem { + if len.is_none() { + len = Some(usize::MAX) + } + let mut length = len.unwrap(); + { + let next_op = self.peek(); + if next_op.is_none() { + return DeltaItem::Retain { + len: usize::MAX, + meta: Default::default(), + }; + } + } + // TODO: Maybe can avoid cloning + let op = self.peek().unwrap().clone(); + let op_length = op.content_len(); + let offset = self.offset; + if length >= op_length - offset { + length = op_length - offset; + self.index += 1; + self.offset = 0; + } else { + self.offset += length; + } + + if op.is_delete() { + DeltaItem::Delete(length) + } else { + let mut ans_op = op; + if ans_op.is_retain() { + *ans_op.as_retain_mut().unwrap().0 = length; + } else if ans_op.is_insert() { + let v = ans_op.as_insert_mut().unwrap().0; + *v = v.slice(offset, offset + length); + } + ans_op + } + } + + fn rest(&mut self) -> SmallVec<[DeltaItem; 2]> { + if !self.has_next() { + smallvec![] + } else if self.offset == 0 { + // TODO avoid cloning + self.ops[self.index..].into() + } else { + let offset = self.offset; + let index = self.index; + let next = self.next(None); + let rest = self.ops[self.index..].to_vec(); + self.offset = offset; + self.index = index; + let mut ans = smallvec![next]; + ans.extend(rest); + ans + } + } + + fn has_next(&self) -> bool { + self.peek_length() < usize::MAX + } + + fn peek(&self) -> Option<&DeltaItem> { + self.ops.get(self.index) + } + + fn peek_length(&self) -> usize { + if let Some(op) = self.peek() { + if op.content_len() == usize::MAX { + return usize::MAX; + } + op.content_len() - self.offset + } else { + usize::MAX + } + } + + // fn peek_is_retain(&self) -> bool { + // if let Some(op) = self.peek() { + // op.is_retain() + // } else { + // // default + // true + // } + // } + + fn peek_is_insert(&self) -> bool { + if let Some(op) = self.peek() { + op.is_insert() + } else { + false + } + } + + fn peek_is_delete(&self) -> bool { + if let Some(op) = self.peek() { + op.is_delete() + } else { + false + } + } +} + +impl Delta { pub fn new() -> Self { - Self { vec: RleVec::new() } + Self { + vec: SmallVec::new(), + } } - pub fn retain_with_meta(&mut self, len: usize, meta: Meta) { + pub fn retain_with_meta(mut self, len: usize, meta: M) -> Self { self.vec.push(DeltaItem::Retain { len, meta }); + self } - pub fn insert_with_meta(&mut self, value: Value, meta: Meta) { + pub fn insert_with_meta(mut self, value: Value, meta: M) -> Self { self.vec.push(DeltaItem::Insert { value, meta }); + self } - pub fn delete(&mut self, len: usize) { + pub fn delete(mut self, len: usize) -> Self { + if len == 0 { + return self; + } self.vec.push(DeltaItem::Delete(len)); + self } - pub fn iter(&self) -> impl Iterator> { + pub fn push(&mut self, new_op: DeltaItem) { + let mut index = self.vec.len(); + let last_op = self.vec.last_mut(); + if let Some(mut last_op) = last_op { + if new_op.is_delete() && last_op.is_delete() { + self.vec[index - 1] = + DeltaItem::Delete(last_op.content_len() + new_op.content_len()); + return; + } + // Since it does not matter if we insert before or after deleting at the same index, + // always prefer to insert first + if last_op.is_delete() && new_op.is_insert() { + index -= 1; + let _last_op = self.vec.get_mut(index - 1); + if let Some(_last_op_inner) = _last_op { + last_op = _last_op_inner; + } else { + self.vec.insert(0, new_op); + return; + } + } + if new_op.meta() == last_op.meta() { + if new_op.is_insert() && last_op.is_insert() { + // TODO avoid cloning + let mut value = last_op.as_insert_mut().unwrap().0.clone(); + value.merge(new_op.as_insert().unwrap().0.clone()); + self.vec[index - 1] = DeltaItem::Insert { + value, + meta: new_op.meta().unwrap().clone(), + }; + return; + } else if new_op.is_retain() && last_op.is_retain() { + println!("last op {:?} new_op {:?}", last_op, new_op); + println!( + "len = {} + {} usize MAX {}", + last_op.content_len(), + new_op.content_len(), + usize::MAX + ); + self.vec[index - 1] = DeltaItem::Retain { + len: last_op.content_len() + new_op.content_len(), + meta: new_op.meta().unwrap().clone(), + }; + return; + } + } + } + if index == self.vec.len() { + self.vec.push(new_op); + } else { + self.vec.insert(index, new_op); + } + } + + pub fn iter(&self) -> impl Iterator> { self.vec.iter() } - pub fn iter_mut(&mut self) -> impl Iterator> { + pub fn iter_mut(&mut self) -> impl Iterator> { self.vec.iter_mut() } + pub fn into_op_iter(self) -> DeltaIterator { + DeltaIterator::new(self.vec) + } + pub fn len(&self) -> usize { self.vec.len() } @@ -67,30 +296,160 @@ impl Delta { pub fn is_empty(&self) -> bool { self.len() == 0 } + + /// Reference: [Quill Delta](https://github.com/quilljs/delta) + pub fn compose(self, other: Self) -> Self { + let mut this_iter = self.into_op_iter(); + let mut other_iter = other.into_op_iter(); + let mut ops = vec![]; + let first_other = other_iter.peek(); + if let Some(first_other) = first_other { + if first_other.is_retain() + && (first_other.meta().is_none() || first_other.meta().unwrap().is_empty()) + { + let mut first_left = first_other.content_len(); + let mut first_this = this_iter.peek(); + while let Some(first_this_inner) = first_this { + if first_this_inner.is_insert() && first_this_inner.content_len() <= first_left + { + first_left -= first_this_inner.content_len(); + ops.push(this_iter.next(None)); + first_this = this_iter.peek(); + } else { + break; + } + } + if first_other.content_len() - first_left > 0 { + other_iter.next(first_other.content_len() - first_left); + } + } + } + let mut delta = Delta::new(); + while this_iter.has_next() || other_iter.has_next() { + if other_iter.peek_is_insert() { + delta.push(other_iter.next(None)); + } else if this_iter.peek_is_delete() { + delta.push(this_iter.next(None)); + } else { + let length = this_iter.peek_length().min(other_iter.peek_length()); + let this_op = this_iter.next(length); + let other_op = other_iter.next(length); + if other_op.is_retain() { + let new_op = if this_op.is_retain() { + DeltaItem::Retain { + len: length, + meta: M::default(), + } + } else { + this_op.clone() + }; + // TODO: Meta compose + delta.push(new_op.clone()); + if !other_iter.has_next() && delta.vec[delta.vec.len() - 1].eq(&new_op) { + let rest = Delta { + vec: this_iter.rest(), + }; + return delta.concat(rest).chop(); + } + } else if other_op.is_delete() { + if this_op.is_retain() { + delta.push(other_op); + } else { + // this op is insert + continue; + } + } + } + } + delta.chop() + } + + fn concat(&mut self, mut other: Self) -> Self { + let mut delta = Delta { + vec: self.vec.clone(), + }; + if !other.vec.is_empty() { + // TODO: why? + let other_first = other.vec.remove(0); + delta.push(other_first); + delta.vec.extend(other.vec); + } + delta + } + + fn chop(mut self) -> Self { + let last_op = self.vec.last(); + if let Some(last_op) = last_op { + // TODO: check + if last_op.is_retain() && last_op.meta().unwrap().is_empty() { + self.vec.pop(); + } + } + self + } } -impl Default for Delta { +impl Default for Delta { fn default() -> Self { Self::new() } } -impl Delta { - pub fn retain(&mut self, len: usize) { +impl Delta { + pub fn retain(mut self, len: usize) -> Self { if len == 0 { - return; + return self; } self.vec.push(DeltaItem::Retain { len, meta: Default::default(), }); + self } - pub fn insert(&mut self, value: Value) { + pub fn insert(mut self, value: Value) -> Self { self.vec.push(DeltaItem::Insert { value, meta: Default::default(), }); + self + } +} + +impl DeltaValue for Vec { + fn merge(&mut self, other: Self) { + self.extend(other) + } +} + +impl DeltaValue for String { + fn merge(&mut self, other: Self) { + self.push_str(&other) + } +} + +#[cfg(test)] +mod test { + use super::{Delta, DeltaItem}; + + #[test] + fn delta_push() { + let mut a: Delta = Delta::new().insert("a".to_string()); + a.push(DeltaItem::Insert { + value: "b".to_string(), + meta: (), + }); + assert_eq!(a, Delta::new().insert("ab".to_string())); + } + + #[test] + fn delta_compose() { + let a: Delta = Delta::new().retain(3).insert("abcde".to_string()); + let b = Delta::new().retain(5).delete(6); + assert_eq!( + a.compose(b), + Delta::new().retain(3).insert("ab".to_string()).delete(3) + ); } } diff --git a/crates/loro-internal/src/transaction.rs b/crates/loro-internal/src/transaction.rs index ca27379d..bcb13edd 100644 --- a/crates/loro-internal/src/transaction.rs +++ b/crates/loro-internal/src/transaction.rs @@ -57,7 +57,7 @@ impl Deref for TransactionWrap { } } -pub struct TransactionWrap(pub Arc>); +pub struct TransactionWrap(pub(crate) Arc>); pub struct Transaction { client_id: ClientID, diff --git a/crates/loro-internal/src/transaction/op.rs b/crates/loro-internal/src/transaction/op.rs index 65deda7d..f2513897 100644 --- a/crates/loro-internal/src/transaction/op.rs +++ b/crates/loro-internal/src/transaction/op.rs @@ -67,6 +67,7 @@ pub enum ListTxnOp { Delete { pos: usize, len: usize, + deleted_container: Option>, }, } diff --git a/crates/rle/src/rle_trait.rs b/crates/rle/src/rle_trait.rs index b2b475e2..a1fc782b 100644 --- a/crates/rle/src/rle_trait.rs +++ b/crates/rle/src/rle_trait.rs @@ -96,3 +96,15 @@ impl HasLength for Vec { self.len() } } + +impl Sliceable for Vec { + fn slice(&self, from: usize, to: usize) -> Self { + self[from..to].to_vec() + } +} + +impl Sliceable for String { + fn slice(&self, from: usize, to: usize) -> Self { + self[from..to].to_string() + } +} diff --git a/crates/rle/src/rle_vec_old.rs b/crates/rle/src/rle_vec_old.rs index 090c8254..1105c253 100644 --- a/crates/rle/src/rle_vec_old.rs +++ b/crates/rle/src/rle_vec_old.rs @@ -357,12 +357,6 @@ mod test { } } - impl Sliceable for String { - fn slice(&self, start: usize, end: usize) -> Self { - self[start..end].to_string() - } - } - #[test] fn get_at_atom_index() { let mut vec: RleVecWithIndex = RleVecWithIndex::new();