From bc980c5b02e08240eb71e3ac0af7f7b92313fa1b Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Tue, 11 Oct 2022 16:50:22 +0800 Subject: [PATCH] feat: iter update in rle tree --- Cargo.lock | 1 + crates/loro-core/fuzz/Cargo.lock | 1 + .../loro-core/src/container/text/tracker.rs | 18 ++- .../src/container/text/tracker/content_map.rs | 2 +- .../src/container/text/tracker/yata.rs | 24 ++- crates/loro-core/src/version.rs | 6 +- crates/rle/Cargo.toml | 1 + .../examples/string_tree_bench/string_tree.rs | 2 +- crates/rle/src/range_map.rs | 6 +- crates/rle/src/rle_trait.rs | 4 +- crates/rle/src/rle_tree.rs | 89 +++++++++++ crates/rle/src/rle_tree/iter.rs | 2 + crates/rle/src/rle_tree/node/internal_impl.rs | 74 ++++++++- crates/rle/src/rle_tree/node/leaf_impl.rs | 142 ++++++++++++++++++ .../rle/src/rle_tree/test/string_prop_test.rs | 2 +- 15 files changed, 351 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61fdf3d1..2e15c9ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -786,6 +786,7 @@ dependencies = [ name = "rle" version = "0.1.0" dependencies = [ + "arref", "bumpalo", "color-backtrace", "ctor", diff --git a/crates/loro-core/fuzz/Cargo.lock b/crates/loro-core/fuzz/Cargo.lock index 34918b71..15bb0a03 100644 --- a/crates/loro-core/fuzz/Cargo.lock +++ b/crates/loro-core/fuzz/Cargo.lock @@ -525,6 +525,7 @@ dependencies = [ name = "rle" version = "0.1.0" dependencies = [ + "arref", "bumpalo", "enum-as-inner", "num", diff --git a/crates/loro-core/src/container/text/tracker.rs b/crates/loro-core/src/container/text/tracker.rs index 9fd1b2c7..46b4db92 100644 --- a/crates/loro-core/src/container/text/tracker.rs +++ b/crates/loro-core/src/container/text/tracker.rs @@ -98,7 +98,9 @@ impl Tracker { } TextOpContent::Delete { id, pos, len } => { let spans = self.content.get_id_spans(*pos, *len); - todo!() + self.update_spans(&spans, StatusChange::Delete); + self.id_to_cursor + .set((*id).into(), cursor_map::Marker::Delete(spans)); } } } @@ -109,14 +111,26 @@ impl Tracker { } pub fn update_spans(&mut self, spans: &RleVec, change: StatusChange) { + let mut cursors = Vec::new(); for span in spans.iter() { + let mut span_start = span.min_id(); for marker in self .id_to_cursor .get_range(span.min_id().into(), span.max_id().into()) { - todo!() + let cursor = marker.as_cursor(span_start).unwrap().unwrap(); + span_start = span_start.inc(cursor.len as Counter); + cursors.push(cursor); } } + + self.content.update_at_cursors( + cursors, + &mut |v| { + v.status.apply(change); + }, + &mut make_notify(&mut self.id_to_cursor), + ) } } diff --git a/crates/loro-core/src/container/text/tracker/content_map.rs b/crates/loro-core/src/container/text/tracker/content_map.rs index 083b6153..5b076aea 100644 --- a/crates/loro-core/src/container/text/tracker/content_map.rs +++ b/crates/loro-core/src/container/text/tracker/content_map.rs @@ -141,7 +141,7 @@ impl ContentMap { } } - pub fn get_id_spans(&mut self, pos: usize, len: usize) -> RleVec { + pub fn get_id_spans(&self, pos: usize, len: usize) -> RleVec { let mut ans = RleVec::new(); for cursor in self.iter_range(pos, Some(pos + len)) { ans.push(IdSpan::new( diff --git a/crates/loro-core/src/container/text/tracker/yata.rs b/crates/loro-core/src/container/text/tracker/yata.rs index e0455163..37ad883d 100644 --- a/crates/loro-core/src/container/text/tracker/yata.rs +++ b/crates/loro-core/src/container/text/tracker/yata.rs @@ -181,7 +181,7 @@ pub mod fuzz { use rle::RleVec; use crate::{ - container::text::tracker::Tracker, + container::text::tracker::{y_span::StatusChange, Tracker}, id::{ClientID, ID}, span::IdSpan, }; @@ -241,16 +241,30 @@ pub mod fuzz { type DeleteOp = RleVec; - fn new_del_op(container: &Self::Container, pos: usize, len: usize) -> Self::DeleteOp { - todo!() + fn new_del_op( + container: &Self::Container, + mut pos: usize, + mut len: usize, + ) -> Self::DeleteOp { + if container.content.len() == 0 { + return RleVec::new(); + } + + pos %= container.content.len(); + len = std::cmp::min(len, container.content.len() - pos); + if len == 0 { + return RleVec::new(); + } + + container.content.get_id_spans(pos, len) } fn integrate_delete_op(container: &mut Self::Container, op: Self::DeleteOp) { - todo!() + container.update_spans(&op, StatusChange::Delete); } fn can_apply_del_op(container: &Self::Container, op: &Self::DeleteOp) -> bool { - todo!() + op.iter().all(|x| container.vv.includes(x.max_id())) } } diff --git a/crates/loro-core/src/version.rs b/crates/loro-core/src/version.rs index 005999c5..6a3a08a3 100644 --- a/crates/loro-core/src/version.rs +++ b/crates/loro-core/src/version.rs @@ -3,7 +3,7 @@ use std::{ ops::{Deref, DerefMut}, }; -use fxhash::{FxHashMap}; +use fxhash::FxHashMap; use im::hashmap::HashMap as ImHashMap; use crate::{ @@ -140,8 +140,8 @@ impl VersionVector { } } - pub fn includes(&mut self, id: ID) -> bool { - if let Some(end) = self.get_mut(&id.client_id) { + pub fn includes(&self, id: ID) -> bool { + if let Some(end) = self.get(&id.client_id) { if *end > id.counter { return true; } diff --git a/crates/rle/Cargo.toml b/crates/rle/Cargo.toml index 08b83089..9dd76b52 100644 --- a/crates/rle/Cargo.toml +++ b/crates/rle/Cargo.toml @@ -10,6 +10,7 @@ bumpalo = { version = "3.10.0", features = ["collections", "boxed"] } num = "0.4.0" enum-as-inner = "0.5.1" ouroboros = "0.15.2" +arref = "0.1.0" [dev-dependencies] color-backtrace = { version = "0.5" } diff --git a/crates/rle/examples/string_tree_bench/string_tree.rs b/crates/rle/examples/string_tree_bench/string_tree.rs index 9db16dac..497c0c18 100644 --- a/crates/rle/examples/string_tree_bench/string_tree.rs +++ b/crates/rle/examples/string_tree_bench/string_tree.rs @@ -10,7 +10,7 @@ use smartstring::SmartString; type SString = SmartString; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct CustomString { str: Rc>, slice: Range, diff --git a/crates/rle/src/range_map.rs b/crates/rle/src/range_map.rs index 4e602ff3..3ddd51ec 100644 --- a/crates/rle/src/range_map.rs +++ b/crates/rle/src/range_map.rs @@ -5,7 +5,7 @@ use crate::{ HasLength, Mergable, Rle, RleTree, Sliceable, }; -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct WithGlobalIndex { pub(crate) value: Value, pub(crate) index: Index, @@ -118,7 +118,7 @@ impl RangeMap } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct WithStartEnd { pub start: Index, pub end: Index, @@ -169,7 +169,7 @@ mod test { use std::ops::Range; use super::*; - #[derive(Debug, PartialEq, Eq)] + #[derive(Debug, PartialEq, Eq, Clone)] struct V { from: usize, to: usize, diff --git a/crates/rle/src/rle_trait.rs b/crates/rle/src/rle_trait.rs index 2692e6e8..eb33bb19 100644 --- a/crates/rle/src/rle_trait.rs +++ b/crates/rle/src/rle_trait.rs @@ -43,9 +43,9 @@ pub trait HasLength { fn len(&self) -> usize; } -pub trait Rle: HasLength + Sliceable + Mergable + Debug {} +pub trait Rle: HasLength + Sliceable + Mergable + Debug + Clone {} -impl + Debug, Cfg> Rle for T {} +impl + Debug + Clone, Cfg> Rle for T {} impl Sliceable for Range { fn slice(&self, start: usize, end: usize) -> Self { diff --git a/crates/rle/src/rle_tree.rs b/crates/rle/src/rle_tree.rs index 44832f80..7cc1cd9a 100644 --- a/crates/rle/src/rle_tree.rs +++ b/crates/rle/src/rle_tree.rs @@ -1,3 +1,5 @@ +use std::{collections::HashMap, ptr::NonNull}; + use self::node::{InternalNode, LeafNode, Node}; use crate::Rle; pub(self) use bumpalo::collections::vec::Vec as BumpVec; @@ -227,6 +229,93 @@ impl> RleTree { } } + pub fn update_at_cursors( + &mut self, + cursors: Vec>, + update_fn: &mut U, + notify: &mut F, + ) where + U: FnMut(&mut T), + F: FnMut(&T, *mut LeafNode), + { + let mut updates_map: HashMap, Vec<(usize, Vec)>> = Default::default(); + for cursor in cursors { + // SAFETY: we has the exclusive reference to the tree and the cursor is valid + let updates = unsafe { + cursor + .leaf + .as_ref() + .pure_update(cursor.index, cursor.offset, cursor.len, update_fn) + }; + + if let Some(update) = updates { + updates_map + .entry(cursor.leaf) + .or_default() + .push((cursor.index, update)); + } + } + + let mut internal_updates_map: HashMap< + NonNull<_>, + Vec<(usize, Vec<&'_ mut Node<'_, T, A>>)>, + > = Default::default(); + for (mut leaf, updates) in updates_map { + // SAFETY: we has the exclusive reference to the tree and the cursor is valid + let leaf = unsafe { leaf.as_mut() }; + if let Err(new) = leaf.apply_updates(updates, notify) { + internal_updates_map + .entry(leaf.parent) + .or_default() + .push((leaf.get_index_in_parent().unwrap(), new)); + } else { + // insert empty value to trigger cache update + internal_updates_map.insert(leaf.parent, Default::default()); + } + } + + while !internal_updates_map.is_empty() { + let updates_map = std::mem::take(&mut internal_updates_map); + for (mut node, updates) in updates_map { + // SAFETY: we has the exclusive reference to the tree and the cursor is valid + let node = unsafe { node.as_mut() }; + if updates.is_empty() { + A::update_cache_internal(node); + continue; + } + + if let Err(new) = node.apply_updates(updates) { + internal_updates_map + .entry(node.parent.unwrap()) + .or_default() + .push((node.get_index_in_parent().unwrap(), new)); + } else { + // insert empty value to trigger cache update + internal_updates_map.insert(node.parent.unwrap(), Default::default()); + } + } + } + } + + pub fn iter_update( + &mut self, + start: A::Int, + end: Option, + update_fn: &mut U, + notify: &mut F, + ) where + U: FnMut(&mut T), + F: FnMut(&T, *mut LeafNode<'_, T, A>), + { + let mut cursors = Vec::new(); + for cursor in self.iter_range(start, end) { + cursors.push(cursor.0); + } + + // SAFETY: it's perfectly safe here because we know what we are doing in the update_at_cursors + self.update_at_cursors(unsafe { std::mem::transmute(cursors) }, update_fn, notify); + } + pub fn debug_check(&mut self) { self.with_node_mut(|node| { node.as_internal_mut().unwrap().check(); diff --git a/crates/rle/src/rle_tree/iter.rs b/crates/rle/src/rle_tree/iter.rs index df5c2d4d..f2f2c04f 100644 --- a/crates/rle/src/rle_tree/iter.rs +++ b/crates/rle/src/rle_tree/iter.rs @@ -1,3 +1,5 @@ +use std::marker::PhantomData; + use crate::Rle; use super::{ diff --git a/crates/rle/src/rle_tree/node/internal_impl.rs b/crates/rle/src/rle_tree/node/internal_impl.rs index 391b4e95..d49cf5ff 100644 --- a/crates/rle/src/rle_tree/node/internal_impl.rs +++ b/crates/rle/src/rle_tree/node/internal_impl.rs @@ -65,13 +65,14 @@ impl<'a, T: Rle, A: RleTreeTrait> InternalNode<'a, T, A> { "children.len() = {}", self.children.len() ); - assert!( - self.children.len() <= A::MAX_CHILDREN_NUM, - "children.len() = {}", - self.children.len() - ); } + assert!( + self.children.len() <= A::MAX_CHILDREN_NUM, + "children.len() = {}", + self.children.len() + ); + let self_ptr = self as *const _; for child in self.children.iter_mut() { match child { @@ -217,6 +218,69 @@ impl<'a, T: Rle, A: RleTreeTrait> InternalNode<'a, T, A> { result } + pub(crate) fn apply_updates( + &mut self, + mut updates: Vec<(usize, Vec<&'a mut Node<'a, T, A>>)>, + ) -> Result<(), Vec<&'a mut Node<'a, T, A>>> { + updates.sort_by_key(|x| x.0); + let mut new_children: Vec<&'a mut Node<'a, T, A>> = Vec::new(); + let mut self_children = std::mem::replace(&mut self.children, BumpVec::new_in(self.bump)); + let mut last_end = 0; + for (index, replace) in updates { + let should_pop = index - last_end < self_children.len(); + for child in self_children.drain(0..index - last_end + 1) { + new_children.push(child); + } + + if should_pop { + new_children.pop(); + } + + for element in replace { + new_children.push(element); + } + + last_end = index + 1; + } + + let result = if new_children.len() <= A::MAX_CHILDREN_NUM { + for child in new_children { + self.children.push(child); + } + + A::update_cache_internal(self); + Ok(()) + } else { + for child in new_children.drain(0..A::MAX_CHILDREN_NUM) { + self.children.push(child); + } + + A::update_cache_internal(self); + let mut ans_vec = Vec::new(); + while !new_children.is_empty() { + let mut new_leaf = InternalNode::new(self.bump, self.parent); + for child in new_children.drain(0..A::MAX_CHILDREN_NUM) { + new_leaf.children.push(child); + } + + A::update_cache_internal(&mut new_leaf); + ans_vec.push(self.bump.alloc(Node::Internal(new_leaf))); + } + + Err(ans_vec) + }; + + if result.is_err() && self.is_root() { + let mut new = result.unwrap_err(); + assert!(new.len() == 1); + let v = new.pop().unwrap(); + self._create_level(v); + Ok(()) + } else { + result + } + } + /// connect [prev leaf of left] with [next leaf of right] fn connect_leaf(&mut self, left_index: usize, right_index: usize) { let prev = self.children[left_index] diff --git a/crates/rle/src/rle_tree/node/leaf_impl.rs b/crates/rle/src/rle_tree/node/leaf_impl.rs index 02df214c..36a3fa78 100644 --- a/crates/rle/src/rle_tree/node/leaf_impl.rs +++ b/crates/rle/src/rle_tree/node/leaf_impl.rs @@ -117,6 +117,7 @@ impl<'bump, T: Rle, A: RleTreeTrait> LeafNode<'bump, T, A> { pub(crate) fn check(&self) { assert!(self.children.len() <= A::MAX_CHILDREN_NUM); + // assert!(self.children.len() >= A::MIN_CHILDREN_NUM); assert!(!self.is_deleted()); A::check_cache_leaf(self); if let Some(next) = self.next { @@ -338,6 +339,147 @@ impl<'bump, T: Rle, A: RleTreeTrait> LeafNode<'bump, T, A> { } } + /// this is a effect-less operation, it will not modify the data, it returns the needed change at the given index instead + pub(crate) fn pure_update( + &self, + child_index: usize, + offset: usize, + len: usize, + update_fn: &mut U, + ) -> Option> + where + U: FnMut(&mut T), + { + let mut ans = vec![]; + if len == 0 { + return None; + } + + let child = &self.children[child_index]; + if offset == 0 && child.len() == len { + let mut element = child.slice(0, len); + update_fn(&mut element); + ans.push(element); + return Some(ans); + } + + if offset != 0 { + ans.push(child.slice(0, offset)); + } + let mut target = child.slice(offset, offset + len); + update_fn(&mut target); + if !ans.is_empty() { + if ans[0].is_mergable(&target, &()) { + ans[0].merge(&target, &()); + } else { + ans.push(target); + } + } + + if offset + len < child.len() { + let right = child.slice(offset + len, child.len()); + let mut merged = false; + if let Some(last) = ans.last_mut() { + if last.is_mergable(&right, &()) { + merged = true; + last.merge(&right, &()); + } + } + + if !merged { + ans.push(right); + } + } + + Some(ans) + } + + pub(crate) fn apply_updates( + &mut self, + mut updates: Vec<(usize, Vec)>, + notify: &mut F, + ) -> Result<(), Vec<&'bump mut Node<'bump, T, A>>> + where + F: FnMut(&T, *mut LeafNode<'_, T, A>), + { + updates.sort_by_key(|x| x.0); + let mut i = 0; + let mut j = 1; + // try merge sibling updates + while i + j < updates.len() { + if updates[i].0 + j == updates[i + j].0 { + let (a, b) = arref::array_mut_ref!(&mut updates, [i, i + j]); + for node in b.1.drain(..) { + a.1.push(node); + } + + j += 1; + } else { + i += j; + j = 1; + } + } + + let mut new_children: Vec<&mut T> = Vec::new(); + let mut self_children = std::mem::replace(&mut self.children, BumpVec::new_in(self.bump)); + let mut last_end = 0; + for (index, replace) in updates { + let should_pop = index - last_end < self_children.len(); + for child in self_children.drain(0..index - last_end + 1) { + new_children.push(child); + } + + if should_pop { + new_children.pop(); + } + + for element in replace { + let mut merged = false; + if let Some(last) = new_children.last_mut() { + if last.is_mergable(&element, &()) { + last.merge(&element, &()); + merged = true; + } + } + if !merged { + new_children.push(self.bump.alloc(element)); + } + } + + last_end = index + 1; + } + + if new_children.len() <= A::MAX_CHILDREN_NUM { + for child in new_children { + notify(child, self); + self.children.push(child); + } + + A::update_cache_leaf(self); + Ok(()) + } else { + for child in new_children.drain(0..A::MAX_CHILDREN_NUM) { + notify(child, self); + self.children.push(child); + } + + A::update_cache_leaf(self); + let mut leaf_vec = Vec::new(); + while !new_children.is_empty() { + let mut new_leaf = LeafNode::new(self.bump, self.parent); + for child in new_children.drain(0..A::MAX_CHILDREN_NUM) { + notify(child, &mut new_leaf); + new_leaf.children.push(child); + } + + A::update_cache_leaf(&mut new_leaf); + leaf_vec.push(self.bump.alloc(Node::Leaf(new_leaf))); + } + + Err(leaf_vec) + } + } + fn with_cache_updated( &mut self, result: Result<(), &'bump mut Node<'bump, T, A>>, diff --git a/crates/rle/src/rle_tree/test/string_prop_test.rs b/crates/rle/src/rle_tree/test/string_prop_test.rs index ca7abf9f..727eb8ff 100644 --- a/crates/rle/src/rle_tree/test/string_prop_test.rs +++ b/crates/rle/src/rle_tree/test/string_prop_test.rs @@ -5,7 +5,7 @@ use std::{ use crate::{rle_tree::tree_trait::CumulateTreeTrait, HasLength, Mergable, RleTree, Sliceable}; -#[derive(Debug)] +#[derive(Debug, Clone)] struct CustomString(String); impl Deref for CustomString { type Target = String;