From bf8973c7583e847a9855679fd235e5b835d250f0 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Thu, 27 Oct 2022 22:11:40 +0800 Subject: [PATCH] feat: set range fix: update cache chore: remove useless tests chore: add bench script --- crates/loro-core/justfile | 3 + .../loro-core/src/container/text/tracker.rs | 2 +- .../src/container/text/tracker/cursor_map.rs | 4 +- .../src/container/text/tracker/yata_impl.rs | 107 ++++++- crates/rle/src/range_map.rs | 303 ++++++++++++++++-- crates/rle/src/rle_tree.rs | 12 +- crates/rle/src/rle_tree/node.rs | 6 +- crates/rle/src/rle_tree/node/internal_impl.rs | 5 + crates/rle/src/rle_tree/node/leaf_impl.rs | 5 + .../rle/src/rle_tree/test/notify_prop_test.rs | 2 +- 10 files changed, 405 insertions(+), 44 deletions(-) diff --git a/crates/loro-core/justfile b/crates/loro-core/justfile index 4b6fbe94..78b0ff4e 100644 --- a/crates/loro-core/justfile +++ b/crates/loro-core/justfile @@ -26,3 +26,6 @@ quick-fuzz: flame: cargo flamegraph --example test --features=fuzzing --root + +bench: + cargo bench --features fuzzing diff --git a/crates/loro-core/src/container/text/tracker.rs b/crates/loro-core/src/container/text/tracker.rs index 2c439b46..2ecae374 100644 --- a/crates/loro-core/src/container/text/tracker.rs +++ b/crates/loro-core/src/container/text/tracker.rs @@ -255,7 +255,7 @@ impl Tracker { debug_log!("DELETED SPANS={}", format!("{:#?}", &spans).red()); self.update_spans(&spans, StatusChange::Delete); self.id_to_cursor - .set((id).into(), cursor_map::Marker::Delete(spans)); + .set_large_range((id).into(), cursor_map::Marker::Delete(spans)); } } } diff --git a/crates/loro-core/src/container/text/tracker/cursor_map.rs b/crates/loro-core/src/container/text/tracker/cursor_map.rs index 6e298f8e..f6e59995 100644 --- a/crates/loro-core/src/container/text/tracker/cursor_map.rs +++ b/crates/loro-core/src/container/text/tracker/cursor_map.rs @@ -213,7 +213,7 @@ pub(super) fn make_notify( map: &mut CursorMap, ) -> impl for<'a> FnMut(&YSpan, *mut LeafNode<'a, YSpan, YSpanTreeTrait>) + '_ { |span, leaf| { - map.set( + map.set_small_range( span.id.into(), Marker::Insert { // SAFETY: marker can only live while the bumpalo is alive. so we are safe to change lifetime here @@ -222,7 +222,7 @@ pub(super) fn make_notify( }, len: span.atom_len(), }, - ) + ); } } diff --git a/crates/loro-core/src/container/text/tracker/yata_impl.rs b/crates/loro-core/src/container/text/tracker/yata_impl.rs index b890aa30..f723afad 100644 --- a/crates/loro-core/src/container/text/tracker/yata_impl.rs +++ b/crates/loro-core/src/container/text/tracker/yata_impl.rs @@ -15,6 +15,7 @@ use super::{ Tracker, }; +// TODO: may use a simpler data structure here #[derive(Default, Debug)] pub struct OpSpanSet { map: RangeMap>, @@ -22,7 +23,7 @@ pub struct OpSpanSet { impl OpSet for OpSpanSet { fn insert(&mut self, value: &YSpan) { - self.map.set( + self.map.set_large_range( value.id.into(), WithStartEnd { start: value.id.into(), @@ -330,6 +331,59 @@ pub mod fuzz { use Action::*; #[test] + fn issue_set_range() { + crdt_list::test::test_with_actions::( + 5, + 100, + vec![ + Sync { from: 1, to: 2 }, + NewOp { + client_id: 2, + pos: 7, + }, + NewOp { + client_id: 2, + pos: 7, + }, + Delete { + client_id: 2, + pos: 37, + len: 37, + }, + Delete { + client_id: 2, + pos: 7, + len: 17, + }, + Sync { from: 2, to: 2 }, + NewOp { + client_id: 2, + pos: 52, + }, + Sync { from: 3, to: 2 }, + Delete { + client_id: 2, + pos: 52, + len: 52, + }, + Delete { + client_id: 2, + pos: 25, + len: 17, + }, + NewOp { + client_id: 2, + pos: 46, + }, + Delete { + client_id: 2, + pos: 52, + len: 52, + }, + ], + ) + } + #[test] fn issue_1() { crdt_list::test::test_with_actions::( 5, @@ -344,34 +398,53 @@ pub mod fuzz { #[test] fn normalize() { let mut actions = vec![ + Sync { from: 1, to: 107 }, NewOp { - client_id: 129, - pos: 142, + client_id: 107, + pos: 107, }, NewOp { - client_id: 0, - pos: 85, + client_id: 107, + pos: 107, }, - Sync { from: 85, to: 86 }, - NewOp { - client_id: 129, - pos: 129, + Delete { + client_id: 237, + pos: 237, + len: 237, }, - Sync { from: 129, to: 129 }, + Delete { + client_id: 107, + pos: 107, + len: 117, + }, + Sync { from: 107, to: 252 }, NewOp { - client_id: 106, - pos: 106, + client_id: 252, + pos: 252, + }, + Sync { from: 3, to: 117 }, + Delete { + client_id: 252, + pos: 252, + len: 252, + }, + Delete { + client_id: 252, + pos: 25, + len: 217, }, NewOp { - client_id: 1, - pos: 0, + client_id: 157, + pos: 146, }, - NewOp { - client_id: 129, - pos: 106, + Delete { + client_id: 252, + pos: 252, + len: 252, }, ]; crdt_list::test::normalize_actions(&mut actions, 5, 100); + dbg!(actions); } } diff --git a/crates/rle/src/range_map.rs b/crates/rle/src/range_map.rs index 6ad9e60f..6777dcda 100644 --- a/crates/rle/src/range_map.rs +++ b/crates/rle/src/range_map.rs @@ -1,8 +1,15 @@ -use std::fmt::Debug; +use bumpalo::boxed::Box as BumpBox; +use std::{cell::UnsafeCell, fmt::Debug, ptr::NonNull}; + +use fxhash::FxHashSet; use crate::{ rle_trait::{GlobalIndex, HasIndex, ZeroElement}, - rle_tree::tree_trait::GlobalTreeTrait, + rle_tree::{ + node::{InternalNode, LeafNode}, + tree_trait::GlobalTreeTrait, + Position, UnsafeCursor, + }, HasLength, Mergable, Rle, RleTree, Sliceable, }; @@ -63,11 +70,9 @@ impl Default } impl RangeMap { - pub fn set(&mut self, start: Index, value: Value) { - self.tree.delete_range( - Some(start), - Some(start + Index::from_usize(std::cmp::max(value.content_len(), 1)).unwrap()), - ); + pub fn set_large_range(&mut self, start: Index, value: Value) { + let end = start + Index::from_usize(std::cmp::max(value.content_len(), 1)).unwrap(); + self.tree.delete_range(Some(start), Some(end)); self.tree.insert( start, WithIndex { @@ -77,6 +82,169 @@ impl RangeMap< ); } + /// In our use cases, most of the set operation is at small range. + /// So we can travel from the first cursor to modify each element gradually + pub fn set_small_range(&mut self, start: Index, value: Value) { + let end = start + Index::from_usize(std::cmp::max(value.atom_len(), 1)).unwrap(); + let cursor = self.tree.get_cursor_ge(start); + if cursor.is_none() { + self.tree.insert( + start, + WithIndex { + value, + index: start, + }, + ); + return; + } + + let mut cursor = cursor.unwrap(); + // SAFETY: we have exclusive ref to the tree + let mut cur_leaf = unsafe { cursor.0.leaf.as_mut() }; + let cur_ptr = cur_leaf.into(); + let mut index = cursor.0.index; + let mut elem = &mut cur_leaf.children[index]; + let elem_end = elem.index + Index::from_usize(elem.atom_len()).unwrap(); + // there are a lot of updates are in-place, we can update them directly and return + // because cache won't change + if elem.index == start && elem_end == end { + **elem = WithIndex { + value, + index: start, + }; + return; + } + + if elem.index >= end { + // there is no elements inside the target range, we can insert directly + self.tree.insert( + start, + WithIndex { + value, + index: start, + }, + ); + return; + } + + if elem.index < start && end < elem_end { + // element contains the target range + let offset = (start - elem.index).as_(); + let leaf: NonNull<_> = cur_leaf.into(); + + self.tree.update_at_cursors( + &mut [UnsafeCursor { + // SAFETY: ignore lifetime to bypass tree mut borrow check + leaf: unsafe { std::mem::transmute(leaf) }, + index, + offset, + pos: crate::rle_tree::Position::Middle, + len: (end - start).as_(), + }], + &mut |v| { + v.value = value.clone(); + }, + &mut |_, _| {}, + ); + + return; + } + + let mut visited_nodes: FxHashSet>> = FxHashSet::default(); + visited_nodes.insert(cur_ptr); + let mut last_end: Index = start; + let mut last_inside_element: Option> = None; + // iterate over the elements inside the range + loop { + if elem.index >= end { + break; + } + + let elem_end = elem.index + Index::from_usize(elem.atom_len()).unwrap(); + if start > elem_end { + debug_assert!(false, "something wrong with get_cursor_ge") + // go to next loop + } else if elem.index < start { + // start element overlaps with target range + // let it keep its left part + **elem = elem.slice(0, (start - elem.index).as_()); + } else if elem_end > end { + // end element overlaps with target range + // let it keep its right part + **elem = elem.slice((end - elem.index).as_(), elem.atom_len()); + } else { + // elements inside the target range + // extends its start to last_end + **elem = WithIndex { + index: last_end, + value: value.slice((last_end - start).as_(), (elem_end - start).as_()), + }; + last_inside_element = Some(elem.into()); + last_end = elem_end; + } + + // move to next element + if index + 1 < cur_leaf.children().len() { + index += 1; + elem = &mut cur_leaf.children[index]; + } else { + if let Some(next) = cur_leaf.next_mut() { + visited_nodes.insert(next.into()); + cur_leaf = next; + } else { + // is the last element of the tree + break; + } + + index = 0; + elem = &mut cur_leaf.children[index]; + } + } + + if last_end != end { + if let Some(mut insider) = last_inside_element { + // we can extended the last element to the end + // SAFETY: we just got the element from the tree and save it to the option value + let insider = unsafe { insider.as_mut() }; + insider.value = value.slice((insider.index - start).as_(), (end - start).as_()); + last_end = end; + } + } + + let mut visited_internal_nodes: FxHashSet>> = + FxHashSet::default(); + for mut leaf in visited_nodes { + // SAFETY: we have exclusive ref to the tree + let leaf = unsafe { leaf.as_mut() }; + leaf.update_cache(); + visited_internal_nodes.insert(leaf.parent); + } + + while !visited_internal_nodes.is_empty() { + for mut internal in std::mem::take(&mut visited_internal_nodes) { + // SAFETY: we have exclusive ref to the tree + let internal = unsafe { internal.as_mut() }; + internal.update_cache(); + if let Some(parent) = internal.parent { + visited_internal_nodes.insert(parent); + } + } + } + + if last_end != end { + // TODO: Can be optimized? + // need to insert a new element from here + // current pointer must be greater than start or at the end of the tree + self.tree.insert( + last_end, + WithIndex { + value: value.slice((last_end - start).as_(), (end - start).as_()), + index: last_end, + }, + ); + } + } + #[inline] pub fn debug_check(&mut self) { self.tree.debug_check() @@ -201,11 +369,16 @@ mod test { struct V { from: usize, to: usize, + key: String, } impl V { - fn new(from: usize, to: usize) -> Self { - Self { from, to } + fn new(from: usize, to: usize, key: &str) -> Self { + Self { + from, + to, + key: key.into(), + } } } impl HasLength for V { @@ -219,13 +392,18 @@ mod test { V { from: self.from + from, to: self.from + to, + key: self.key.clone(), } } } impl ZeroElement for V { fn zero_element() -> Self { - Self { from: 0, to: 0 } + Self { + from: 0, + to: 0, + key: "".to_string(), + } } } @@ -234,26 +412,115 @@ mod test { #[test] fn test_0() { let mut map: VRangeMap = Default::default(); - map.set(10, V::new(10, 20)); - map.set(12, V::new(12, 15)); + map.set_large_range(10, V::new(10, 20, "a")); + map.set_large_range(12, V::new(12, 15, "b")); // 10-12, 12-15, 15-20 assert_eq!(map.get_range(7, 8), Vec::<&V>::new()); - assert_eq!(map.get_range(8, 12), vec![&V::new(10, 12)]); + assert_eq!(map.get_range(8, 12), vec![&V::new(10, 12, "a")]); assert_eq!( map.get_range(14, 16), - vec![&V::new(12, 15), &V::new(15, 20)] + vec![&V::new(12, 15, "b"), &V::new(15, 20, "a")] ); // 10-11, 11-12, 12-15, 15-20 - map.set(11, V::new(11, 12)); + map.set_large_range(11, V::new(11, 12, "c")); assert_eq!( map.get_range(9, 15), - vec![&V::new(10, 11), &V::new(11, 12), &V::new(12, 15)] + vec![ + &V::new(10, 11, "a"), + &V::new(11, 12, "c"), + &V::new(12, 15, "b") + ] ); // 5-20 - map.set(5, V::new(5, 20)); - assert_eq!(map.get_range(9, 15), vec![&V::new(5, 20)]); + map.set_large_range(5, V::new(5, 20, "k")); + assert_eq!(map.get_range(9, 15), vec![&V::new(5, 20, "k")]); + } + + #[test] + fn test_small_range() { + let mut map: VRangeMap = Default::default(); + map.set_small_range(10, V::new(10, 20, "a")); + map.set_small_range(12, V::new(12, 15, "b")); + // 10-12, 12-15, 15-20 + assert_eq!(map.get_range(7, 8), Vec::<&V>::new()); + assert_eq!(map.get_range(8, 12), vec![&V::new(10, 12, "a")]); + assert_eq!( + map.get_range(14, 16), + vec![&V::new(12, 15, "b"), &V::new(15, 20, "a")] + ); + + // 10-11, 11-12, 12-15, 15-20 + map.set_small_range(11, V::new(11, 12, "c")); + assert_eq!( + map.get_range(9, 15), + vec![ + &V::new(10, 11, "a"), + &V::new(11, 12, "c"), + &V::new(12, 15, "b") + ] + ); + + // 5-20 + map.set_small_range(5, V::new(5, 20, "k")); + assert_eq!( + map.get_range(9, 15), + vec![ + &V::new(5, 11, "k"), + &V::new(11, 12, "k"), + &V::new(12, 15, "k"), + ] + ); + } + + #[test] + fn test_small_range_across_spans() { + let mut map: VRangeMap = Default::default(); + map.set_small_range(0, V::new(0, 5, "a")); + map.set_small_range(10, V::new(10, 15, "b")); + map.set_small_range(20, V::new(20, 25, "c")); + map.set_small_range(2, V::new(2, 23, "k")); + assert_eq!( + map.get_range(0, 30), + vec![ + &V::new(0, 2, "a"), + &V::new(2, 23, "k"), + &V::new(23, 25, "c"), + ] + ) + } + + #[test] + fn test_small_range_inside() { + let mut map: VRangeMap = Default::default(); + map.set_small_range(0, V::new(0, 5, "a")); + map.set_small_range(1, V::new(1, 3, "b")); + assert_eq!( + map.get_range(0, 30), + vec![&V::new(0, 1, "a"), &V::new(1, 3, "b"), &V::new(3, 5, "a"),] + ); + + let mut map: VRangeMap = Default::default(); + map.set_small_range(0, V::new(0, 5, "a")); + map.set_small_range(0, V::new(0, 3, "b")); + assert_eq!( + map.get_range(0, 30), + vec![&V::new(0, 3, "b"), &V::new(3, 5, "a"),] + ); + + let mut map: VRangeMap = Default::default(); + map.set_small_range(0, V::new(0, 5, "a")); + map.set_small_range(3, V::new(3, 5, "b")); + assert_eq!( + map.get_range(0, 30), + vec![&V::new(0, 3, "a"), &V::new(3, 5, "b"),] + ); + map.set_small_range(3, V::new(3, 6, "c")); + assert_eq!( + map.get_range(0, 30), + vec![&V::new(0, 3, "a"), &V::new(3, 6, "c")] + ); } static_assertions::assert_not_impl_any!(RangeMap>: Sync, Send); diff --git a/crates/rle/src/rle_tree.rs b/crates/rle/src/rle_tree.rs index 63cbcbbc..685625c4 100644 --- a/crates/rle/src/rle_tree.rs +++ b/crates/rle/src/rle_tree.rs @@ -22,7 +22,7 @@ pub mod tree_trait; #[self_referencing] #[derive(Debug)] pub struct RleTree + 'static> { - bump: Bump, + pub(crate) bump: Bump, #[borrows(bump)] node: &'this mut Node<'this, T, A>, } @@ -127,7 +127,7 @@ impl> RleTree { /// return the first valid cursor after the given index /// reviewed by @Leeeon233 #[inline] - fn get_cursor_ge(&self, mut index: A::Int) -> Option> { + pub(crate) fn get_cursor_ge(&self, mut index: A::Int) -> Option> { let mut node = self.root(); loop { match node { @@ -146,6 +146,14 @@ impl> RleTree { return None; } + if result.child_index == leaf.children.len() - 1 + && leaf.next().is_none() + && !result.found + && (result.pos == Position::After || result.pos == Position::End) + { + return None; + } + return Some(SafeCursor::from_leaf( leaf, result.child_index, diff --git a/crates/rle/src/rle_tree/node.rs b/crates/rle/src/rle_tree/node.rs index 8b87f1af..bfdc105f 100644 --- a/crates/rle/src/rle_tree/node.rs +++ b/crates/rle/src/rle_tree/node.rs @@ -23,7 +23,7 @@ pub enum Node<'a, T: Rle, A: RleTreeTrait> { pub struct InternalNode<'a, T: Rle, A: RleTreeTrait> { bump: &'a Bump, - pub(super) parent: Option>>, + pub(crate) parent: Option>>, pub(super) children: BumpVec<'a, &'a mut Node<'a, T, A>>, pub cache: A::InternalCache, _pin: PhantomPinned, @@ -35,8 +35,8 @@ pub struct InternalNode<'a, T: Rle, A: RleTreeTrait> { // TODO: only one child? pub struct LeafNode<'a, T: Rle, A: RleTreeTrait> { bump: &'a Bump, - pub(super) parent: NonNull>, - pub(super) children: BumpVec<'a, BumpBox<'a, T>>, + pub(crate) parent: NonNull>, + pub(crate) children: BumpVec<'a, BumpBox<'a, T>>, pub(crate) prev: Option>>, pub(crate) next: Option>>, pub cache: A::LeafCache, diff --git a/crates/rle/src/rle_tree/node/internal_impl.rs b/crates/rle/src/rle_tree/node/internal_impl.rs index 42066574..f043cb0e 100644 --- a/crates/rle/src/rle_tree/node/internal_impl.rs +++ b/crates/rle/src/rle_tree/node/internal_impl.rs @@ -672,6 +672,11 @@ impl<'a, T: Rle, A: RleTreeTrait> InternalNode<'a, T, A> { .iter() .position(|child| std::ptr::eq(child.as_internal().unwrap(), self)) } + + #[inline(always)] + pub(crate) fn update_cache(&mut self) { + A::update_cache_internal(self); + } } impl<'a, T: Rle, A: RleTreeTrait> Debug for InternalNode<'a, T, A> { diff --git a/crates/rle/src/rle_tree/node/leaf_impl.rs b/crates/rle/src/rle_tree/node/leaf_impl.rs index a18eb4eb..359bb06a 100644 --- a/crates/rle/src/rle_tree/node/leaf_impl.rs +++ b/crates/rle/src/rle_tree/node/leaf_impl.rs @@ -768,6 +768,11 @@ impl<'a, T: Rle, A: RleTreeTrait> LeafNode<'a, T, A> { .iter() .position(|child| std::ptr::eq(child.as_leaf().unwrap(), self)) } + + #[inline(always)] + pub(crate) fn update_cache(&mut self) { + A::update_cache_leaf(self); + } } impl<'a, T: Rle, A: RleTreeTrait> Debug for LeafNode<'a, T, A> { diff --git a/crates/rle/src/rle_tree/test/notify_prop_test.rs b/crates/rle/src/rle_tree/test/notify_prop_test.rs index 82d9eab3..2838db68 100644 --- a/crates/rle/src/rle_tree/test/notify_prop_test.rs +++ b/crates/rle/src/rle_tree/test/notify_prop_test.rs @@ -172,7 +172,7 @@ fn test(interactions: &[Interaction]) { let mut func = |value: &Value, node: *mut LeafNode<'_, Value, ValueTreeTrait>| { // SAFETY: this is safe because node must be valid let ptr = unsafe { NonNull::new_unchecked(node as usize as *mut _) }; - range_map.set( + range_map.set_small_range( value.value, WithStartEnd::new( value.value,