From df1892bd4efaf4d8b41d48c23a7dc60660ef8638 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Tue, 1 Nov 2022 16:31:16 +0800 Subject: [PATCH] refactor: merge reverse deletions fix: counter span merge err --- .../loro-core/src/container/list/list_op.rs | 243 ++++++++++++++++-- .../src/container/text/text_container.rs | 5 +- .../loro-core/src/container/text/tracker.rs | 17 +- .../src/container/text/tracker/cursor_map.rs | 1 - .../container/text/tracker/effects_iter.rs | 3 +- .../src/container/text/tracker/y_span.rs | 9 +- crates/loro-core/src/fuzz.rs | 225 ++++++++++++++++ crates/loro-core/src/span.rs | 76 ++++-- crates/loro-core/src/version.rs | 3 +- crates/rle/src/rle_vec.rs | 33 ++- 10 files changed, 561 insertions(+), 54 deletions(-) diff --git a/crates/loro-core/src/container/list/list_op.rs b/crates/loro-core/src/container/list/list_op.rs index a340c5ec..559d7b59 100644 --- a/crates/loro-core/src/container/list/list_op.rs +++ b/crates/loro-core/src/container/list/list_op.rs @@ -1,3 +1,5 @@ +use std::ops::Range; + use enum_as_inner::EnumAsInner; use rle::{HasLength, Mergable, Sliceable}; @@ -6,7 +8,164 @@ use crate::container::text::text_content::ListSlice; #[derive(EnumAsInner, Debug, Clone)] pub(crate) enum ListOp { Insert { slice: ListSlice, pos: usize }, - Delete { pos: usize, len: usize }, + Delete(DeleteSpan), +} + +/// `len` can be negative so that we can merge text deletions efficiently. +/// It looks like [crate::span::CounterSpan], but how should they merge ([Mergable] impl) and slice ([Sliceable] impl) are very different +/// +/// len cannot be zero; +/// +/// pos: 5, len: -3 eq a range of (2, 5] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct DeleteSpan { + pos: isize, + len: isize, +} + +impl ListOp { + pub fn new_del(pos: usize, len: usize) -> Self { + assert!(len != 0); + Self::Delete(DeleteSpan { + pos: pos as isize, + len: len as isize, + }) + } +} + +impl HasLength for DeleteSpan { + fn content_len(&self) -> usize { + self.len.unsigned_abs() + } +} + +impl DeleteSpan { + pub fn new(pos: isize, len: isize) -> Self { + debug_assert!(len != 0); + Self { pos, len } + } + + #[inline(always)] + pub fn start(&self) -> isize { + if self.len > 0 { + self.pos + } else { + self.pos + 1 + self.len + } + } + + #[inline(always)] + pub fn last(&self) -> isize { + if self.len > 0 { + self.pos + self.len - 1 + } else { + self.pos + } + } + + #[inline(always)] + pub fn end(&self) -> isize { + if self.len > 0 { + self.pos + self.len + } else { + self.pos + 1 + } + } + + #[inline(always)] + pub fn to_range(self) -> Range { + self.start()..self.end() + } + + #[inline(always)] + pub fn bidirectional(&self) -> bool { + self.len.abs() == 1 + } + + #[inline(always)] + pub fn is_reversed(&self) -> bool { + self.len < 0 + } + + #[inline(always)] + pub fn direction(&self) -> isize { + if self.len > 0 { + 1 + } else { + -1 + } + } + + #[inline(always)] + pub fn next_pos(&self) -> isize { + if self.len > 0 { + self.start() + } else { + self.start() - 1 + } + } + + #[inline(always)] + pub fn prev_pos(&self) -> isize { + if self.len > 0 { + self.pos + } else { + self.end() + } + } +} + +impl Mergable for DeleteSpan { + fn is_mergable(&self, other: &Self, _conf: &()) -> bool + where + Self: Sized, + { + match (self.bidirectional(), other.bidirectional()) { + (true, true) => self.pos == other.pos || self.pos == other.pos + 1, + (true, false) => self.pos == other.prev_pos(), + (false, true) => self.next_pos() == other.pos, + (false, false) => self.next_pos() == other.pos && self.direction() == other.direction(), + } + } + + fn merge(&mut self, other: &Self, _conf: &()) + where + Self: Sized, + { + match (self.bidirectional(), other.bidirectional()) { + (true, true) => { + if self.pos == other.pos { + self.len = 2; + } else if self.pos == other.pos + 1 { + self.len = -2; + } else { + unreachable!() + } + } + (true, false) => { + assert!(self.pos == other.prev_pos()); + self.len = other.len + other.direction(); + } + (false, true) => { + assert!(self.next_pos() == other.pos); + self.len += self.direction(); + } + (false, false) => { + assert!(self.next_pos() == other.pos && self.direction() == other.direction()); + self.len += other.len; + } + } + } +} + +impl Sliceable for DeleteSpan { + fn slice(&self, from: usize, to: usize) -> Self { + if self.len > 0 { + Self::new(self.pos, to as isize - from as isize) + } else { + Self::new(self.pos - from as isize, from as isize - to as isize) + } + } } impl Mergable for ListOp { @@ -22,9 +181,8 @@ impl Mergable for ListOp { } => pos + slice.content_len() == *other_pos && slice.is_mergable(other_slice, &()), _ => false, }, - // TODO: add support for reverse merge - ListOp::Delete { pos, len: _ } => match _other { - ListOp::Delete { pos: other_pos, .. } => *pos == *other_pos, + &ListOp::Delete(span) => match _other { + ListOp::Delete(other_span) => span.is_mergable(other_span, &()), _ => false, }, } @@ -43,10 +201,8 @@ impl Mergable for ListOp { } _ => unreachable!(), }, - ListOp::Delete { len, .. } => match _other { - ListOp::Delete { len: other_len, .. } => { - *len += other_len; - } + ListOp::Delete(span) => match _other { + ListOp::Delete(other_span) => span.merge(other_span, &()), _ => unreachable!(), }, } @@ -57,7 +213,7 @@ impl HasLength for ListOp { fn content_len(&self) -> usize { match self { ListOp::Insert { slice, .. } => slice.content_len(), - ListOp::Delete { len, .. } => *len, + ListOp::Delete(span) => span.atom_len(), } } } @@ -69,13 +225,68 @@ impl Sliceable for ListOp { slice: slice.slice(from, to), pos: *pos + from, }, - ListOp::Delete { pos, .. } => ListOp::Delete { - // this looks weird but it's correct - // because right now two adjacent delete can be merge - // only when they delete at the same position. - pos: *pos, - len: to - from, - }, + ListOp::Delete(span) => ListOp::Delete(span.slice(from, to)), } } } + +#[cfg(test)] +mod test { + use rle::{Mergable, Sliceable}; + + use super::DeleteSpan; + + #[test] + fn test_del_span_merge_slice() { + let a = DeleteSpan::new(0, 100); + let mut b = a.slice(0, 1); + let c = a.slice(1, 100); + b.merge(&c, &()); + assert_eq!(b, a); + + // reverse + let a = DeleteSpan::new(99, -100); + let mut b = a.slice(0, 1); + let c = a.slice(1, 100); + b.merge(&c, &()); + assert_eq!(b, a); + + // merge bidirectional + let mut a = DeleteSpan::new(1, -1); + let b = DeleteSpan::new(1, -1); + assert!(a.is_mergable(&b, &())); + a.merge(&b, &()); + assert_eq!(a, DeleteSpan::new(1, 2)); + + let mut a = DeleteSpan::new(1, -1); + let b = DeleteSpan::new(0, -1); + assert_eq!(b.to_range(), 0..1); + assert!(a.is_mergable(&b, &())); + a.merge(&b, &()); + assert_eq!(a, DeleteSpan::new(1, -2)); + + // not merging + let a = DeleteSpan::new(4, 1); + let b = DeleteSpan::new(5, 2); + assert!(!a.is_mergable(&b, &())); + + // next/prev span + let a = DeleteSpan::new(6, -2); + assert_eq!(a.next_pos(), 4); + assert_eq!(a.prev_pos(), 7); + let a = DeleteSpan::new(6, 2); + assert_eq!(a.next_pos(), 6); + assert_eq!(a.prev_pos(), 6); + assert!(a.slice(0, 1).is_mergable(&a.slice(1, 2), &())); + + // neg merge + let mut a = DeleteSpan::new(1, 1); + let b = DeleteSpan::new(0, 1); + a.merge(&b, &()); + assert_eq!(a, DeleteSpan::new(1, -2)); + assert_eq!(a.slice(0, 1), DeleteSpan::new(1, -1)); + assert_eq!(a.slice(1, 2), DeleteSpan::new(0, -1)); + assert_eq!(a.slice(0, 1).to_range(), 1..2); + assert_eq!(a.slice(1, 2).to_range(), 0..1); + } +} diff --git a/crates/loro-core/src/container/text/text_container.rs b/crates/loro-core/src/container/text/text_container.rs index 2a1a6dde..870997af 100644 --- a/crates/loro-core/src/container/text/text_container.rs +++ b/crates/loro-core/src/container/text/text_container.rs @@ -93,7 +93,7 @@ impl TextContainer { Some(id) } - pub fn delete(&mut self, pos: usize, len: usize) -> Option { + pub fn delete(&mut self, mut pos: usize, mut len: usize) -> Option { if len == 0 { return None; } @@ -104,7 +104,7 @@ impl TextContainer { let op = Op::new( id, OpContent::Normal { - content: InsertContent::List(ListOp::Delete { len, pos }), + content: InsertContent::List(ListOp::new_del(pos, len)), }, store.get_or_create_container_idx(&self.id), ); @@ -238,6 +238,7 @@ impl Container for TextContainer { self.state.insert(pos, content.as_slice().unwrap().clone()); } } + debug_log!("AFTER EFFECT"); } debug_log!( "AFTER EFFECT STATE={}", diff --git a/crates/loro-core/src/container/text/tracker.rs b/crates/loro-core/src/container/text/tracker.rs index 11c58cfa..9f64d3c7 100644 --- a/crates/loro-core/src/container/text/tracker.rs +++ b/crates/loro-core/src/container/text/tracker.rs @@ -250,10 +250,23 @@ impl Tracker { // SAFETY: we know this is safe because in [YataImpl::insert_after] there is no access to shared elements unsafe { crdt_list::yata::integrate::(self, yspan) }; } - ListOp::Delete { pos, len } => { - let spans = self.content.get_active_id_spans(*pos, *len); + ListOp::Delete(span) => { + let mut spans = self + .content + .get_active_id_spans(span.start() as usize, span.atom_len()); debug_log!("DELETED SPANS={}", format!("{:#?}", &spans).red()); self.update_spans(&spans, StatusChange::Delete); + + if span.is_reversed() && span.atom_len() > 1 { + spans.reverse(); + // SAFETY: we don't change the size of the span + unsafe { + for span in spans.iter_mut() { + span.reverse(); + } + } + } + self.id_to_cursor .set_small_range((id).into(), cursor_map::Marker::Delete(spans)); } diff --git a/crates/loro-core/src/container/text/tracker/cursor_map.rs b/crates/loro-core/src/container/text/tracker/cursor_map.rs index f6e59995..e7b3a50e 100644 --- a/crates/loro-core/src/container/text/tracker/cursor_map.rs +++ b/crates/loro-core/src/container/text/tracker/cursor_map.rs @@ -292,7 +292,6 @@ impl CursorMap { } pub fn get_first_cursors_at_id_span(&self, span: IdSpan) -> Option { - // TODO: do we need this index for (id, marker) in self.get_range_with_index(span.min_id().into(), span.end_id().into()) { let id: ID = id.into(); match marker { diff --git a/crates/loro-core/src/container/text/tracker/effects_iter.rs b/crates/loro-core/src/container/text/tracker/effects_iter.rs index 542cbc24..de0f31c2 100644 --- a/crates/loro-core/src/container/text/tracker/effects_iter.rs +++ b/crates/loro-core/src/container/text/tracker/effects_iter.rs @@ -46,7 +46,8 @@ impl<'a> Iterator for EffectIter<'a> { while let Some((ref mut delete_op_id, ref mut delete_targets)) = self.current_delete_targets { - if let Some(target) = delete_targets.pop() { + if let Some(mut target) = delete_targets.pop() { + target.normalize_(); let result = self .tracker .id_to_cursor diff --git a/crates/loro-core/src/container/text/tracker/y_span.rs b/crates/loro-core/src/container/text/tracker/y_span.rs index 0b217ec5..a28fc598 100644 --- a/crates/loro-core/src/container/text/tracker/y_span.rs +++ b/crates/loro-core/src/container/text/tracker/y_span.rs @@ -1,8 +1,10 @@ use std::fmt::Display; use crate::{ - container::text::text_content::ListSlice, id::Counter, span::IdSpan, ContentType, - InsertContentTrait, ID, + container::text::text_content::ListSlice, + id::Counter, + span::{HasCounter, HasCounterSpan, IdSpan}, + ContentType, InsertContentTrait, ID, }; use rle::{rle_tree::tree_trait::CumulateTreeTrait, HasLength, Mergable, Sliceable}; @@ -108,8 +110,7 @@ impl YSpan { return false; } - self.id.counter < id.counter.end - && self.id.counter + (self.len as Counter) > id.counter.min() + self.id.counter < id.ctr_end() && self.id.counter + (self.len as Counter) > id.ctr_start() } } diff --git a/crates/loro-core/src/fuzz.rs b/crates/loro-core/src/fuzz.rs index b98db3ac..10153947 100644 --- a/crates/loro-core/src/fuzz.rs +++ b/crates/loro-core/src/fuzz.rs @@ -313,6 +313,230 @@ mod test { use super::Action::*; use super::*; + #[test] + fn test_15() { + // retreat failed + test_multi_sites( + 2, + vec![ + Ins { + content: "012345".into(), + pos: 16145685675428772607, + site: 50, + }, + Ins { + content: "k".into(), + pos: 12514849900981321857, + site: 173, + }, + Ins { + content: "jjj".into(), + pos: 827253908580597753, + site: 235, + }, + Ins { + content: "hh".into(), + pos: 10619084171383039, + site: 186, + }, + Sync { from: 186, to: 187 }, + Ins { + content: "bb".into(), + pos: 9154669102093696963, + site: 1, + }, + Ins { + content: "ccc".into(), + pos: 10619084154605823, + site: 186, + }, + Sync { from: 186, to: 187 }, + Ins { + content: "mm".into(), + pos: 10619084154662902, + site: 186, + }, + Sync { from: 103, to: 186 }, + Ins { + content: "abcdef".into(), + pos: 2718485543582577120, + site: 0, + }, + Del { + pos: 77426591251806906, + len: 15800371181095160576, + site: 1, + }, + Del { + pos: 16348878094905390375, + len: 16348879061405328098, + site: 226, + }, + Del { + pos: 196469752251612538, + len: 13455202075478711110, + site: 186, + }, + Del { + pos: 13527329853585490429, + len: 8863007108824969696, + site: 186, + }, + Sync { from: 186, to: 65 }, + Ins { + content: "kk".into(), + pos: 18230571291461418738, + site: 255, + }, + Sync { from: 0, to: 43 }, + Del { + pos: 9007624456401910, + len: 17728429828748607488, + site: 0, + }, + Ins { + content: "666666".into(), + pos: 5058070217968582656, + site: 219, + }, + Del { + pos: 1437795153694407354, + len: 10619084140647187, + site: 0, + }, + ], + ) + } + #[test] + fn test_14() { + // retreat failed + test_multi_sites( + 8, + vec![ + Ins { + content: "(`".into(), + pos: 289360693583710602, + site: 4, + }, + Sync { from: 4, to: 213 }, + Ins { + content: "333FFFF\u{3}\0(".into(), + pos: 289360693583710602, + site: 4, + }, + Sync { from: 4, to: 137 }, + Ins { + content: "3\u{3}\0@".into(), + pos: 289360693583710602, + site: 4, + }, + Sync { from: 4, to: 137 }, + Ins { + content: "\u{4}(".into(), + pos: 9877801357708624266, + site: 5, + }, + Del { + pos: 14806280867407362682, + len: 289360691352074874, + site: 4, + }, + Sync { from: 137, to: 0 }, + Sync { from: 129, to: 10 }, + Ins { + content: "".into(), + pos: 15408455913343287690, + site: 14, + }, + Sync { from: 204, to: 52 }, + Ins { + content: "".into(), + pos: 1513209475975492403, + site: 137, + }, + Ins { + content: "\u{4}\0\u{4}(".into(), + pos: 106532207656077961, + site: 28, + }, + Del { + pos: 7089336938131513954, + len: 7089336938131513954, + site: 98, + }, + Del { + pos: 7089336938131513954, + len: 7089336938131513899, + site: 98, + }, + Del { + pos: 7089336938131513954, + len: 7089336938131513954, + site: 98, + }, + Del { + pos: 7089336938131513954, + len: 7089336938131513954, + site: 98, + }, + Del { + pos: 7089336938131513954, + len: 7089336938131513954, + site: 98, + }, + Del { + pos: 7089336938131513954, + len: 9971017955808381026, + site: 137, + }, + Sync { from: 98, to: 98 }, + ], + ) + } + #[test] + fn test_13() { + // retreat failed + test_multi_sites( + 8, + vec![ + Ins { + content: "ab".into(), + pos: 289360693583710602, + site: 4, + }, + Sync { from: 4, to: 137 }, + Del { + pos: 9873061956456284998, + len: 9955211391596233748, + site: 137, + }, + Ins { + content: "xxx".into(), + pos: 289365091630221706, + site: 4, + }, + Sync { from: 137, to: 0 }, + Sync { from: 129, to: 10 }, + Del { + pos: 1513209475199240744, + len: 1130315200595337, + site: 44, + }, + Del { + pos: 0, + len: 1, + site: 1, + }, + Sync { from: 4, to: 0 }, + Ins { + content: "m".into(), + pos: 8825501086237362561, + site: 122, + }, + ], + ) + } + #[test] fn test_12() { // retreat failed @@ -375,6 +599,7 @@ mod test { }], ) } + #[test] fn test_9() { test_multi_sites( diff --git a/crates/loro-core/src/span.rs b/crates/loro-core/src/span.rs index e7d3c06e..3f9e1179 100644 --- a/crates/loro-core/src/span.rs +++ b/crates/loro-core/src/span.rs @@ -47,16 +47,37 @@ impl CounterSpan { } } + /// Make end greater than start + pub fn normalize_(&mut self) { + if self.end < self.start { + self.reverse(); + } + } + + #[inline(always)] + pub fn direction(&self) -> i32 { + if self.start < self.end { + 1 + } else { + -1 + } + } + + #[inline(always)] + pub fn is_reversed(&self) -> bool { + self.end < self.start + } + #[inline] pub fn min(&self) -> Counter { if self.start < self.end { self.start } else { - self.end + self.end + 1 } } - #[inline] + #[inline(always)] pub fn max(&self) -> Counter { if self.start > self.end { self.start @@ -65,11 +86,12 @@ impl CounterSpan { } } + #[inline(always)] pub fn end(&self) -> i32 { - if self.start > self.end { - self.start + 1 - } else { + if self.start < self.end { self.end + } else { + self.start + 1 } } @@ -82,19 +104,19 @@ impl CounterSpan { } } - pub fn set_start(&mut self, start: Counter) { + pub fn set_start(&mut self, new_start: Counter) { if self.start < self.end { - self.start = start.min(self.end); + self.start = new_start.min(self.end); } else { - self.start = start.max(self.end); + self.start = new_start.max(self.end); } } - pub fn set_end(&mut self, end: Counter) { + pub fn set_end(&mut self, new_end: Counter) { if self.start < self.end { - self.end = end.max(self.start); + self.end = new_end.max(self.start); } else { - self.end = end.min(self.start); + self.end = new_end.min(self.start); } } } @@ -102,7 +124,7 @@ impl CounterSpan { impl HasLength for CounterSpan { #[inline] fn content_len(&self) -> usize { - if self.end > self.start { + if self.start < self.end { (self.end - self.start) as usize } else { (self.start - self.end) as usize @@ -132,7 +154,8 @@ impl Sliceable for CounterSpan { impl Mergable for CounterSpan { #[inline] fn is_mergable(&self, other: &Self, _: &()) -> bool { - self.end == other.start + // TODO: can use the similar logic as [DeleteSpan] to merge + self.end == other.start && self.direction() == other.direction() } #[inline] @@ -161,14 +184,24 @@ impl IdSpan { } } - #[inline] - pub fn min_ctr(&self) -> Counter { - self.counter.min() + #[inline(always)] + pub fn is_reversed(&self) -> bool { + self.counter.end < self.counter.start } - #[inline] - pub fn max_ctr(&self) -> Counter { - self.counter.max() + #[inline(always)] + pub fn id_at_begin(&self) -> ID { + ID::new(self.client_id, self.counter.start) + } + + #[inline(always)] + pub fn reverse(&mut self) { + self.counter.reverse(); + } + + #[inline(always)] + pub fn normalize_(&mut self) { + self.counter.normalize_(); } #[inline] @@ -176,11 +209,6 @@ impl IdSpan { ID::new(self.client_id, self.counter.min()) } - #[inline] - pub fn max_id(&self) -> ID { - ID::new(self.client_id, self.counter.max()) - } - #[inline] pub fn end_id(&self) -> ID { ID::new(self.client_id, self.counter.end()) diff --git a/crates/loro-core/src/version.rs b/crates/loro-core/src/version.rs index 037ae357..23a2b3fb 100644 --- a/crates/loro-core/src/version.rs +++ b/crates/loro-core/src/version.rs @@ -123,7 +123,8 @@ fn subtract_start(m: &mut FxHashMap, target: IdSpan) { } } -fn merge(m: &mut FxHashMap, target: IdSpan) { +fn merge(m: &mut FxHashMap, mut target: IdSpan) { + target.normalize_(); if let Some(span) = m.get_mut(&target.client_id) { span.start = span.start.min(target.counter.start); span.end = span.end.max(target.counter.end); diff --git a/crates/rle/src/rle_vec.rs b/crates/rle/src/rle_vec.rs index c6d9e358..cacbecd6 100644 --- a/crates/rle/src/rle_vec.rs +++ b/crates/rle/src/rle_vec.rs @@ -1,6 +1,6 @@ use std::{ fmt::Debug, - ops::{Deref, Index, Range}, + ops::{Deref, DerefMut, Index, Range}, }; use num::{traits::AsPrimitive, FromPrimitive}; @@ -59,6 +59,24 @@ where pub fn iter(&self) -> std::slice::Iter<'_, A::Item> { self.vec.iter() } + + /// # Safety + /// + /// should not change the element's length during iter + pub unsafe fn iter_mut(&mut self) -> std::slice::IterMut<'_, A::Item> { + self.vec.iter_mut() + } + + pub fn reverse(&mut self) { + self.vec.reverse() + } + + pub fn check(&self) { + assert_eq!( + self.atom_len, + self.vec.iter().map(|x| x.atom_len()).sum::() + ); + } } impl Default for RleVecWithLen @@ -128,6 +146,10 @@ impl RleVec { pub fn len(&self) -> usize { self.vec.len() } + + pub fn reverse(&mut self) { + self.vec.reverse() + } } impl IntoIterator for RleVec { @@ -432,6 +454,11 @@ impl RleVec { self.vec.iter() } + #[inline(always)] + pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, A::Item> { + self.vec.iter_mut() + } + #[inline(always)] pub fn get_merged(&self, index: usize) -> Option<&A::Item> { self.vec.get(index) @@ -536,10 +563,10 @@ impl Deref for RleVec { } impl Deref for RleVecWithLen { - type Target = [A::Item]; + type Target = RleVec; fn deref(&self) -> &Self::Target { - &self.vec.vec + &self.vec } }