Merge pull request #14 from loro-dev/perf-sync

perf: improve performance on syncing data
This commit is contained in:
Zixuan Chen 2022-11-10 14:56:03 +08:00 committed by GitHub
commit 61397025de
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
29 changed files with 4750 additions and 244 deletions

View file

@ -15,18 +15,28 @@ mod run {
use rand::SeedableRng;
use serde_json::Value;
pub fn criterion_benchmark(c: &mut Criterion) {
pub fn two_client_edits(c: &mut Criterion) {
let mut rgn = rand::rngs::StdRng::seed_from_u64(0);
let mut bytes = Vec::new();
for _ in 0..1000 {
for _ in 0..8000 {
bytes.push(rgn.gen::<u8>());
}
let mut gen = Unstructured::new(&bytes);
let mut c = c.benchmark_group("sync");
let actions = gen.arbitrary::<[Action; 200]>().unwrap();
c.bench_function("random text edit 2 sites", |b| {
b.iter(|| test_multi_sites(2, actions.clone().into()))
});
c.bench_function("random text edit 8 sites", |b| {
b.iter(|| test_multi_sites(8, actions.clone().into()))
});
let actions = gen.arbitrary::<[Action; 4000]>().unwrap();
c.sample_size(10);
c.bench_function("random text edit 8 sites long", |b| {
b.iter(|| test_multi_sites(8, actions.clone().into()))
});
}
pub fn b4(c: &mut Criterion) {
@ -35,7 +45,9 @@ mod run {
d.read_to_string(&mut s).unwrap();
let json: Value = serde_json::from_str(&s).unwrap();
let txns = json.as_object().unwrap().get("txns");
c.bench_function("B4", |b| {
println!("{}", txns.unwrap().as_array().unwrap().len());
let mut b = c.benchmark_group("directapply");
b.bench_function("B4", |b| {
b.iter(|| {
let mut loro = LoroCore::default();
let mut text = loro.get_or_create_root_text("text").unwrap();
@ -57,12 +69,85 @@ mod run {
}
})
});
b.sample_size(10);
b.bench_function("B4DirectSync", |b| {
b.iter(|| {
let mut loro = LoroCore::default();
let mut loro_b = LoroCore::default();
for txn in txns.unwrap().as_array().unwrap() {
let mut text = loro.get_or_create_root_text("text").unwrap();
let patches = txn
.as_object()
.unwrap()
.get("patches")
.unwrap()
.as_array()
.unwrap();
for patch in patches {
let pos = patch[0].as_u64().unwrap() as usize;
let del_here = patch[1].as_u64().unwrap() as usize;
let ins_content = patch[2].as_str().unwrap();
text.delete(pos, del_here);
text.insert(pos, ins_content);
}
drop(text);
loro_b.import(loro.export(loro_b.vv()));
}
})
});
drop(b);
let mut b = c.benchmark_group("sync");
b.bench_function("B4Parallel", |b| {
b.iter(|| {
let mut loro = LoroCore::default();
let mut loro_b = LoroCore::default();
let mut i = 0;
for txn in txns.unwrap().as_array().unwrap() {
i += 1;
if i > 1000 {
break;
}
let mut text = loro.get_or_create_root_text("text").unwrap();
let patches = txn
.as_object()
.unwrap()
.get("patches")
.unwrap()
.as_array()
.unwrap();
for patch in patches {
let pos = patch[0].as_u64().unwrap() as usize;
let del_here = patch[1].as_u64().unwrap() as usize;
let ins_content = patch[2].as_str().unwrap();
text.delete(pos, del_here);
text.insert(pos, ins_content);
}
drop(text);
let mut text = loro_b.get_or_create_root_text("text").unwrap();
for patch in patches {
let pos = patch[0].as_u64().unwrap() as usize;
let del_here = patch[1].as_u64().unwrap() as usize;
let ins_content = patch[2].as_str().unwrap();
text.delete(pos, del_here);
text.insert(pos, ins_content);
}
drop(text);
loro_b.import(loro.export(loro_b.vv()));
loro.import(loro_b.export(loro.vv()));
}
})
});
}
}
pub fn dumb(_c: &mut Criterion) {}
#[cfg(feature = "fuzzing")]
criterion_group!(benches, run::criterion_benchmark, run::b4);
criterion_group!(benches, run::two_client_edits, run::b4);
#[cfg(not(feature = "fuzzing"))]
criterion_group!(benches, dumb);
criterion_main!(benches);

View file

@ -0,0 +1,61 @@
#[cfg(not(feature = "fuzzing"))]
fn main() {}
#[cfg(feature = "fuzzing")]
fn main() {
const RAW_DATA: &[u8; 901823] = include_bytes!("../benches/automerge-paper.json.gz");
use std::{io::Read, time::Instant};
use flate2::read::GzDecoder;
use loro_core::LoroCore;
use serde_json::Value;
let mut d = GzDecoder::new(&RAW_DATA[..]);
let mut s = String::new();
d.read_to_string(&mut s).unwrap();
let json: Value = serde_json::from_str(&s).unwrap();
let txns = json.as_object().unwrap().get("txns");
println!("Txn: {}", txns.unwrap().as_array().unwrap().len());
let mut loro = LoroCore::default();
let mut loro_b = LoroCore::default();
let mut loro_c = LoroCore::default();
let start = Instant::now();
for (i, txn) in txns.unwrap().as_array().unwrap().into_iter().enumerate() {
let mut text = loro.get_or_create_root_text("text").unwrap();
let patches = txn
.as_object()
.unwrap()
.get("patches")
.unwrap()
.as_array()
.unwrap();
for patch in patches {
let pos = patch[0].as_u64().unwrap() as usize;
let del_here = patch[1].as_u64().unwrap() as usize;
let ins_content = patch[2].as_str().unwrap();
text.delete(pos, del_here);
text.insert(pos, ins_content);
}
drop(text);
let mut text = loro_b.get_or_create_root_text("text").unwrap();
for patch in patches {
let pos = patch[0].as_u64().unwrap() as usize;
let del_here = patch[1].as_u64().unwrap() as usize;
let ins_content = patch[2].as_str().unwrap();
text.delete(pos, del_here);
text.insert(pos, ins_content);
}
drop(text);
if i % 10 == 0 {
loro.import(loro_b.export(loro.vv()));
loro_b.import(loro.export(loro_b.vv()));
}
}
loro_b.debug_inspect();
loro.debug_inspect();
println!("Elapsed {}ms", start.elapsed().as_millis());
loro_c.import(loro.export(loro_c.vv()));
println!("Elapsed {}ms", start.elapsed().as_millis());
}

View file

@ -25,7 +25,7 @@ quick-fuzz:
cargo fuzz run text -- -max_total_time=10 -max_len=1000
flame:
cargo flamegraph --example test --features=fuzzing --root
cargo flamegraph --example text_sync --features=fuzzing --root
bench *FLAGS:
cargo bench --features fuzzing {{FLAGS}}

View file

@ -48,7 +48,6 @@ impl<O> Change<O> {
id: ID,
lamport: Lamport,
timestamp: Timestamp,
_freezed: bool,
) -> Self {
Change {
ops,
@ -83,13 +82,11 @@ impl<O: Mergable + HasLength + HasIndex> HasLength for Change<O> {
pub struct ChangeMergeCfg {
pub max_change_length: usize,
pub max_change_interval: usize,
pub from_this_client: bool,
}
impl ChangeMergeCfg {
pub fn new(from_this: bool) -> Self {
pub fn new() -> Self {
ChangeMergeCfg {
from_this_client: from_this,
max_change_length: 1024,
max_change_interval: 60,
}
@ -101,7 +98,6 @@ impl Default for ChangeMergeCfg {
Self {
max_change_length: 1024,
max_change_interval: 60,
from_this_client: false,
}
}
}
@ -112,10 +108,6 @@ impl Mergable<ChangeMergeCfg> for Change {
}
fn is_mergable(&self, other: &Self, cfg: &ChangeMergeCfg) -> bool {
if !cfg.from_this_client {
return false;
}
if other.deps.is_empty() || !(other.deps.len() == 1 && self.id_last() == other.deps[0]) {
return false;
}

View file

@ -19,8 +19,8 @@ pub(crate) enum ListOp {
/// pos: 5, len: -3 eq a range of (2, 5]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct DeleteSpan {
pos: isize,
len: isize,
pub pos: isize,
pub len: isize,
}
impl ListOp {

View file

@ -19,7 +19,7 @@ fn basic() {
let mut container = loro.get_or_create_root_map("map").unwrap();
container.insert("haha".into(), InsertValue::Int32(1));
let ans = fx_map!(
"haha".into() => LoroValue::Integer(1)
"haha".into() => LoroValue::I32(1)
);
assert_eq!(container.get_value(), LoroValue::Map(Box::new(ans)));

View file

@ -7,7 +7,10 @@ use rle::{
use smallvec::{smallvec, SmallVec};
use crate::{
container::{list::list_op::ListOp, Container, ContainerID, ContainerType},
container::{
list::list_op::{DeleteSpan, ListOp},
Container, ContainerID, ContainerType,
},
dag::DagUtils,
debug_log,
id::{Counter, ID},
@ -71,7 +74,7 @@ impl TextContainer {
id,
OpContent::Normal {
content: InsertContent::List(ListOp::Insert {
slice: ListSlice::Slice(slice),
slice: slice.into(),
pos,
}),
},
@ -143,13 +146,83 @@ impl Container for TextContainer {
// TODO: move main logic to tracker module
fn apply(&mut self, id_span: IdSpan, store: &LogStore) {
debug_log!("APPLY ENTRY client={}", store.this_client_id);
let self_idx = store.get_container_idx(&self.id).unwrap();
let new_op_id = id_span.id_last();
// TODO: may reduce following two into one op
let common_ancestors = store.find_common_ancestor(&[new_op_id], &self.head);
if common_ancestors == self.head {
let latest_head = smallvec![new_op_id];
let path = store.find_path(&self.head, &latest_head);
if path.right.len() == 1 {
// linear updates, we can apply them directly
let start = self.vv.get(&new_op_id.client_id).copied().unwrap_or(0);
for op in store.iter_ops_at_id_span(
IdSpan::new(new_op_id.client_id, start, new_op_id.counter + 1),
self.id.clone(),
) {
let op = op.get_sliced();
match &op.content {
OpContent::Normal {
content: InsertContent::List(op),
} => match op {
ListOp::Insert { slice, pos } => {
self.state.insert(*pos, slice.as_slice().unwrap().clone().0)
}
ListOp::Delete(span) => self.state.delete_range(
Some(span.start() as usize),
Some(span.end() as usize),
),
},
_ => unreachable!(),
}
}
self.head = latest_head;
self.vv.set_last(new_op_id);
return;
} else {
let path: Vec<_> = store.iter_partial(&self.head, path.right).collect();
if path
.iter()
.all(|x| x.forward.is_empty() && x.retreat.is_empty())
{
// if we don't need to retreat or forward, we can update the state directly
for iter in path {
let change = iter
.data
.slice(iter.slice.start as usize, iter.slice.end as usize);
for op in change.ops.iter() {
if op.container == self_idx {
match &op.content {
OpContent::Normal {
content: InsertContent::List(op),
} => match op {
ListOp::Insert { slice, pos } => self
.state
.insert(*pos, slice.as_slice().unwrap().clone().0),
ListOp::Delete(span) => self.state.delete_range(
Some(span.start() as usize),
Some(span.end() as usize),
),
},
_ => unreachable!(),
}
}
}
}
self.head = latest_head;
self.vv.set_last(new_op_id);
return;
}
}
}
let path_to_head = store.find_path(&common_ancestors, &self.head);
let mut common_ancestors_vv = self.vv.clone();
common_ancestors_vv.retreat(&path_to_head.right);
let mut latest_head: SmallVec<[ID; 2]> = self.head.clone();
latest_head.retain(|x| !common_ancestors_vv.includes_id(*x));
latest_head.push(new_op_id);
// println!("{}", store.mermaid());
debug_log!(
@ -158,17 +231,13 @@ impl Container for TextContainer {
new_op_id,
&self.head
);
// TODO: reuse tracker
// let head = if true {
let head = if common_ancestors.is_empty()
let head = if (common_ancestors.is_empty() && !self.tracker.start_vv().is_empty())
|| !common_ancestors.iter().all(|x| self.tracker.contains(*x))
{
debug_log!("NewTracker");
// FIXME use common ancestors
self.tracker = Tracker::new(common_ancestors_vv, Counter::MAX / 2);
common_ancestors
// self.tracker = Tracker::new(Default::default(), 0);
// smallvec![]
} else {
debug_log!("OldTracker");
self.tracker.checkout_to_latest();
@ -176,10 +245,8 @@ impl Container for TextContainer {
};
// stage 1
// TODO: need a better mechanism to track the head (KEEP IT IN TRACKER?)
let path = store.find_path(&head, &latest_head);
debug_log!("path={:?}", &path.right);
let self_idx = store.get_container_idx(&self.id).unwrap();
for iter in store.iter_partial(&head, path.right) {
// TODO: avoid this clone
let change = iter
@ -230,7 +297,8 @@ impl Container for TextContainer {
match effect {
Effect::Del { pos, len } => self.state.delete_range(Some(pos), Some(pos + len)),
Effect::Ins { pos, content } => {
self.state.insert(pos, content.as_slice().unwrap().clone());
self.state
.insert(pos, content.as_slice().unwrap().clone().0);
}
}
debug_log!("AFTER EFFECT");
@ -240,7 +308,7 @@ impl Container for TextContainer {
self.get_value().as_string().unwrap()
);
self.head.push(new_op_id);
self.head = latest_head;
self.vv.set_last(new_op_id);
debug_log!("--------------------------------");
}
@ -268,7 +336,7 @@ impl Container for TextContainer {
.and_then(|x| x.as_insert_mut())
{
if let Some(change) = if let ListSlice::Slice(ranges) = slice {
Some(self.raw_str.get_str(ranges))
Some(self.raw_str.get_str(&ranges.0))
} else {
None
} {
@ -292,7 +360,7 @@ impl Container for TextContainer {
ListSlice::Slice(_) => unreachable!(),
ListSlice::Unknown(_) => unreachable!(),
} {
*slice = ListSlice::Slice(slice_range);
*slice = slice_range.into();
}
}
}

View file

@ -8,24 +8,94 @@ use crate::smstring::SmString;
#[derive(PartialEq, Eq, Debug, EnumAsInner, Clone)]
pub enum ListSlice {
RawStr(SmString),
Slice(Range<u32>),
Slice(SliceRange),
Unknown(usize),
}
#[repr(transparent)]
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct SliceRange(pub Range<u32>);
const UNKNOWN_START: u32 = u32::MAX / 2;
impl SliceRange {
#[inline(always)]
pub fn is_unknown(&self) -> bool {
self.0.start == UNKNOWN_START
}
pub fn new_unknown(size: u32) -> Self {
Self(UNKNOWN_START..UNKNOWN_START + size)
}
}
impl Default for ListSlice {
fn default() -> Self {
ListSlice::Unknown(0)
}
}
impl ListSlice {
#[inline(always)]
pub fn from_range(range: Range<u32>) -> ListSlice {
Self::Slice(range)
impl From<Range<u32>> for ListSlice {
fn from(a: Range<u32>) -> Self {
ListSlice::Slice(a.into())
}
}
impl From<SliceRange> for ListSlice {
fn from(a: SliceRange) -> Self {
ListSlice::Slice(a)
}
}
impl From<Range<u32>> for SliceRange {
fn from(a: Range<u32>) -> Self {
SliceRange(a)
}
}
impl HasLength for SliceRange {
fn content_len(&self) -> usize {
self.0.len()
}
}
impl Sliceable for SliceRange {
fn slice(&self, from: usize, to: usize) -> Self {
if self.is_unknown() {
Self::new_unknown((to - from) as u32)
} else {
SliceRange(self.0.start + from as u32..self.0.start + to as u32)
}
}
}
impl Mergable for SliceRange {
fn merge(&mut self, other: &Self, _: &()) {
if self.is_unknown() {
self.0.end += other.0.end - other.0.start;
} else {
self.0.end = other.0.end;
}
}
pub fn from_raw(str: SmString) -> ListSlice {
Self::RawStr(str)
fn is_mergable(&self, other: &Self, _conf: &()) -> bool
where
Self: Sized,
{
(self.is_unknown() && other.is_unknown()) || self.0.end == other.0.start
}
}
impl ListSlice {
#[inline(always)]
pub fn UnknownRange(len: usize) -> SliceRange {
let start = UNKNOWN_START;
let end = len as u32 + UNKNOWN_START;
SliceRange(start..end)
}
#[inline(always)]
pub fn is_unknown(range: &SliceRange) -> bool {
range.is_unknown()
}
}

View file

@ -72,8 +72,7 @@ impl Tracker {
origin_right: None,
id: ID::unknown(0),
status: Status::new(),
len: init_len as usize,
slice: ListSlice::Unknown(init_len as usize),
slice: ListSlice::UnknownRange(init_len as usize),
},
&mut make_notify(&mut id_to_cursor),
);
@ -114,7 +113,7 @@ impl Tracker {
let id_span = IdSpan::new(
yspan.id.client_id,
yspan.id.counter,
yspan.len as Counter + yspan.id.counter,
yspan.atom_len() as Counter + yspan.id.counter,
);
let mut len = 0;
for marker in self
@ -126,7 +125,7 @@ impl Tracker {
}
}
assert_eq!(len, yspan.len);
assert_eq!(len, yspan.atom_len());
}
self.content.debug_check();
@ -147,6 +146,10 @@ impl Tracker {
}
pub fn forward(&mut self, spans: &IdSpanVector) {
if spans.is_empty() {
return;
}
let mut cursors = Vec::with_capacity(spans.len());
let mut args = Vec::with_capacity(spans.len());
for span in spans.iter() {
@ -187,6 +190,10 @@ impl Tracker {
}
pub fn retreat(&mut self, spans: &IdSpanVector) {
if spans.is_empty() {
return;
}
let mut cursors = Vec::with_capacity(spans.len());
let mut args = Vec::with_capacity(spans.len());
for span in spans.iter() {
@ -247,7 +254,7 @@ impl Tracker {
id,
*pos,
slice.content_len(),
slice.clone(),
slice.as_slice().unwrap().clone(),
);
debug_log!("INSERT YSPAN={}", format!("{:#?}", &yspan).red());
// SAFETY: we know this is safe because in [YataImpl::insert_after] there is no access to shared elements
@ -270,8 +277,10 @@ impl Tracker {
}
}
self.id_to_cursor
.set_small_range((id).into(), cursor_map::Marker::Delete(spans));
self.id_to_cursor.set_small_range(
(id).into(),
cursor_map::Marker::Delete(Box::new(spans)),
);
}
}
}

View file

@ -1,11 +1,11 @@
use std::ops::{Deref, DerefMut};
use std::ops::{Deref, DerefMut, Range};
use rle::{
rle_tree::{BumpMode, Position, SafeCursor, SafeCursorMut},
rle_tree::{Position, SafeCursor, SafeCursorMut},
HasLength, RleTree, RleVecWithLen,
};
use crate::{container::text::text_content::ListSlice, id::ID, span::IdSpan};
use crate::{container::text::text_content::SliceRange, id::ID, span::IdSpan};
use super::y_span::{StatusChange, YSpan, YSpanTreeTrait};
@ -23,7 +23,7 @@ impl ContentMap {
id: ID,
pos: usize,
len: usize,
slice: ListSlice,
slice: SliceRange,
) -> YSpan {
debug_assert!(slice.content_len() == len);
let (left, right) = self.get_sibling_at(pos);
@ -31,7 +31,6 @@ impl ContentMap {
origin_left: left,
origin_right: right,
id,
len,
status: Default::default(),
slice,
}

View file

@ -8,7 +8,7 @@ use enum_as_inner::EnumAsInner;
use rle::{
range_map::RangeMap,
rle_tree::{node::LeafNode, BumpMode, Position, SafeCursor, SafeCursorMut, UnsafeCursor},
rle_tree::{node::LeafNode, Position, SafeCursor, SafeCursorMut, UnsafeCursor},
HasLength, Mergable, RleVecWithLen, Sliceable, ZeroElement,
};
@ -27,7 +27,7 @@ pub(super) enum Marker {
ptr: NonNull<LeafNode<'static, YSpan, YSpanTreeTrait>>,
len: usize,
},
Delete(RleVecWithLen<[IdSpan; 2]>),
Delete(Box<RleVecWithLen<[IdSpan; 2]>>),
// FUTURE: REDO, UNDO
}
@ -91,10 +91,10 @@ impl Marker {
if child.overlap(id_span) {
let start_counter = child.id.counter;
let offset = std::cmp::max(id_span.counter.min() - start_counter, 0);
debug_assert!((offset as usize) < child.len);
debug_assert!((offset as usize) < child.atom_len());
let max_offset = std::cmp::min(
id_span.counter.max() - start_counter,
(child.len - 1) as i32,
(child.atom_len() - 1) as i32,
);
let len = max_offset - offset + 1;
// SAFETY: we just checked it is valid
@ -103,7 +103,7 @@ impl Marker {
*ptr,
i,
offset as usize,
Position::from_offset(offset as isize, child.len),
Position::from_offset(offset as isize, child.atom_len()),
len as usize,
))
})
@ -151,7 +151,7 @@ impl Sliceable for Marker {
ptr: *ptr,
len: to - from,
},
Marker::Delete(x) => Marker::Delete(x.slice(from, to)),
Marker::Delete(x) => Marker::Delete(Box::new(x.slice(from, to))),
}
}
}
@ -194,10 +194,10 @@ impl Mergable for Marker {
}
#[derive(Debug, Default)]
pub(super) struct CursorMap(RangeMap<u128, Marker, BumpMode>);
pub(super) struct CursorMap(RangeMap<u128, Marker>);
impl Deref for CursorMap {
type Target = RangeMap<u128, Marker, BumpMode>;
type Target = RangeMap<u128, Marker>;
fn deref(&self) -> &Self::Target {
&self.0

View file

@ -116,7 +116,7 @@ impl<'a> Iterator for EffectIter<'a> {
debug_assert_eq!(length_diff, len as i32);
return Some(Effect::Ins {
pos: index,
content,
content: content.into(),
});
}
}

View file

@ -1,7 +1,7 @@
use std::fmt::Display;
use std::{fmt::Display, ops::Range};
use crate::{
container::text::text_content::ListSlice,
container::text::text_content::{ListSlice, SliceRange},
id::Counter,
span::{HasCounter, HasCounterSpan, IdSpan},
ContentType, InsertContentTrait, ID,
@ -15,8 +15,8 @@ use rle::{
pub struct Status {
/// is this span from a future operation
pub future: bool,
pub delete_times: usize,
pub undo_times: usize,
pub delete_times: u16,
pub undo_times: u16,
}
impl Display for Status {
@ -65,15 +65,25 @@ impl Status {
}
}
/// 80 bytes
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct YSpan {
// 16 bytes
pub id: ID,
pub len: usize,
// 8 bytes
pub status: Status,
// 24 bytes
pub origin_left: Option<ID>,
// 24 bytes
pub origin_right: Option<ID>,
// 8 bytes
// TODO: remove this field when the system is stable
pub slice: ListSlice,
pub slice: SliceRange,
}
#[test]
fn y_span_size() {
println!("{}", std::mem::size_of::<YSpan>());
}
#[derive(Clone, Copy, Debug)]
@ -92,7 +102,7 @@ impl YSpan {
/// this is the last id of the span, which is **included** by self
#[inline]
pub fn last_id(&self) -> ID {
self.id.inc(self.len as i32 - 1)
self.id.inc(self.atom_len() as i32 - 1)
}
#[inline]
@ -104,7 +114,7 @@ impl YSpan {
pub fn contain_id(&self, id: ID) -> bool {
self.id.client_id == id.client_id
&& self.id.counter <= id.counter
&& id.counter < self.id.counter + self.len as i32
&& id.counter < self.id.counter + self.atom_len() as i32
}
#[inline]
@ -113,7 +123,8 @@ impl YSpan {
return false;
}
self.id.counter < id.ctr_end() && self.id.counter + (self.len as Counter) > id.ctr_start()
self.id.counter < id.ctr_end()
&& self.id.counter + (self.atom_len() as Counter) > id.ctr_start()
}
}
@ -121,15 +132,14 @@ impl Mergable for YSpan {
fn is_mergable(&self, other: &Self, _: &()) -> bool {
other.id.client_id == self.id.client_id
&& self.status == other.status
&& self.id.counter + self.len as Counter == other.id.counter
&& self.id.counter + self.atom_len() as Counter == other.id.counter
&& self.origin_right == other.origin_right
&& Some(self.id.inc(self.len as Counter - 1)) == other.origin_left
&& Some(self.id.inc(self.atom_len() as Counter - 1)) == other.origin_left
&& self.slice.is_mergable(&other.slice, &())
}
fn merge(&mut self, other: &Self, _: &()) {
self.origin_right = other.origin_right;
self.len += other.len;
self.slice.merge(&other.slice, &())
}
}
@ -152,7 +162,6 @@ impl Sliceable for YSpan {
origin_left,
origin_right,
id: self.id.inc(from as i32),
len: to - from,
status: self.status.clone(),
slice: self.slice.slice(from, to),
}
@ -166,24 +175,25 @@ impl InsertContentTrait for YSpan {
}
impl HasLength for YSpan {
#[inline]
#[inline(always)]
fn content_len(&self) -> usize {
if self.status.is_activated() {
self.len
self.slice.atom_len()
} else {
0
}
}
#[inline]
#[inline(always)]
fn atom_len(&self) -> usize {
self.len
self.slice.atom_len()
}
}
#[cfg(any(test, features = "fuzzing"))]
pub mod test {
use crate::{
container::text::text_content::ListSlice,
op::{InsertContent, OpContent},
ContentType, Op, ID,
};
@ -201,9 +211,8 @@ pub mod test {
origin_left: Some(ID::new(0, 0)),
origin_right: None,
id: ID::new(0, 1),
len: 1,
status: Default::default(),
slice: Default::default(),
slice: ListSlice::UnknownRange(1),
})),
},
5,
@ -215,9 +224,8 @@ pub mod test {
origin_left: Some(ID::new(0, 1)),
origin_right: None,
id: ID::new(0, 2),
len: 1,
status: Default::default(),
slice: Default::default(),
slice: ListSlice::UnknownRange(1),
})),
},
5,
@ -226,6 +234,7 @@ pub mod test {
let merged = vec.get_merged(0).unwrap();
assert_eq!(merged.content.as_normal().unwrap().id(), ContentType::Text);
let text_content = merged.content.as_normal().unwrap().as_dyn().unwrap();
dbg!(&merged);
assert_eq!(text_content.content_len(), 2);
}
@ -239,9 +248,8 @@ pub mod test {
origin_left: Some(ID::new(0, 0)),
origin_right: None,
id: ID::new(0, 1),
len: 4,
status: Default::default(),
slice: Default::default(),
slice: ListSlice::UnknownRange(4),
})),
},
5,
@ -253,9 +261,8 @@ pub mod test {
origin_left: Some(ID::new(0, 0)),
origin_right: Some(ID::new(0, 1)),
id: ID::new(0, 5),
len: 4,
status: Default::default(),
slice: Default::default(),
slice: ListSlice::UnknownRange(4),
})),
},
5,

View file

@ -5,6 +5,7 @@ use crdt_list::{
use rle::{
range_map::{RangeMap, WithStartEnd},
rle_tree::{iter::IterMut, SafeCursorMut},
HasLength,
};
use crate::id::{Counter, ID};
@ -27,7 +28,7 @@ impl OpSet<YSpan, ID> for OpSpanSet {
value.id.into(),
WithStartEnd {
start: value.id.into(),
end: value.id.inc(value.len as i32).into(),
end: value.id.inc(value.atom_len() as i32).into(),
value: true,
},
)
@ -92,7 +93,7 @@ impl ListCrdt for YataImpl {
}
fn contains(op: &Self::OpUnit, id: Self::OpId) -> bool {
op.id.contains(op.len as Counter, id)
op.id.contains(op.atom_len() as Counter, id)
}
}
@ -134,7 +135,10 @@ mod test {
use crdt_list::crdt::OpSet;
use crate::{
container::text::tracker::y_span::{Status, YSpan},
container::text::{
text_content::ListSlice,
tracker::y_span::{Status, YSpan},
},
id::ID,
};
@ -145,11 +149,10 @@ mod test {
let mut set = OpSpanSet::default();
set.insert(&YSpan {
id: ID::new(1, 10),
len: 10,
origin_left: Some(ID::new(0, 1)),
origin_right: Some(ID::new(0, 2)),
status: Status::new(),
slice: Default::default(),
slice: ListSlice::UnknownRange(10),
});
assert!(set.contain(ID::new(1, 10)));
assert!(set.contain(ID::new(1, 11)));
@ -173,7 +176,7 @@ pub mod fuzz {
fn fields(&self) -> Vec<std::borrow::Cow<'_, str>> {
vec![
self.id.to_string().into(),
self.len.to_string().into(),
self.atom_len().to_string().into(),
self.status.future.to_string().into(),
self.status.delete_times.to_string().into(),
self.status.undo_times.to_string().into(),
@ -202,7 +205,7 @@ pub mod fuzz {
}
use crdt_list::test::{Action, TestFramework};
use rle::{RleVecWithIndex, RleVecWithLen};
use rle::{HasLength, RleVecWithIndex, RleVecWithLen};
use tabled::TableIteratorExt;
use crate::{
@ -221,7 +224,7 @@ pub mod fuzz {
impl TestFramework for YataImpl {
fn integrate(container: &mut Self::Container, op: Self::OpUnit) {
container.head_vv.set_end(op.id.inc(op.len as i32));
container.head_vv.set_end(op.id.inc(op.atom_len() as i32));
// SAFETY: we know this is safe because in [YataImpl::insert_after] there is no access to shared elements
unsafe { crdt_list::yata::integrate::<Self>(container, op) };
}
@ -299,7 +302,7 @@ pub mod fuzz {
),
pos % container.content.len(),
len,
ListSlice::Unknown(len),
ListSlice::UnknownRange(len),
);
ans
}
@ -329,6 +332,191 @@ pub mod fuzz {
}
}
#[test]
fn issue_global_tree_trait() {
crdt_list::test::test_with_actions::<YataImpl>(
5,
100,
vec![
Delete {
client_id: 252,
pos: 58,
len: 179,
},
Delete {
client_id: 227,
pos: 227,
len: 126,
},
Delete {
client_id: 227,
pos: 227,
len: 227,
},
Delete {
client_id: 177,
pos: 177,
len: 202,
},
Delete {
client_id: 202,
pos: 177,
len: 177,
},
Delete {
client_id: 202,
pos: 202,
len: 202,
},
Delete {
client_id: 176,
pos: 177,
len: 177,
},
Delete {
client_id: 177,
pos: 177,
len: 162,
},
NewOp {
client_id: 217,
pos: 0,
},
Sync { from: 126, to: 126 },
Sync { from: 177, to: 6 },
NewOp {
client_id: 96,
pos: 64,
},
NewOp {
client_id: 217,
pos: 227,
},
Delete {
client_id: 227,
pos: 227,
len: 227,
},
Delete {
client_id: 227,
pos: 227,
len: 227,
},
Delete {
client_id: 176,
pos: 177,
len: 177,
},
Delete {
client_id: 202,
pos: 202,
len: 202,
},
Delete {
client_id: 202,
pos: 202,
len: 202,
},
Delete {
client_id: 241,
pos: 177,
len: 176,
},
Delete {
client_id: 177,
pos: 101,
len: 101,
},
NewOp {
client_id: 101,
pos: 101,
},
Delete {
client_id: 153,
pos: 255,
len: 126,
},
NewOp {
client_id: 232,
pos: 156,
},
Delete {
client_id: 177,
pos: 176,
len: 177,
},
Delete {
client_id: 241,
pos: 177,
len: 202,
},
Delete {
client_id: 202,
pos: 177,
len: 177,
},
Delete {
client_id: 202,
pos: 202,
len: 202,
},
Delete {
client_id: 176,
pos: 177,
len: 177,
},
Delete {
client_id: 177,
pos: 176,
len: 177,
},
NewOp {
client_id: 153,
pos: 153,
},
Delete {
client_id: 0,
pos: 0,
len: 126,
},
Sync { from: 0, to: 162 },
NewOp {
client_id: 96,
pos: 96,
},
Delete {
client_id: 232,
pos: 126,
len: 126,
},
Delete {
client_id: 126,
pos: 126,
len: 126,
},
Delete {
client_id: 227,
pos: 227,
len: 227,
},
Delete {
client_id: 227,
pos: 227,
len: 227,
},
NewOp {
client_id: 158,
pos: 107,
},
Delete {
client_id: 126,
pos: 126,
len: 43,
},
],
)
}
use Action::*;
#[test]
fn issue_set_range() {
@ -383,6 +571,56 @@ pub mod fuzz {
],
)
}
#[test]
fn issue_range_map() {
crdt_list::test::test_with_actions::<YataImpl>(
5,
100,
vec![
NewOp {
client_id: 124,
pos: 124,
},
Delete {
client_id: 8,
pos: 47,
len: 68,
},
Delete {
client_id: 255,
pos: 255,
len: 255,
},
Delete {
client_id: 184,
pos: 184,
len: 48,
},
Sync { from: 158, to: 0 },
Delete {
client_id: 182,
pos: 182,
len: 182,
},
NewOp {
client_id: 255,
pos: 252,
},
Sync { from: 134, to: 2 },
Delete {
client_id: 246,
pos: 246,
len: 246,
},
Delete {
client_id: 246,
pos: 246,
len: 246,
},
],
)
}
#[test]
fn issue_1() {
crdt_list::test::test_with_actions::<YataImpl>(

View file

@ -16,7 +16,7 @@ use std::{
use colored::Colorize;
use fxhash::{FxHashMap, FxHashSet};
use rle::{HasLength, Sliceable};
use smallvec::SmallVec;
use smallvec::{smallvec, SmallVec};
mod iter;
mod mermaid;
#[cfg(test)]
@ -26,7 +26,7 @@ use crate::{
change::Lamport,
debug_log,
id::{ClientID, Counter, ID},
span::{HasId, HasIdSpan, HasLamport, HasLamportSpan, IdSpan},
span::{CounterSpan, HasId, HasIdSpan, HasLamport, HasLamportSpan, IdSpan},
version::{IdSpanVector, VersionVector, VersionVectorDiff},
};
@ -111,6 +111,44 @@ impl<T: Dag + ?Sized> DagUtils for T {
if from == to {
return ans;
}
if from.len() == 1 && to.len() == 1 {
let from = from[0];
let to = to[0];
if from.client_id == to.client_id {
let from_span = self.get(from).unwrap();
let to_span = self.get(to).unwrap();
if std::ptr::eq(from_span, to_span) {
if from.counter < to.counter {
ans.right.insert(
from.client_id,
CounterSpan::new(from.counter + 1, to.counter + 1),
);
} else {
ans.left.insert(
from.client_id,
CounterSpan::new(to.counter + 1, from.counter + 1),
);
}
return ans;
}
if from_span.deps().len() == 1 && to_span.contains_id(from_span.deps()[0]) {
ans.left.insert(
from.client_id,
CounterSpan::new(to.counter + 1, from.counter + 1),
);
return ans;
}
if to_span.deps().len() == 1 && from_span.contains_id(to_span.deps()[0]) {
ans.right.insert(
from.client_id,
CounterSpan::new(from.counter + 1, to.counter + 1),
);
return ans;
}
}
}
_find_common_ancestor(
&|v| self.get(v),
@ -504,6 +542,30 @@ where
D: DagNode + 'a,
F: Fn(ID) -> Option<&'a D>,
{
if left.len() == 1 && right.len() == 1 {
let left = left[0];
let right = right[0];
if left.client_id == right.client_id {
let left_span = get(left).unwrap();
let right_span = get(right).unwrap();
if std::ptr::eq(left_span, right_span) {
if left.counter < right.counter {
return smallvec![left];
} else {
return smallvec![right];
}
}
if left_span.deps().len() == 1 && right_span.contains_id(left_span.deps()[0]) {
return smallvec![right];
}
if right_span.deps().len() == 1 && left_span.contains_id(right_span.deps()[0]) {
return smallvec![left];
}
}
}
let mut ans: SmallVec<[ID; 2]> = Default::default();
let mut queue: BinaryHeap<(SmallVec<[OrdIdSpan; 1]>, NodeType)> = BinaryHeap::new();

View file

@ -315,6 +315,147 @@ mod test {
use super::Action::*;
use super::*;
#[test]
fn test_16() {
// retreat failed
test_multi_sites(
8,
vec![
Ins {
content: "\0\0\0\0".into(),
pos: 46183951198975,
site: 0,
},
Ins {
content: "hhhh\0\u{15}".into(),
pos: 18446463180827232135,
site: 255,
},
Ins {
content: "\u{1}\0\0\0".into(),
pos: 17872543955649300641,
site: 247,
},
SyncAll,
Ins {
content: "I".into(),
pos: 17144359356472099242,
site: 237,
},
Ins {
content: "\u{1}\0\0\u{14}\0".into(),
pos: 17149707381026848749,
site: 64,
},
Ins {
content: "\0\0".into(),
pos: 576460752320727201,
site: 8,
},
Ins {
content: "\u{8}\u{8}\u{8}".into(),
pos: 74451787075125633,
site: 7,
},
Ins {
content: "\0DD*\u{1}\0\n\0".into(),
pos: 1100718403053,
site: 161,
},
SyncAll,
SyncAll,
Ins {
content: "".into(),
pos: 18444210798919163626,
site: 255,
},
Del {
pos: 7523377975159973992,
len: 7523377975159973992,
site: 104,
},
Del {
pos: 90728552,
len: 7523377975153131520,
site: 104,
},
Del {
pos: 6842472,
len: 8259072,
site: 48,
},
Ins {
content: "0002".into(),
pos: 60868003017,
site: 0,
},
Ins {
content: "\0\0\0\0hhhh\0\u{15}".into(),
pos: 201792722274183,
site: 0,
},
Ins {
content: "\t\0,`".into(),
pos: 2882585236494125450,
site: 136,
},
Ins {
content: "\u{4}\u{4}\u{4}".into(),
pos: 6929914515027398276,
site: 138,
},
Sync { from: 137, to: 4 },
Ins {
content: "\u{4}".into(),
pos: 2879109919149700,
site: 4,
},
Del {
pos: 8680820740569200760,
len: 18446744071435745400,
site: 255,
},
SyncAll,
SyncAll,
Del {
pos: 144116287587483658,
len: 8646911284728217599,
site: 120,
},
Ins {
content: "\0\0SSSSSS\0".into(),
pos: 4755847789890108906,
site: 38,
},
Ins {
content: "\0\u{10}I\u{4}\u{4}\u{4}\u{4}\u{4}\0\0\u{1}\0".into(),
pos: 10768900350821001,
site: 0,
},
Ins {
content: "I\u{4}\u{4}".into(),
pos: 73387494324306057,
site: 0,
},
Ins {
content: "\u{4}\u{4}\u{4}\u{4}\u{4}".into(),
pos: 47890328526783742,
site: 0,
},
Ins {
content: "\0\0\0\0\0\0\0\0\0\0\0\0\0\0".into(),
pos: 792926255917040777,
site: 0,
},
Ins {
content: "\u{4}\0\0\0\0\u{4}".into(),
pos: 3497652043353424901,
site: 48,
},
],
)
}
#[test]
fn test_15() {
// retreat failed

View file

@ -277,7 +277,7 @@ impl LogStore {
self.vv.set_end(change.id_end());
self.changes
.entry(self.this_client_id)
.or_insert_with(|| RleVecWithIndex::new_with_conf(ChangeMergeCfg::new(true)))
.or_insert_with(|| RleVecWithIndex::new_with_conf(ChangeMergeCfg::new()))
.push(change);
debug_log!("CHANGES---------------- site {}", self.this_client_id);

View file

@ -5,10 +5,12 @@ use crate::change::Lamport;
use crate::id::ClientID;
use crate::id::ContainerIdx;
use crate::op::RichOp;
use crate::span::HasCounter;
use crate::span::HasId;
use crate::span::IdSpan;
use fxhash::FxHashMap;
use rle::HasLength;
use crate::change::ChangeMergeCfg;
@ -68,7 +70,11 @@ impl<'a> OpSpanIter<'a> {
container,
changes,
change_index,
op_index: 0,
op_index: rle_changes[change_index]
.ops
.get(target_span.counter.start)
.unwrap()
.merged_index,
}
}
}
@ -85,13 +91,22 @@ impl<'a> Iterator for OpSpanIter<'a> {
let change = &self.changes[self.change_index];
let ops = change.ops.vec();
if let Some(op) = ops.get(self.op_index) {
if op.counter >= self.span.counter.end {
return None;
}
self.op_index += 1;
if op.container != self.container {
self.op_index += 1;
continue;
}
let start = (self.span.counter.min() - op.counter).max(0) as usize;
let end = ((self.span.counter.end() - op.counter) as usize).min(op.atom_len());
assert!(start < end, "{:?} {:#?}", self.span, op);
return Some(RichOp {
op,
start,
end,
lamport: (op.counter - change.id.counter) as Lamport + change.lamport,
timestamp: change.timestamp,
});

View file

@ -198,6 +198,14 @@ pub struct RichOp<'a> {
pub op: &'a Op,
pub lamport: Lamport,
pub timestamp: Timestamp,
pub start: usize,
pub end: usize,
}
impl<'a> RichOp<'a> {
pub fn get_sliced(&self) -> Op {
self.op.slice(self.start, self.end)
}
}
impl HasIndex for Op {

View file

@ -9,7 +9,7 @@ pub enum LoroValue {
Null,
Bool(bool),
Double(f64),
Integer(i32),
I32(i32),
String(Box<str>),
List(Box<Vec<LoroValue>>),
Map(Box<FxHashMap<InternalString, LoroValue>>),
@ -40,7 +40,7 @@ impl From<InsertValue> for LoroValue {
InsertValue::Null => LoroValue::Null,
InsertValue::Bool(b) => LoroValue::Bool(b),
InsertValue::Double(d) => LoroValue::Double(d),
InsertValue::Int32(i) => LoroValue::Integer(i),
InsertValue::Int32(i) => LoroValue::I32(i),
InsertValue::String(s) => LoroValue::String(s),
InsertValue::Container(c) => LoroValue::Unresolved(c),
}
@ -53,7 +53,7 @@ impl From<LoroValue> for InsertValue {
LoroValue::Null => InsertValue::Null,
LoroValue::Bool(b) => InsertValue::Bool(b),
LoroValue::Double(d) => InsertValue::Double(d),
LoroValue::Integer(i) => InsertValue::Int32(i),
LoroValue::I32(i) => InsertValue::Int32(i),
LoroValue::String(s) => InsertValue::String(s),
LoroValue::Unresolved(c) => InsertValue::Container(c),
_ => unreachable!("Unsupported convert from LoroValue to InsertValue"),
@ -61,6 +61,36 @@ impl From<LoroValue> for InsertValue {
}
}
impl From<i32> for LoroValue {
fn from(v: i32) -> Self {
LoroValue::I32(v)
}
}
impl From<f64> for LoroValue {
fn from(v: f64) -> Self {
LoroValue::Double(v)
}
}
impl From<bool> for LoroValue {
fn from(v: bool) -> Self {
LoroValue::Bool(v)
}
}
impl From<&str> for LoroValue {
fn from(v: &str) -> Self {
LoroValue::String(v.into())
}
}
impl From<String> for LoroValue {
fn from(v: String) -> Self {
LoroValue::String(v.into())
}
}
/// [InsertValue] can be inserted to Map or List
/// It's different from [LoroValue] because some of the states in [LoroValue] are illegal to be inserted
#[derive(Debug, PartialEq, Clone)]
@ -87,7 +117,7 @@ pub mod wasm {
LoroValue::Null => JsValue::NULL,
LoroValue::Bool(b) => JsValue::from_bool(b),
LoroValue::Double(f) => JsValue::from_f64(f),
LoroValue::Integer(i) => JsValue::from_f64(i as f64),
LoroValue::I32(i) => JsValue::from_f64(i as f64),
LoroValue::String(s) => JsValue::from_str(&s),
LoroValue::List(list) => {
let arr = Array::new_with_length(list.len() as u32);

View file

@ -28,7 +28,7 @@ use crate::{
/// see also [im].
#[repr(transparent)]
#[derive(Debug, Clone)]
pub struct VersionVector(ImHashMap<ClientID, Counter>);
pub struct VersionVector(FxHashMap<ClientID, Counter>);
impl PartialEq for VersionVector {
fn eq(&self, other: &Self) -> bool {
@ -43,7 +43,7 @@ impl PartialEq for VersionVector {
impl Eq for VersionVector {}
impl Deref for VersionVector {
type Target = ImHashMap<ClientID, Counter>;
type Target = FxHashMap<ClientID, Counter>;
fn deref(&self) -> &Self::Target {
&self.0
@ -250,7 +250,7 @@ impl VersionVector {
#[inline]
pub fn new() -> Self {
Self(ImHashMap::new())
Self(Default::default())
}
/// set the inclusive ending point. target id will be included by self
@ -377,7 +377,7 @@ impl Default for VersionVector {
impl From<FxHashMap<ClientID, Counter>> for VersionVector {
fn from(map: FxHashMap<ClientID, Counter>) -> Self {
let mut im_map = ImHashMap::new();
let mut im_map = FxHashMap::default();
for (client_id, counter) in map {
im_map.insert(client_id, counter);
}

File diff suppressed because it is too large Load diff

View file

@ -1,13 +1,13 @@
use std::{fmt::Debug, ptr::NonNull};
use fxhash::FxHashSet;
use fxhash::{FxHashMap, FxHashSet};
use crate::{
rle_trait::{GlobalIndex, HasIndex, ZeroElement},
rle_tree::{
node::{InternalNode, LeafNode},
tree_trait::GlobalTreeTrait,
Arena, HeapMode, UnsafeCursor,
Arena, HeapMode, UnsafeCursor, VecTrait,
},
HasLength, Mergable, Rle, RleTree, Sliceable,
};
@ -52,6 +52,9 @@ impl<Value: Rle, Index: GlobalIndex> HasIndex for WithIndex<Value, Index> {
}
}
type RangeMapTrait<Index, Value, TreeArena> =
GlobalTreeTrait<WithIndex<Value, Index>, 32, TreeArena>;
#[repr(transparent)]
#[derive(Debug)]
pub struct RangeMap<
@ -59,8 +62,7 @@ pub struct RangeMap<
Value: Rle + ZeroElement + 'static,
TreeArena: Arena + 'static = HeapMode,
> {
pub(crate) tree:
RleTree<WithIndex<Value, Index>, GlobalTreeTrait<WithIndex<Value, Index>, 10, TreeArena>>,
pub(crate) tree: RleTree<WithIndex<Value, Index>, RangeMapTrait<Index, Value, TreeArena>>,
}
impl<
@ -113,7 +115,7 @@ impl<
let mut cursor = cursor.unwrap();
// SAFETY: we have exclusive ref to the tree
let mut cur_leaf = unsafe { cursor.0.leaf.as_mut() };
let cur_ptr = cur_leaf.into();
let mut cur_ptr = cur_leaf.into();
let mut index = cursor.0.index;
let mut elem = &mut cur_leaf.children[index];
let elem_end = elem.index + Index::from_usize(elem.atom_len()).unwrap();
@ -162,37 +164,46 @@ impl<
return;
}
let mut visited_nodes: FxHashSet<NonNull<LeafNode<_, _>>> = FxHashSet::default();
visited_nodes.insert(cur_ptr);
let mut last_end: Index = start;
#[derive(Default, Debug)]
struct Data {
delete_start: Option<usize>,
delete_end: Option<usize>,
}
let mut visited_nodes: FxHashMap<NonNull<LeafNode<_, _>>, Data> = Default::default();
let mut cur_data: Data = Default::default();
let mut last_inside_element: Option<NonNull<_>> = None;
// iterate over the elements inside the range
loop {
if elem.index >= end {
visited_nodes.insert(cur_leaf.into(), cur_data);
break;
}
let elem_end = elem.index + Index::from_usize(elem.atom_len()).unwrap();
if start > elem_end {
debug_assert!(false, "something wrong with get_cursor_ge")
// go to next loop
} else if elem.index < start {
// start element overlaps with target range
// let it keep its left part
*elem = elem.slice(0, (start - elem.index).as_());
let new_len = (start - elem.index).as_();
*elem = elem.slice(0, new_len);
} else if elem_end > end {
// end element overlaps with target range
// let it keep its right part
*elem = elem.slice((end - elem.index).as_(), elem.atom_len());
let start = (end - elem.index).as_();
*elem = elem.slice(start, elem.atom_len());
visited_nodes.insert(cur_ptr, cur_data);
break;
} else {
// elements inside the target range
// extends its start to last_end
*elem = WithIndex {
index: last_end,
value: value.slice((last_end - start).as_(), (elem_end - start).as_()),
};
last_inside_element = Some(elem.into());
last_end = elem_end;
if last_inside_element.is_none() {
last_inside_element = Some(elem.into());
} else {
cur_data.delete_start.get_or_insert(index);
cur_data.delete_end = Some(index + 1);
}
}
// move to next element
@ -201,9 +212,12 @@ impl<
elem = &mut cur_leaf.children[index];
} else {
if let Some(next) = cur_leaf.next_mut() {
visited_nodes.insert(next.into());
visited_nodes.insert(cur_ptr, cur_data);
cur_ptr = next.into();
cur_data = Default::default();
cur_leaf = next;
} else {
visited_nodes.insert(cur_ptr, cur_data);
// is the last element of the tree
break;
}
@ -213,48 +227,83 @@ impl<
}
}
if last_end != end {
if let Some(mut insider) = last_inside_element {
// we can extended the last element to the end
// SAFETY: we just got the element from the tree and save it to the option value
let insider = unsafe { insider.as_mut() };
insider.value = value.slice((insider.index - start).as_(), (end - start).as_());
last_end = end;
if let Some(mut insider) = last_inside_element {
// we can extended the last element to the end
// SAFETY: we just got the element from the tree and save it to the option value
let insider = unsafe { insider.as_mut() };
insider.index = start;
insider.value = value;
} else {
// need to insert a new element from here
// current pointer must be greater than start or at the end of the tree
// SAFETY: we just visited cursor
unsafe {
let cursor: UnsafeCursor<_, RangeMapTrait<Index, Value, TreeArena>> =
UnsafeCursor::new(cur_ptr, index, 0, crate::rle_tree::Position::Start, 0);
let last_item = cursor.as_ref();
if last_item.index >= end {
let value = WithIndex {
value,
index: start,
};
cursor.insert_notify(value, &mut |_, _| {});
} else if last_item.get_end_index() <= start {
// current pointer points to the end of the tree
let cursor: UnsafeCursor<_, RangeMapTrait<Index, Value, TreeArena>> =
cursor.shift(last_item.atom_len()).unwrap();
cursor.insert_notify(
WithIndex {
value,
index: start,
},
&mut |_, _| {},
);
} else {
unreachable!()
}
}
}
let mut visited_internal_nodes: FxHashSet<NonNull<InternalNode<_, _>>> =
FxHashSet::default();
for mut leaf in visited_nodes {
FxHashSet::with_capacity_and_hasher(visited_nodes.len(), Default::default());
for (mut leaf, data) in visited_nodes {
// SAFETY: we have exclusive ref to the tree
let leaf = unsafe { leaf.as_mut() };
if let (Some(start), Some(end)) = (data.delete_start, data.delete_end) {
leaf.children.drain(start..end);
}
leaf.update_cache();
visited_internal_nodes.insert(leaf.parent);
}
while !visited_internal_nodes.is_empty() {
for mut internal in std::mem::take(&mut visited_internal_nodes) {
let len = visited_internal_nodes.len();
for mut internal in std::mem::replace(
&mut visited_internal_nodes,
FxHashSet::with_capacity_and_hasher(len, Default::default()),
) {
// SAFETY: we have exclusive ref to the tree
let internal = unsafe { internal.as_mut() };
let mut del_start = None;
let mut del_end = None;
for i in 0..internal.children().len() {
let child = &internal.children()[i];
if child.is_empty() {
del_start.get_or_insert(i);
del_end = Some(i + 1);
}
}
if let (Some(start), Some(end)) = (del_start, del_end) {
internal.drain_children(start, end);
}
internal.update_cache();
if let Some(parent) = internal.parent {
visited_internal_nodes.insert(parent);
}
}
}
if last_end != end {
// TODO: Can be optimized?
// need to insert a new element from here
// current pointer must be greater than start or at the end of the tree
self.tree.insert(
last_end,
WithIndex {
value: value.slice((last_end - start).as_(), (end - start).as_()),
index: last_end,
},
);
}
}
#[inline]
@ -476,14 +525,7 @@ mod test {
// 5-20
map.set_small_range(5, V::new(5, 20, "k"));
assert_eq!(
map.get_range(9, 15),
vec![
&V::new(5, 11, "k"),
&V::new(11, 12, "k"),
&V::new(12, 15, "k"),
]
);
assert_eq!(map.get_range(9, 15), vec![&V::new(5, 20, "k"),]);
}
#[test]

View file

@ -13,7 +13,7 @@ pub use tree_trait::Position;
use tree_trait::RleTreeTrait;
mod arena;
pub use arena::{Arena, BumpMode, HeapMode};
pub use arena::{Arena, BumpMode, HeapMode, VecTrait};
mod cursor;
pub mod iter;
pub mod node;

View file

@ -224,6 +224,19 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> Node<'a, T, A> {
sibling.update_cache();
}
#[inline(always)]
pub(crate) fn child_num(&self) -> usize {
match self {
Node::Internal(x) => x.children.len(),
Node::Leaf(x) => x.children.len(),
}
}
#[inline(always)]
pub(crate) fn is_empty(&self) -> bool {
self.child_num() == 0
}
pub(crate) fn borrow_from_sibling<F>(
&mut self,
sibling: &mut Node<'a, T, A>,

View file

@ -247,8 +247,7 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
}
if deleted_len > 0 {
self.connect_leaf(direct_delete_start, direct_delete_end - 1);
self.children.drain(direct_delete_start..direct_delete_end);
self.drain_children(direct_delete_start, direct_delete_end);
}
insertions.sort_by_key(|x| -(x.0 as isize));
@ -272,6 +271,12 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
result
}
#[inline(always)]
pub fn drain_children(&mut self, direct_delete_start: usize, direct_delete_end: usize) {
self.connect_leaf(direct_delete_start, direct_delete_end - 1);
self.children.drain(direct_delete_start..direct_delete_end);
}
pub(crate) fn apply_updates(
&mut self,
mut updates: Vec<(usize, Vec<<A::Arena as Arena>::Boxed<'a, Node<'a, T, A>>>)>,

View file

@ -1,4 +1,4 @@
use std::{fmt::Debug, ops::Deref};
use std::{cmp::Ordering, fmt::Debug, ops::Deref};
use bumpalo::Bump;
use num::{traits::AsPrimitive, FromPrimitive, Integer};
@ -42,6 +42,7 @@ impl Position {
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct FindPosResult<I> {
pub child_index: usize,
pub offset: I,
@ -299,12 +300,7 @@ impl<T: Rle + HasIndex, const MAX_CHILD: usize, TreeArena: Arena> RleTreeTrait<T
}
// TODO: Maybe panic if overlap?
node.cache.end = node
.children()
.iter()
.map(|x| x.get_end_index())
.max()
.unwrap();
node.cache.end = node.children().last().unwrap().get_end_index();
node.cache.start = node.children()[0].get_start_index();
}
@ -313,12 +309,7 @@ impl<T: Rle + HasIndex, const MAX_CHILD: usize, TreeArena: Arena> RleTreeTrait<T
return;
}
node.cache.end = node
.children()
.iter()
.map(|x| get_cache(x).end)
.max()
.unwrap();
node.cache.end = get_cache(node.children().last().unwrap()).end;
node.cache.start = get_cache(&node.children()[0]).start;
}
@ -326,65 +317,126 @@ impl<T: Rle + HasIndex, const MAX_CHILD: usize, TreeArena: Arena> RleTreeTrait<T
node: &InternalNode<'_, T, Self>,
index: Self::Int,
) -> FindPosResult<Self::Int> {
for (i, child) in node.children().iter().enumerate() {
let cache = get_cache(child);
if index <= cache.end {
if index < cache.start {
return FindPosResult::new_not_found(i, index, Position::Before);
}
// prefer Start than End
if index == cache.end
&& i + 1 < node.children.len()
&& index == get_cache(&node.children[i + 1]).start
{
return FindPosResult::new(i + 1, index, Position::Start);
}
return FindPosResult::new(i, index, get_pos_global(index, cache));
}
if node.children.is_empty() || index > node.cache.end {
return FindPosResult::new_not_found(
node.children.len().saturating_sub(1),
index,
Position::After,
);
}
FindPosResult::new_not_found(
node.children.len().saturating_sub(1),
index,
Position::After,
)
if index < node.cache.start {
return FindPosResult::new_not_found(0, index, Position::Before);
}
let ans = node
.children
.binary_search_by(|x| {
let cache = get_cache(x);
if index < cache.start {
Ordering::Greater
} else if index > cache.end {
Ordering::Less
} else {
Ordering::Equal
}
})
.map_or_else(
|x| {
FindPosResult::new_not_found(
x,
index,
get_pos_global(index, get_cache(&node.children[x])),
)
},
|x| {
FindPosResult::new(
x,
index,
get_pos_global(index, get_cache(&node.children[x])),
)
},
);
if ans.pos == Position::End {
if ans.child_index + 1 < node.children.len()
&& index == get_cache(&node.children[ans.child_index + 1]).start
{
FindPosResult::new(ans.child_index + 1, index, Position::Start)
} else {
ans
}
} else {
ans
}
}
fn find_pos_leaf(node: &LeafNode<'_, T, Self>, index: Self::Int) -> FindPosResult<usize> {
for (i, child) in node.children().iter().enumerate() {
let cache = Cache {
start: child.get_start_index(),
end: child.get_end_index(),
};
if index <= cache.end {
if index < cache.start {
return FindPosResult::new_not_found(i, 0, Position::Before);
}
// prefer Start than End
if index == cache.end
&& i + 1 < node.children.len()
&& index == node.children[i + 1].get_start_index()
{
return FindPosResult::new(i + 1, 0, Position::Start);
}
return FindPosResult::new(
i,
(index - cache.start).as_(),
get_pos_global(index, cache),
);
}
if node.children.is_empty() || index > node.cache.end {
return FindPosResult::new_not_found(
node.children.len().saturating_sub(1),
node.children.last().map(|x| x.atom_len()).unwrap_or(0),
Position::After,
);
}
FindPosResult::new_not_found(
node.children.len().saturating_sub(1),
node.children().last().unwrap().atom_len(),
Position::After,
)
if index < node.cache.start {
return FindPosResult::new_not_found(0, 0, Position::Before);
}
let ans = node
.children
.binary_search_by(|x| {
let cache = Cache {
start: x.get_start_index(),
end: x.get_end_index(),
};
if index < cache.start {
Ordering::Greater
} else if index > cache.end {
Ordering::Less
} else {
Ordering::Equal
}
})
.map_or_else(
|x| {
FindPosResult::new_not_found(
x,
0,
get_pos_global(
index,
Cache {
start: node.children[x].get_start_index(),
end: node.children[x].get_end_index(),
},
),
)
},
|x| {
FindPosResult::new(
x,
(index - node.children[x].get_start_index()).as_(),
get_pos_global(
index,
Cache {
start: node.children[x].get_start_index(),
end: node.children[x].get_end_index(),
},
),
)
},
);
if ans.pos == Position::End {
if ans.child_index + 1 < node.children.len()
&& index == node.children[ans.child_index + 1].get_start_index()
{
FindPosResult::new(ans.child_index + 1, 0, Position::Start)
} else {
ans
}
} else {
ans
}
}
fn len_leaf(node: &LeafNode<'_, T, Self>) -> Self::Int {
@ -408,6 +460,10 @@ impl<T: Rle + HasIndex, const MAX_CHILD: usize, TreeArena: Arena> RleTreeTrait<T
}
fn check_cache_internal(node: &InternalNode<'_, T, Self>) {
if node.children().is_empty() {
return;
}
assert_eq!(
node.cache.end,
node.children()
@ -430,6 +486,10 @@ fn get_pos_global<I: Integer>(index: I, cache: Cache<I>) -> Position {
Position::Start
} else if index == cache.end {
Position::End
} else if index < cache.start {
Position::Before
} else if index > cache.end {
Position::After
} else {
Position::Middle
}

View file

@ -86,21 +86,7 @@ impl<T: Mergable<Cfg> + HasLength, Cfg> RleVecWithIndex<T, Cfg> {
return None;
}
let mut start = 0;
let mut end = self.index.len() - 1;
while start < end {
let mid = (start + end) / 2;
if self.index[mid] == index {
start = mid;
break;
}
if self.index[mid] < index {
start = mid + 1;
} else {
end = mid;
}
}
let mut start = self.index.binary_search(&index).unwrap_or_else(|x| x);
if index < self.index[start] {
start -= 1;

View file

@ -1,12 +1,17 @@
use std::{hash::Hash, mem::MaybeUninit};
use std::{collections::hash_set::IntoIter, hash::Hash, mem::MaybeUninit};
use fxhash::FxHashSet;
pub enum SmallSet<T, const SIZE: usize> {
Arr([Option<T>; SIZE]),
Arr([Option<T>; SIZE], usize),
Set(FxHashSet<T>),
}
pub enum SmallSetIter<T, const SIZE: usize> {
Arr([Option<T>; SIZE], usize),
Set(IntoIter<T>),
}
impl<T: Eq + Hash, const SIZE: usize> SmallSet<T, SIZE> {
pub fn new() -> Self {
let a: MaybeUninit<[Option<T>; SIZE]> = MaybeUninit::zeroed();
@ -15,7 +20,21 @@ impl<T: Eq + Hash, const SIZE: usize> SmallSet<T, SIZE> {
for i in a.iter_mut() {
*i = None;
}
SmallSet::Arr(a)
SmallSet::Arr(a, 0)
}
pub fn is_empty(&self) -> bool {
match self {
SmallSet::Arr(_, size) => *size == 0,
SmallSet::Set(s) => s.is_empty(),
}
}
pub fn len(&self) -> usize {
match self {
SmallSet::Arr(_, size) => *size,
SmallSet::Set(s) => s.len(),
}
}
/// Adds a value to the set.
@ -27,7 +46,8 @@ impl<T: Eq + Hash, const SIZE: usize> SmallSet<T, SIZE> {
///
pub fn insert(&mut self, v: T) -> bool {
match self {
SmallSet::Arr(a) => {
SmallSet::Arr(a, i) => {
*i += 1;
for i in a.iter_mut() {
if let Some(i) = i {
if *i == v {
@ -54,7 +74,7 @@ impl<T: Eq + Hash, const SIZE: usize> SmallSet<T, SIZE> {
pub fn contains(&mut self, v: &T) -> bool {
match self {
SmallSet::Arr(a) => {
SmallSet::Arr(a, _) => {
for i in a.iter_mut() {
if let Some(i) = i {
if i == v {
@ -73,7 +93,8 @@ impl<T: Eq + Hash, const SIZE: usize> SmallSet<T, SIZE> {
pub fn remove(&mut self, v: &T) -> bool {
match self {
SmallSet::Arr(a) => {
SmallSet::Arr(a, i) => {
*i -= 1;
for i in a.iter_mut() {
if i.as_ref() == Some(v) {
*i = None;
@ -93,3 +114,36 @@ impl<T: Eq + Hash, const SIZE: usize> Default for SmallSet<T, SIZE> {
Self::new()
}
}
impl<T, const SIZE: usize> Iterator for SmallSetIter<T, SIZE> {
type Item = T;
fn next(&mut self) -> Option<Self::Item> {
match self {
SmallSetIter::Arr(arr, i) => {
#[allow(clippy::needless_range_loop)]
for index in *i..arr.len() {
if let Some(v) = std::mem::take(&mut arr[index]) {
*i += 1;
return Some(v);
}
}
None
}
SmallSetIter::Set(set) => set.next(),
}
}
}
impl<T: Eq + Hash, const SIZE: usize> IntoIterator for SmallSet<T, SIZE> {
type Item = T;
type IntoIter = SmallSetIter<T, SIZE>;
fn into_iter(self) -> Self::IntoIter {
match self {
SmallSet::Arr(arr, _) => SmallSetIter::Arr(arr, 0),
SmallSet::Set(arr) => SmallSetIter::Set(arr.into_iter()),
}
}
}