From cd95e2276cfc1e09a13ae67906c1c2b32fc75675 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Mon, 3 Oct 2022 17:35:44 +0800 Subject: [PATCH] feat: introduce crdt-list --- Cargo.lock | 16 +++ README.md | 6 + crates/loro-core/Cargo.toml | 1 + .../loro-core/src/container/text/tracker.rs | 8 +- .../src/container/text/tracker/content_map.rs | 4 +- .../src/container/text/tracker/cursor_map.rs | 41 ++++++- .../src/container/text/tracker/y_span.rs | 7 ++ .../src/container/text/tracker/yata.rs | 98 ++++++++++++++++ crates/rle/src/range_map.rs | 1 + crates/rle/src/rle_tree.rs | 55 +++++++-- crates/rle/src/rle_tree/cursor.rs | 31 ++++- crates/rle/src/rle_tree/iter.rs | 108 ++++++++++++++++-- crates/rle/src/rle_tree/node.rs | 11 ++ crates/rle/src/rle_tree/node/leaf_impl.rs | 14 ++- rust-toolchain | 2 +- 15 files changed, 370 insertions(+), 33 deletions(-) create mode 100644 crates/loro-core/src/container/text/tracker/yata.rs diff --git a/Cargo.lock b/Cargo.lock index bcff5751..dffab2ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,6 +29,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" +[[package]] +name = "arref" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ccd462b64c3c72f1be8305905a85d85403d768e8690c9b8bd3b9009a5761679" + [[package]] name = "atty" version = "0.2.14" @@ -144,6 +150,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crdt-list" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4a759e0bf62cfa3fbc92c569e0ef8d133f8e3c0fc28ff743f43af9da923d068" +dependencies = [ + "arref", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -319,6 +334,7 @@ dependencies = [ name = "loro-core" version = "0.1.0" dependencies = [ + "crdt-list", "enum-as-inner", "fxhash", "im", diff --git a/README.md b/README.md index e69de29b..5f79b0fa 100644 --- 
a/README.md +++ b/README.md @@ -0,0 +1,6 @@ +# Loro + +# Dev + +We use nightly Rust because crdt-list relies on GATs (generic associated types). +We can switch back to stable Rust once GATs are stabilized. diff --git a/crates/loro-core/Cargo.toml b/crates/loro-core/Cargo.toml index e508b8e0..779f5f82 100644 --- a/crates/loro-core/Cargo.toml +++ b/crates/loro-core/Cargo.toml @@ -19,6 +19,7 @@ thiserror = "1.0.31" im = "15.1.0" enum-as-inner = "0.5.1" num = "0.4.0" +crdt-list = "0.1.1" [dev-dependencies] proptest = "1.0.0" diff --git a/crates/loro-core/src/container/text/tracker.rs b/crates/loro-core/src/container/text/tracker.rs index 407f4857..07cc6c26 100644 --- a/crates/loro-core/src/container/text/tracker.rs +++ b/crates/loro-core/src/container/text/tracker.rs @@ -1,9 +1,6 @@ use std::ptr::NonNull; -use rle::{ - rle_tree::{iter::Iter, node::LeafNode}, - HasLength, -}; +use rle::{rle_tree::node::LeafNode, HasLength}; use crate::{ id::{Counter, ID}, @@ -23,6 +20,7 @@ use super::text_content::TextOpContent; mod content_map; mod cursor_map; mod y_span; +mod yata; /// A tracker for a single text, we can use it to calculate the effect of an operation on a text. 
/// @@ -30,6 +28,8 @@ mod y_span; /// /// - [YSpan] never gets removed in both [ContentMap] and [CursorMap] /// - The deleted contents are marked with deleted, but still lives on the [ContentMap] with length of 0 +/// +#[derive(Debug)] struct Tracker { content: ContentMap, id_to_cursor: CursorMap, diff --git a/crates/loro-core/src/container/text/tracker/content_map.rs b/crates/loro-core/src/container/text/tracker/content_map.rs index 52c4bc68..9d88283b 100644 --- a/crates/loro-core/src/container/text/tracker/content_map.rs +++ b/crates/loro-core/src/container/text/tracker/content_map.rs @@ -1,3 +1,5 @@ +use crdt_list::crdt::ListCrdt; +use crdt_list::yata::Yata; use std::ops::{Deref, DerefMut}; use rle::{ @@ -36,7 +38,7 @@ impl ContentMap { status: Default::default(), }; - // TODO: insert between left & right + // TODO: integrate yjs } /// When we insert a new [YSpan] at given position, we need to calculate its `originLeft` and `originRight` diff --git a/crates/loro-core/src/container/text/tracker/cursor_map.rs b/crates/loro-core/src/container/text/tracker/cursor_map.rs index 6df5d2b4..f3dc99e1 100644 --- a/crates/loro-core/src/container/text/tracker/cursor_map.rs +++ b/crates/loro-core/src/container/text/tracker/cursor_map.rs @@ -2,9 +2,13 @@ use std::{fmt::Debug, ptr::NonNull}; use enum_as_inner::EnumAsInner; -use rle::{range_map::RangeMap, rle_tree::node::LeafNode, HasLength, Mergable, Sliceable}; +use rle::{ + range_map::RangeMap, + rle_tree::{node::LeafNode, Position, SafeCursor, SafeCursorMut}, + HasLength, Mergable, Sliceable, +}; -use crate::span::IdSpan; +use crate::{id::ID, span::IdSpan}; use super::y_span::{YSpan, YSpanTreeTrait}; @@ -20,6 +24,39 @@ pub(super) enum Marker { // TODO: REDO, UNDO } +impl Marker { + pub fn as_cursor(&self, id: ID) -> Option> { + match self { + Marker::Insert { ptr, len } => { + // SAFETY: tree data is always valid + let node = unsafe { ptr.as_ref() }; + debug_assert!(!node.is_deleted()); + let position = 
node.children().iter().position(|x| x.contain_id(id))?; + // SAFETY: we just checked it is valid + Some(unsafe { SafeCursor::new(*ptr, position, 0, rle::rle_tree::Position::Start) }) + } + Marker::Delete(_) => None, + } + } + + pub fn as_cursor_mut( + &mut self, + id: ID, + ) -> Option> { + match self { + Marker::Insert { ptr, len } => { + // SAFETY: tree data is always valid + let node = unsafe { ptr.as_ref() }; + debug_assert!(!node.is_deleted()); + let position = node.children().iter().position(|x| x.contain_id(id))?; + // SAFETY: we just checked it is valid + Some(unsafe { SafeCursorMut::new(*ptr, position, 0, Position::Start) }) + } + Marker::Delete(_) => None, + } + } +} + impl Sliceable for Marker { fn slice(&self, from: usize, to: usize) -> Self { match self { diff --git a/crates/loro-core/src/container/text/tracker/y_span.rs b/crates/loro-core/src/container/text/tracker/y_span.rs index b915398c..992fea16 100644 --- a/crates/loro-core/src/container/text/tracker/y_span.rs +++ b/crates/loro-core/src/container/text/tracker/y_span.rs @@ -72,6 +72,13 @@ impl YSpan { debug_assert!(self.len > 0); self.status.is_activated() } + + #[inline] + pub fn contain_id(&self, id: ID) -> bool { + self.id.client_id == id.client_id + && self.id.counter <= id.counter + && self.last_id().counter > id.counter + } } impl Mergable for YSpan { diff --git a/crates/loro-core/src/container/text/tracker/yata.rs b/crates/loro-core/src/container/text/tracker/yata.rs new file mode 100644 index 00000000..9664d789 --- /dev/null +++ b/crates/loro-core/src/container/text/tracker/yata.rs @@ -0,0 +1,98 @@ +use crdt_list::crdt::{ListCrdt, OpSet}; +use rle::rle_tree::{iter::IterMut, SafeCursorMut, RleTreeRaw}; + +use crate::id::ID; + +use super::{ + content_map::ContentMap, + y_span::{YSpan, YSpanTreeTrait}, + Tracker, +}; + +#[derive(Default, Debug)] +struct OpSpanSet {} + +impl OpSet for OpSpanSet { + fn insert(&mut self, value: &YSpan) { + todo!() + } + + fn contain(&self, id: ID) -> bool { + 
todo!() + } + + fn clear(&mut self) { + todo!() + } +} + +struct YataImpl; + +impl ListCrdt for YataImpl { + type OpUnit = YSpan; + + type OpId = ID; + + type Container = Tracker; + + type Set = OpSpanSet; + + type Cursor<'a> = SafeCursorMut<'a, 'static, YSpan, YSpanTreeTrait>; + + type Iterator<'a> = IterMut<'a, 'static, YSpan, YSpanTreeTrait>; + + fn iter( + container: &mut Self::Container, + from: Option, + to: Option, + ) -> Self::Iterator<'_> { + let from = from.and_then(|x| { + container + .id_to_cursor + .get(x.into()) + .and_then(|m| m.as_cursor(x)) + }); + let to = to.and_then(|x| { + container + .id_to_cursor + .get(x.into()) + .and_then(|m| m.as_cursor(x)) + }); + + container + .content + .with_tree_mut(|tree| + // SAFETY: loosen lifetime requirement here. It's safe because the function + // signature can limit the lifetime of the returned iterator + unsafe {std::mem::transmute::<_, &mut &mut RleTreeRaw<_, _>>(tree)}.iter_mut_in(from, to) + ) + } + + fn insert_at(container: &mut Self::Container, op: Self::OpUnit, pos: usize) { + todo!() + } + + fn id(op: &Self::OpUnit) -> Self::OpId { + todo!() + } + + fn cmp_id(op_a: &Self::OpUnit, op_b: &Self::OpUnit) -> std::cmp::Ordering { + todo!() + } + + fn contains(op: &Self::OpUnit, id: Self::OpId) -> bool { + todo!() + } + + fn integrate(container: &mut Self::Container, op: Self::OpUnit) { + todo!() + } + + fn can_integrate(container: &Self::Container, op: &Self::OpUnit) -> bool { + todo!() + } + + fn len(container: &Self::Container) -> usize { + todo!() + } +} diff --git a/crates/rle/src/range_map.rs b/crates/rle/src/range_map.rs index 7c78d62a..39065c0b 100644 --- a/crates/rle/src/range_map.rs +++ b/crates/rle/src/range_map.rs @@ -46,6 +46,7 @@ impl HasGlobalIndex for WithGlobalIndex { pub(crate) tree: RleTree, GlobalTreeTrait, 10>>, diff --git a/crates/rle/src/rle_tree.rs b/crates/rle/src/rle_tree.rs index c62fce97..e4a3971d 100644 --- a/crates/rle/src/rle_tree.rs +++ b/crates/rle/src/rle_tree.rs @@ -3,6 
+3,7 @@ use crate::Rle; pub(self) use bumpalo::collections::vec::Vec as BumpVec; use bumpalo::Bump; pub use cursor::{SafeCursor, SafeCursorMut, UnsafeCursor}; +use num::FromPrimitive; use ouroboros::self_referencing; use std::marker::{PhantomData, PhantomPinned}; pub use tree_trait::Position; @@ -93,12 +94,10 @@ impl<'bump, T: Rle, A: RleTreeTrait> RleTreeRaw<'bump, T, A> { return None; } - return Some(SafeCursor::new( - leaf.into(), - result.child_index, - result.offset, - result.pos, - )); + // SAFETY: result is valid + return Some(unsafe { + SafeCursor::new(leaf.into(), result.child_index, result.offset, result.pos) + }); } } } @@ -125,12 +124,10 @@ impl<'bump, T: Rle, A: RleTreeTrait> RleTreeRaw<'bump, T, A> { return None; } - return Some(SafeCursor::new( - leaf.into(), - result.child_index, - result.offset, - result.pos, - )); + // SAFETY: result is valid + return Some(unsafe { + SafeCursor::new(leaf.into(), result.child_index, result.offset, result.pos) + }); } } } @@ -142,10 +139,44 @@ impl<'bump, T: Rle, A: RleTreeTrait> RleTreeRaw<'bump, T, A> { cursor.map(|x| SafeCursorMut(x.0)) } + #[inline] pub fn iter(&self) -> iter::Iter<'_, 'bump, T, A> { iter::Iter::new(self.node.get_first_leaf()) } + #[inline] + pub fn iter_mut(&mut self) -> iter::IterMut<'_, 'bump, T, A> { + iter::IterMut::new(self.node.get_first_leaf_mut()) + } + + #[inline] + pub fn empty(&self) -> bool { + self.len() == A::Int::from_usize(0).unwrap() + } + + pub fn iter_mut_in<'tree>( + &'tree mut self, + start: Option>, + end: Option>, + ) -> iter::IterMut<'tree, 'bump, T, A> { + if self.empty() || (start.is_none() && end.is_none()) { + self.iter_mut() + } else { + // SAFETY: this is safe because we know there are at least one element in the tree + let start = start.unwrap_or_else(|| unsafe { + SafeCursor::new( + self.node.get_first_leaf().unwrap().into(), + 0, + 0, + Position::Start, + ) + }); + + let start: SafeCursorMut<'tree, 'bump, T, A> = SafeCursorMut(start.0); + 
iter::IterMut::from_cursor(start, end).unwrap_or_else(|| self.iter_mut()) + } + } + pub fn delete_range(&mut self, start: Option, end: Option) { self.node .as_internal_mut() diff --git a/crates/rle/src/rle_tree/cursor.rs b/crates/rle/src/rle_tree/cursor.rs index 8339069a..cdc6ece7 100644 --- a/crates/rle/src/rle_tree/cursor.rs +++ b/crates/rle/src/rle_tree/cursor.rs @@ -1,4 +1,4 @@ -use std::{marker::PhantomData, ptr::NonNull}; +use std::{marker::PhantomData, ops::Deref, ptr::NonNull}; use crate::{Rle, RleTreeTrait}; @@ -184,8 +184,11 @@ impl<'tree, 'bump: 'tree, T: Rle, A: RleTreeTrait> SafeCursor<'tree, 'bump, T self.0.offset } + /// # Safety + /// + /// Users should make sure that leaf is pointing to a valid LeafNode with 'bump lifetime, and that index is in bounds #[inline] - pub(crate) fn new( + pub unsafe fn new( leaf: NonNull>, index: usize, offset: usize, @@ -223,6 +226,12 @@ impl<'tree, 'bump: 'tree, T: Rle, A: RleTreeTrait> SafeCursorMut<'tree, 'bump unsafe { self.0.leaf.as_ref() } } + #[inline] + pub fn leaf_mut(&mut self) -> &'tree mut LeafNode<'bump, T, A> { + // SAFETY: SafeCursorMut is an exclusive reference to the tree + unsafe { self.0.leaf.as_mut() } + } + #[inline] pub fn child_index(&self) -> usize { self.0.index @@ -231,7 +240,7 @@ impl<'tree, 'bump: 'tree, T: Rle, A: RleTreeTrait> SafeCursorMut<'tree, 'bump impl<'tree, 'bump: 'tree, T: Rle, A: RleTreeTrait> SafeCursorMut<'tree, 'bump, T, A> { #[inline] - pub(crate) fn new( + pub unsafe fn new( leaf: NonNull>, index: usize, offset: usize, @@ -316,3 +325,19 @@ impl<'tree, 'bump: 'tree, T: Rle, A: RleTreeTrait> AsMut unsafe { self.0.as_mut() } } } + +impl<'tree, 'bump: 'tree, T: Rle, A: RleTreeTrait> Deref for SafeCursor<'tree, 'bump, T, A> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} + +impl<'tree, 'bump: 'tree, T: Rle, A: RleTreeTrait> Deref for SafeCursorMut<'tree, 'bump, T, A> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} diff 
--git a/crates/rle/src/rle_tree/iter.rs b/crates/rle/src/rle_tree/iter.rs index 408185b4..23c35c49 100644 --- a/crates/rle/src/rle_tree/iter.rs +++ b/crates/rle/src/rle_tree/iter.rs @@ -3,16 +3,61 @@ use crate::Rle; use super::{ node::LeafNode, tree_trait::{Position, RleTreeTrait}, - SafeCursor, + SafeCursor, SafeCursorMut, }; -pub struct Iter<'some, 'bump: 'some, T: Rle, A: RleTreeTrait> { +pub struct Iter<'some, 'bump, T: Rle, A: RleTreeTrait> { node: Option<&'some LeafNode<'bump, T, A>>, child_index: usize, end_node: Option<&'some LeafNode<'bump, T, A>>, end_index: Option, } +pub struct IterMut<'some, 'bump, T: Rle, A: RleTreeTrait> { + node: Option<&'some mut LeafNode<'bump, T, A>>, + child_index: usize, + end_node: Option<&'some LeafNode<'bump, T, A>>, + end_index: Option, +} + +impl<'tree, 'bump, T: Rle, A: RleTreeTrait> IterMut<'tree, 'bump, T, A> { + #[inline] + pub fn new(node: Option<&'tree mut LeafNode<'bump, T, A>>) -> Self { + Self { + node, + child_index: 0, + end_node: None, + end_index: None, + } + } + + #[inline] + pub fn from_cursor( + mut start: SafeCursorMut<'tree, 'bump, T, A>, + mut end: Option>, + ) -> Option { + if start.0.pos == Position::After { + start = start.next()? 
+ } + + if let Some(end_inner) = end { + if end_inner.0.pos == Position::Middle + || end_inner.0.pos == Position::End + || end_inner.0.pos == Position::After + { + end = end_inner.next(); + } + } + + Some(Self { + node: Some(start.leaf_mut()), + child_index: start.0.index, + end_node: end.map(|end| end.leaf()), + end_index: end.map(|end| end.index()), + }) + } +} + impl<'tree, 'bump, T: Rle, A: RleTreeTrait> Iter<'tree, 'bump, T, A> { #[inline] pub fn new(node: Option<&'tree LeafNode<'bump, T, A>>) -> Self { @@ -67,12 +112,10 @@ impl<'rf, 'bump, T: Rle, A: RleTreeTrait> Iterator for Iter<'rf, 'bump, T, A> match node.children.get(self.child_index) { Some(_) => { self.child_index += 1; - return Some(SafeCursor::new( - node.into(), - self.child_index - 1, - 0, - Position::Start, - )); + // SAFETY: we just checked that the child exists + return Some(unsafe { + SafeCursor::new(node.into(), self.child_index - 1, 0, Position::Start) + }); } None => match node.next() { Some(next) => { @@ -96,3 +139,52 @@ impl<'rf, 'bump, T: Rle, A: RleTreeTrait> Iterator for Iter<'rf, 'bump, T, A> None } } + +impl<'rf, 'bump, T: Rle, A: RleTreeTrait> Iterator for IterMut<'rf, 'bump, T, A> { + type Item = SafeCursorMut<'rf, 'bump, T, A>; + + fn next(&mut self) -> Option { + if let (Some(end_node), Some(node), Some(end_index)) = ( + self.end_node, + self.node.as_mut().map(|x| *x as *const LeafNode<_, _>), + self.end_index, + ) { + if std::ptr::eq(end_node, node as *const _) && self.child_index == end_index { + return None; + } + } + + while let Some(node) = std::mem::take(&mut self.node) { + let node_ptr = node as *const _; + match node.children.get(self.child_index) { + Some(_) => { + self.child_index += 1; + let leaf = node.into(); + self.node = Some(node); + // SAFETY: we just checked that the child exists + return Some(unsafe { + SafeCursorMut::new(leaf, self.child_index - 1, 0, Position::Start) + }); + } + None => match node.next_mut() { + Some(next) => { + if let Some(end_node) = 
self.end_node { + // if node == end_node, should not go to next node + // in this case end_index == node.children.len() + if std::ptr::eq(end_node, node_ptr) { + return None; + } + } + + self.node = Some(next); + self.child_index = 0; + continue; + } + None => return None, + }, + } + } + + None + } +} diff --git a/crates/rle/src/rle_tree/node.rs b/crates/rle/src/rle_tree/node.rs index 2f248bc5..c6881ee0 100644 --- a/crates/rle/src/rle_tree/node.rs +++ b/crates/rle/src/rle_tree/node.rs @@ -70,6 +70,17 @@ impl<'a, T: Rle, A: RleTreeTrait> Node<'a, T, A> { } } + #[inline] + pub(crate) fn get_first_leaf_mut(&mut self) -> Option<&mut LeafNode<'a, T, A>> { + match self { + Self::Internal(node) => node + .children + .first_mut() + .and_then(|child| child.get_first_leaf_mut()), + Self::Leaf(node) => Some(node), + } + } + #[inline] pub(crate) fn get_last_leaf(&self) -> Option<&LeafNode<'a, T, A>> { match self { diff --git a/crates/rle/src/rle_tree/node/leaf_impl.rs b/crates/rle/src/rle_tree/node/leaf_impl.rs index b7df4931..52d5f7df 100644 --- a/crates/rle/src/rle_tree/node/leaf_impl.rs +++ b/crates/rle/src/rle_tree/node/leaf_impl.rs @@ -52,13 +52,17 @@ impl<'bump, T: Rle, A: RleTreeTrait> LeafNode<'bump, T, A> { #[inline] pub fn get_cursor<'tree>(&'tree self, pos: A::Int) -> SafeCursor<'tree, 'bump, T, A> { let result = A::find_pos_leaf(self, pos); - SafeCursor::new(self.into(), result.child_index, result.offset, result.pos) + assert!(result.found); + // SAFETY: result.found is true + unsafe { SafeCursor::new(self.into(), result.child_index, result.offset, result.pos) } } #[inline] pub fn get_cursor_mut<'b>(&'b mut self, pos: A::Int) -> SafeCursorMut<'b, 'bump, T, A> { let result = A::find_pos_leaf(self, pos); - SafeCursorMut::new(self.into(), result.child_index, result.offset, result.pos) + assert!(result.found); + // SAFETY: result.found is true + unsafe { SafeCursorMut::new(self.into(), result.child_index, result.offset, result.pos) } } pub fn push_child( @@ -307,6 
+311,12 @@ impl<'bump, T: Rle, A: RleTreeTrait> LeafNode<'bump, T, A> { unsafe { self.next.map(|p| p.as_ref()) } } + #[inline] + pub fn next_mut(&mut self) -> Option<&mut Self> { + // SAFETY: internal invariant ensures prev and next are valid references + unsafe { self.next.map(|mut p| p.as_mut()) } + } + #[inline] pub fn prev(&self) -> Option<&Self> { // SAFETY: internal variant ensure prev and next are valid reference diff --git a/rust-toolchain b/rust-toolchain index 2bf5ad04..bf867e0a 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -stable +nightly