diff --git a/crates/rle/Cargo.toml b/crates/rle/Cargo.toml index 5384fc4a..2aa73a86 100644 --- a/crates/rle/Cargo.toml +++ b/crates/rle/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -bumpalo = { version = "3.10.0", features = ["collections"] } +bumpalo = { version = "3.10.0", features = ["collections", "boxed"] } num = "0.4.0" enum-as-inner = "0.5.1" owning_ref = "0.4.1" diff --git a/crates/rle/src/lib.rs b/crates/rle/src/lib.rs index 8d1cf1ec..9bbdf923 100644 --- a/crates/rle/src/lib.rs +++ b/crates/rle/src/lib.rs @@ -1,3 +1,4 @@ +#![allow(unused)] //! Run length encoding library. //! //! There are many mergeable types. By merging them together we can get a more compact representation of the data. diff --git a/crates/rle/src/rle_tree.rs b/crates/rle/src/rle_tree.rs index 4d62f3a4..0755de14 100644 --- a/crates/rle/src/rle_tree.rs +++ b/crates/rle/src/rle_tree.rs @@ -1,3 +1,4 @@ +pub(self) use bumpalo::boxed::Box as BumpBox; pub(self) use bumpalo::collections::vec::Vec as BumpVec; use owning_ref::OwningRefMut; use std::marker::{PhantomData, PhantomPinned}; @@ -8,6 +9,7 @@ use tree_trait::RleTreeTrait; use self::node::{InternalNode, Node}; +mod fixed_size_vec; mod node; mod tree_trait; @@ -23,11 +25,11 @@ pub struct RleTreeRaw<'a, T: Rle, A: RleTreeTrait> { type TreeRef = OwningRefMut, RleTreeRaw<'static, T, A>)>, RleTreeRaw<'static, T, A>>; -pub struct RleTreeCreator + 'static> { +pub struct RleTree + 'static> { tree: TreeRef, } -impl + 'static> RleTreeCreator { +impl + 'static> RleTree { pub fn new() -> Self { let bump = Box::new(Bump::new()); let tree = RleTreeRaw::new(unsafe { &*(&*bump as *const _) }); @@ -50,30 +52,38 @@ impl<'a, T: Rle, A: RleTreeTrait> RleTreeRaw<'a, T, A> { fn new(bump: &'a Bump) -> Self { Self { bump, - node: Node::Internal(InternalNode::new(bump)), + node: Node::Internal(BumpBox::new_in(InternalNode::new(bump, None), bump)), _pin: PhantomPinned, _a: PhantomData, } } - fn insert(&mut self, index: A::Int, value: T) { - self.node.insert(index, value); + #[inline] + pub fn insert(&mut self, index: A::Int, value: T) { + match self.node { + Node::Internal(ref mut node) => { + node.insert(index, value); + } + _ => { + unreachable!() + } + } } /// return a cursor to the tree - fn get(&self, index: A::Int) { + pub fn get(&self, index: A::Int) { todo!() } - fn iter(&self) { + pub fn iter(&self) { todo!() } - fn delete_range(&mut self, from: A::Int, to: A::Int) { + pub fn delete_range(&mut self, from: A::Int, to: A::Int) { todo!() } - fn iter_range(&self, from: A::Int, to: A::Int) { + pub fn iter_range(&self, from: A::Int, to: A::Int) { todo!() } @@ -91,29 +101,35 @@ fn test() { struct Trait; impl RleTreeTrait> for Trait { + const MAX_CHILDREN_NUM: usize = 4; type Int = usize; type InternalCache = (); - fn update_cache() { - todo!() - } - - fn min_children() -> usize { - 5 - } - - fn before_insert_internal(_: InternalNode<'_, Range, Self>) { - todo!() - } - fn find_insert_pos_internal( - _: InternalNode<'_, Range, Self>, + _: &mut InternalNode<'_, Range, Self>, _: Self::Int, ) -> usize { todo!() } + + const MIN_CHILDREN_NUM: usize = Self::MAX_CHILDREN_NUM / 2; + + fn update_cache_leaf(node: &mut node::LeafNode<'_, Range, Self>) { + todo!() + } + + fn update_cache_internal(node: &mut InternalNode<'_, Range, Self>) { + todo!() + } + + fn find_insert_pos_leaf( + node: &mut node::LeafNode<'_, Range, Self>, + index: Self::Int, + ) -> (usize, usize) { + todo!() + } } - let mut t: RleTreeCreator, Trait> = RleTreeCreator::new(); + let mut t: RleTree, Trait> = RleTree::new(); let tree = t.get_mut(); tree.insert(10, 0..5); } diff --git a/crates/rle/src/rle_tree/fixed_size_vec.rs b/crates/rle/src/rle_tree/fixed_size_vec.rs new file mode 100644 index 00000000..6cd5516c --- /dev/null +++ b/crates/rle/src/rle_tree/fixed_size_vec.rs @@ -0,0 +1,52 @@ +use super::Bump; +use super::BumpVec; +use std::marker::PhantomPinned; +use std::ops::Deref; +use std::ops::DerefMut; + +#[derive(Debug)] +pub(super) struct FixedSizedVec<'a, T> { + data: BumpVec<'a, T>, + _pin: PhantomPinned, +} + +impl<'a, T> FixedSizedVec<'a, T> { + #[inline] + pub(super) fn with_capacity(capacity: usize, bump: &'a Bump) -> Self { + Self { + data: BumpVec::with_capacity_in(capacity, bump), + _pin: PhantomPinned, + } + } + + #[inline] + pub(super) fn push(&mut self, value: T) { + debug_assert!(self.data.len() < self.data.capacity()); + self.data.push(value); + } + + #[inline] + pub(super) fn insert(&mut self, index: usize, value: T) { + debug_assert!(self.data.len() < self.data.capacity()); + self.data.insert(index, value); + } + + #[inline] + pub(super) fn pop(&mut self) -> Option { + self.data.pop() + } +} + +impl<'a, T> Deref for FixedSizedVec<'a, T> { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + &self.data + } +} + +impl<'a, T> DerefMut for FixedSizedVec<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.data + } +} diff --git a/crates/rle/src/rle_tree/node.rs b/crates/rle/src/rle_tree/node.rs index 0a1113f7..b9a357cf 100644 --- a/crates/rle/src/rle_tree/node.rs +++ b/crates/rle/src/rle_tree/node.rs @@ -1,8 +1,14 @@ -use std::marker::{PhantomData, PhantomPinned}; +use std::{ + marker::{PhantomData, PhantomPinned}, + pin::Pin, + ptr::NonNull, +}; use crate::Rle; -use super::{tree_trait::RleTreeTrait, BumpVec, RleTreeRaw}; +use super::{ + fixed_size_vec::FixedSizedVec, tree_trait::RleTreeTrait, BumpBox, BumpVec, RleTreeRaw, +}; use bumpalo::Bump; use enum_as_inner::EnumAsInner; mod internal_impl; @@ -10,15 +16,16 @@ mod leaf_impl; #[derive(Debug, EnumAsInner)] pub enum Node<'a, T: Rle, A: RleTreeTrait> { - Internal(InternalNode<'a, T, A>), - Leaf(LeafNode<'a, T, A>), + Internal(BumpBox<'a, InternalNode<'a, T, A>>), + Leaf(BumpBox<'a, LeafNode<'a, T, A>>), } #[derive(Debug)] pub struct InternalNode<'a, T: Rle, A: RleTreeTrait> { bump: &'a Bump, - parent: Option<&'a InternalNode<'a, T, A>>, - children: BumpVec<'a, Node<'a, T, A>>, + parent: Option>>, + children: FixedSizedVec<'a, Node<'a, T, A>>, + cache: A::InternalCache, _pin: PhantomPinned, _a: PhantomData, } @@ -26,19 +33,20 @@ pub struct InternalNode<'a, T: Rle, A: RleTreeTrait> { #[derive(Debug)] pub struct LeafNode<'a, T: Rle, A: RleTreeTrait> { bump: &'a Bump, - parent: &'a InternalNode<'a, T, A>, - children: BumpVec<'a, T>, - prev: Option<&'a LeafNode<'a, T, A>>, - next: Option<&'a LeafNode<'a, T, A>>, + parent: NonNull>, + children: FixedSizedVec<'a, T>, + prev: Option>>, + next: Option>>, _pin: PhantomPinned, _a: PhantomData, } impl<'a, T: Rle, A: RleTreeTrait> Node<'a, T, A> { - pub(super) fn insert(&mut self, index: A::Int, value: T) { - match self { - Node::Internal(node) => {} - Node::Leaf(node) => {} - } + fn new_internal(bump: &'a Bump) -> Self { + Self::Internal(BumpBox::new_in(InternalNode::new(bump, None), bump)) + } + + fn new_leaf(bump: &'a Bump, parent: NonNull>) -> Self { + Self::Leaf(BumpBox::new_in(LeafNode::new(bump, parent), bump)) } } diff --git a/crates/rle/src/rle_tree/node/internal_impl.rs b/crates/rle/src/rle_tree/node/internal_impl.rs index 68db1e2f..7f786cc0 100644 --- a/crates/rle/src/rle_tree/node/internal_impl.rs +++ b/crates/rle/src/rle_tree/node/internal_impl.rs @@ -1,13 +1,74 @@ use super::*; impl<'a, T: Rle, A: RleTreeTrait> InternalNode<'a, T, A> { - pub fn new(bump: &'a Bump) -> Self { + pub fn new(bump: &'a Bump, parent: Option>) -> Self { Self { bump, - parent: None, - children: BumpVec::with_capacity_in(A::max_children(), bump), + parent, + children: FixedSizedVec::with_capacity(A::MAX_CHILDREN_NUM, bump), + cache: Default::default(), _pin: PhantomPinned, _a: PhantomData, } } + + #[inline] + fn _split(&mut self) -> Self { + let mut ans = Self::new(self.bump, self.parent); + for i in 0..A::MIN_CHILDREN_NUM { + ans.children.push(self.children.pop().unwrap()); + } + + ans + } + + pub fn insert(&mut self, index: A::Int, value: T) -> Result<(), Self> { + if self.children.len() == 0 { + debug_assert!(self.parent.is_none()); + let ptr = NonNull::new(self as *mut _).unwrap(); + self.children.push(Node::new_leaf(self.bump, ptr)); + return Ok(()); + } + + let insert_pos = A::find_insert_pos_internal(self, index); + let child = &mut self.children[insert_pos]; + let new = match child { + Node::Internal(child) => { + if let Err(new) = child.insert(index, value) { + let new = Node::Internal(BumpBox::new_in(new, self.bump)); + Some(new) + } else { + None + } + } + Node::Leaf(child) => { + if let Err(new) = child.insert(index, value) { + let new = Node::Leaf(BumpBox::new_in(new, self.bump)); + Some(new) + } else { + None + } + } + }; + + if let Some(new) = new { + if self.children.len() == A::MAX_CHILDREN_NUM { + let mut ans = self._split(); + if insert_pos <= self.children.len() { + self.children.insert(insert_pos, new); + } else { + ans.children.insert(insert_pos - self.children.len(), new); + } + + A::update_cache_internal(self); + A::update_cache_internal(&mut ans); + return Err(ans); + } + + self.children.insert(insert_pos, new); + A::update_cache_internal(self); + } + + Ok(()) + } } diff --git a/crates/rle/src/rle_tree/node/leaf_impl.rs b/crates/rle/src/rle_tree/node/leaf_impl.rs index 5109bc4c..9b7729b0 100644 --- a/crates/rle/src/rle_tree/node/leaf_impl.rs +++ b/crates/rle/src/rle_tree/node/leaf_impl.rs @@ -1,15 +1,117 @@ use super::*; impl<'a, T: Rle, A: RleTreeTrait> LeafNode<'a, T, A> { - pub fn new(bump: &'a Bump, parent: &'a InternalNode<'a, T, A>) -> Self { + #[inline] + pub fn new(bump: &'a Bump, parent: NonNull>) -> Self { Self { bump, parent, - children: BumpVec::with_capacity_in(A::max_children(), bump), + children: FixedSizedVec::with_capacity(A::MAX_CHILDREN_NUM, bump), prev: None, next: None, _pin: PhantomPinned, _a: PhantomData, } } + + #[inline] + fn _split(&mut self) -> Self { + let mut ans = Self::new(self.bump, self.parent); + for i in 0..A::MIN_CHILDREN_NUM { + ans.children.push(self.children.pop().unwrap()); + } + ans.next = self.next; + ans.prev = Some(NonNull::new(self).unwrap()); + self.next = Some(NonNull::new(&mut ans).unwrap()); + ans + } + + pub fn push_child(&mut self, value: T) -> Result<(), Self> { + if self.children.len() > 0 { + let last = self.children.last_mut().unwrap(); + if last.is_mergable(&value, &()) { + last.merge(&value, &()); + A::update_cache_leaf(self); + return Ok(()); + } + } + + if self.children.len() == A::MAX_CHILDREN_NUM { + let mut ans = self._split(); + ans.push_child(value); + A::update_cache_leaf(self); + A::update_cache_leaf(&mut ans); + return Err(ans); + } + + self.children.push(value); + A::update_cache_leaf(self); + Ok(()) + } + + pub fn insert(&mut self, raw_index: A::Int, value: T) -> Result<(), Self> { + if self.children.len() == 0 { + self.children.push(value); + return Ok(()); + } + + let (mut index, mut offset) = A::find_insert_pos_leaf(self, raw_index); + let prev = if offset == 0 { + Some(&mut self.children[index - 1]) + } else if offset == self.children[index].len() { + index += 1; + offset = 0; + Some(&mut self.children[index - 1]) + } else { + None + }; + + if let Some(prev) = prev { + // clean cut, should no split + if prev.is_mergable(&value, &()) { + prev.merge(&value, &()); + A::update_cache_leaf(self); + return Ok(()); + } + + if self.children.len() == A::MAX_CHILDREN_NUM { + let mut ans = self._split(); + if index <= self.children.len() { + self.children.insert(index, value); + } else { + ans.children.insert(index - self.children.len(), value); + } + + A::update_cache_leaf(self); + A::update_cache_leaf(&mut ans); + return Err(ans); + } else { + self.children.insert(index, value); + A::update_cache_leaf(self); + return Ok(()); + } + } + + // need to split child + let a = self.children[index].slice(0, offset); + let b = self.children[index].slice(offset, self.children[index].len()); + self.children[index] = a; + + if self.children.len() == A::MAX_CHILDREN_NUM { + let mut ans = self._split(); + if index < self.children.len() { + self.children.insert(index + 1, b); + } else { + ans.children.insert(index - self.children.len() + 1, b); + } + + A::update_cache_leaf(self); + A::update_cache_leaf(&mut ans); + return Err(ans); + } + + self.children.insert(index + 1, value); + A::update_cache_leaf(self); + Ok(()) + } } diff --git a/crates/rle/src/rle_tree/tree_trait.rs b/crates/rle/src/rle_tree/tree_trait.rs index 67012abb..2bbc10f7 100644 --- a/crates/rle/src/rle_tree/tree_trait.rs +++ b/crates/rle/src/rle_tree/tree_trait.rs @@ -1,19 +1,19 @@ +use std::fmt::Debug; + use crate::Rle; -use super::node::{InternalNode, Node}; +use super::node::{InternalNode, LeafNode, Node}; pub trait RleTreeTrait: Sized { - type Int: num::Integer; - type InternalCache; + const MAX_CHILDREN_NUM: usize; + const MIN_CHILDREN_NUM: usize = Self::MAX_CHILDREN_NUM / 2; + type Int: num::Integer + Copy; + type InternalCache: Default + Debug; - fn update_cache(); - fn min_children() -> usize; - - #[inline] - fn max_children() -> usize { - Self::min_children() * 2 - } - - fn before_insert_internal(node: InternalNode<'_, T, Self>); - fn find_insert_pos_internal(node: InternalNode<'_, T, Self>, index: Self::Int) -> usize; + fn update_cache_leaf(node: &mut LeafNode<'_, T, Self>); + fn update_cache_internal(node: &mut InternalNode<'_, T, Self>); + fn find_insert_pos_internal(node: &mut InternalNode<'_, T, Self>, index: Self::Int) -> usize; + /// returns (index, offset) + /// if 0 < offset < children[index].len(), we need to split the node + fn find_insert_pos_leaf(node: &mut LeafNode<'_, T, Self>, index: Self::Int) -> (usize, usize); }