feat: rle tree insert

This commit is contained in:
Zixuan Chen 2022-08-09 21:25:24 +08:00
parent 80ea31883e
commit 028e3ba3f9
8 changed files with 297 additions and 57 deletions

View file

@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
bumpalo = { version = "3.10.0", features = ["collections"] }
bumpalo = { version = "3.10.0", features = ["collections", "boxed"] }
num = "0.4.0"
enum-as-inner = "0.5.1"
owning_ref = "0.4.1"

View file

@ -1,3 +1,4 @@
#![allow(unused)]
//! Run length encoding library.
//!
//! There are many mergeable types. By merging them together we can get a more compact representation of the data.

View file

@ -1,3 +1,4 @@
pub(self) use bumpalo::boxed::Box as BumpBox;
pub(self) use bumpalo::collections::vec::Vec as BumpVec;
use owning_ref::OwningRefMut;
use std::marker::{PhantomData, PhantomPinned};
@ -8,6 +9,7 @@ use tree_trait::RleTreeTrait;
use self::node::{InternalNode, Node};
mod fixed_size_vec;
mod node;
mod tree_trait;
@ -23,11 +25,11 @@ pub struct RleTreeRaw<'a, T: Rle, A: RleTreeTrait<T>> {
type TreeRef<T, A> =
OwningRefMut<Box<(Box<Bump>, RleTreeRaw<'static, T, A>)>, RleTreeRaw<'static, T, A>>;
pub struct RleTreeCreator<T: Rle + 'static, A: RleTreeTrait<T> + 'static> {
pub struct RleTree<T: Rle + 'static, A: RleTreeTrait<T> + 'static> {
tree: TreeRef<T, A>,
}
impl<T: Rle + 'static, A: RleTreeTrait<T> + 'static> RleTreeCreator<T, A> {
impl<T: Rle + 'static, A: RleTreeTrait<T> + 'static> RleTree<T, A> {
pub fn new() -> Self {
let bump = Box::new(Bump::new());
let tree = RleTreeRaw::new(unsafe { &*(&*bump as *const _) });
@ -50,30 +52,38 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> RleTreeRaw<'a, T, A> {
fn new(bump: &'a Bump) -> Self {
Self {
bump,
node: Node::Internal(InternalNode::new(bump)),
node: Node::Internal(BumpBox::new_in(InternalNode::new(bump, None), bump)),
_pin: PhantomPinned,
_a: PhantomData,
}
}
fn insert(&mut self, index: A::Int, value: T) {
self.node.insert(index, value);
/// Inserts `value` at position `index` by delegating to the root node.
///
/// The root is expected to be an internal node; any other variant reaches
/// `unreachable!()`.
///
/// NOTE(review): `InternalNode::insert` returns a `Result` whose `Err` carries the
/// node that was split off when the node overflowed. That result is discarded
/// here, so a split of the root appears to lose the split-off half — confirm and
/// add logic that grows a new root.
#[inline]
pub fn insert(&mut self, index: A::Int, value: T) {
match self.node {
Node::Internal(ref mut node) => {
// The `Result` returned by the node is intentionally or accidentally ignored.
node.insert(index, value);
}
_ => {
unreachable!()
}
}
}
/// return a cursor to the tree
fn get(&self, index: A::Int) {
pub fn get(&self, index: A::Int) {
todo!()
}
fn iter(&self) {
pub fn iter(&self) {
todo!()
}
fn delete_range(&mut self, from: A::Int, to: A::Int) {
pub fn delete_range(&mut self, from: A::Int, to: A::Int) {
todo!()
}
fn iter_range(&self, from: A::Int, to: A::Int) {
pub fn iter_range(&self, from: A::Int, to: A::Int) {
todo!()
}
@ -91,29 +101,35 @@ fn test() {
struct Trait;
impl RleTreeTrait<Range<usize>> for Trait {
const MAX_CHILDREN_NUM: usize = 4;
type Int = usize;
type InternalCache = ();
fn update_cache() {
todo!()
}
fn min_children() -> usize {
5
}
fn before_insert_internal(_: InternalNode<'_, Range<usize>, Self>) {
todo!()
}
fn find_insert_pos_internal(
_: InternalNode<'_, Range<usize>, Self>,
_: &mut InternalNode<'_, Range<usize>, Self>,
_: Self::Int,
) -> usize {
todo!()
}
const MIN_CHILDREN_NUM: usize = Self::MAX_CHILDREN_NUM / 2;
fn update_cache_leaf(node: &mut node::LeafNode<'_, Range<usize>, Self>) {
todo!()
}
fn update_cache_internal(node: &mut InternalNode<'_, Range<usize>, Self>) {
todo!()
}
fn find_insert_pos_leaf(
node: &mut node::LeafNode<'_, Range<usize>, Self>,
index: Self::Int,
) -> (usize, usize) {
todo!()
}
}
let mut t: RleTreeCreator<Range<usize>, Trait> = RleTreeCreator::new();
let mut t: RleTree<Range<usize>, Trait> = RleTree::new();
let tree = t.get_mut();
tree.insert(10, 0..5);
}

View file

@ -0,0 +1,52 @@
use super::Bump;
use super::BumpVec;
use std::marker::PhantomPinned;
use std::ops::Deref;
use std::ops::DerefMut;
/// A bump-allocated vector that is meant to stay within the capacity it was
/// created with.
///
/// `push` and `insert` check, in debug builds only, that spare capacity is left
/// before adding an element. Read access goes through `Deref<Target = [T]>`.
#[derive(Debug)]
pub(super) struct FixedSizedVec<'a, T> {
/// Backing storage allocated in the bump arena.
data: BumpVec<'a, T>,
/// Marker that opts this type out of `Unpin`.
_pin: PhantomPinned,
}
impl<'a, T> FixedSizedVec<'a, T> {
    /// Creates an empty vector whose storage for `capacity` elements is reserved
    /// in `bump` up front.
    #[inline]
    pub(super) fn with_capacity(capacity: usize, bump: &'a Bump) -> Self {
        let data = BumpVec::with_capacity_in(capacity, bump);
        Self {
            data,
            _pin: PhantomPinned,
        }
    }

    /// Appends `value` at the end.
    ///
    /// Debug builds assert that the vector is not already at capacity.
    #[inline]
    pub(super) fn push(&mut self, value: T) {
        debug_assert!(self.data.len() < self.data.capacity());
        let storage = &mut self.data;
        storage.push(value);
    }

    /// Inserts `value` at `index`, shifting later elements towards the end.
    ///
    /// Debug builds assert that the vector is not already at capacity.
    #[inline]
    pub(super) fn insert(&mut self, index: usize, value: T) {
        debug_assert!(self.data.len() < self.data.capacity());
        let storage = &mut self.data;
        storage.insert(index, value);
    }

    /// Removes and returns the last element, or `None` when the vector is empty.
    #[inline]
    pub(super) fn pop(&mut self) -> Option<T> {
        let last = self.data.pop();
        last
    }
}
/// Read-only slice view of the stored elements.
impl<'a, T> Deref for FixedSizedVec<'a, T> {
    type Target = [T];

    fn deref(&self) -> &[T] {
        // Delegate to the backing vector's own slice view.
        self.data.deref()
    }
}
/// Mutable slice view of the stored elements; the length cannot change through it.
impl<'a, T> DerefMut for FixedSizedVec<'a, T> {
    fn deref_mut(&mut self) -> &mut [T] {
        // Delegate to the backing vector's own mutable slice view.
        self.data.deref_mut()
    }
}

View file

@ -1,8 +1,14 @@
use std::marker::{PhantomData, PhantomPinned};
use std::{
marker::{PhantomData, PhantomPinned},
pin::Pin,
ptr::NonNull,
};
use crate::Rle;
use super::{tree_trait::RleTreeTrait, BumpVec, RleTreeRaw};
use super::{
fixed_size_vec::FixedSizedVec, tree_trait::RleTreeTrait, BumpBox, BumpVec, RleTreeRaw,
};
use bumpalo::Bump;
use enum_as_inner::EnumAsInner;
mod internal_impl;
@ -10,15 +16,16 @@ mod leaf_impl;
#[derive(Debug, EnumAsInner)]
pub enum Node<'a, T: Rle, A: RleTreeTrait<T>> {
Internal(InternalNode<'a, T, A>),
Leaf(LeafNode<'a, T, A>),
Internal(BumpBox<'a, InternalNode<'a, T, A>>),
Leaf(BumpBox<'a, LeafNode<'a, T, A>>),
}
#[derive(Debug)]
pub struct InternalNode<'a, T: Rle, A: RleTreeTrait<T>> {
bump: &'a Bump,
parent: Option<&'a InternalNode<'a, T, A>>,
children: BumpVec<'a, Node<'a, T, A>>,
parent: Option<NonNull<InternalNode<'a, T, A>>>,
children: FixedSizedVec<'a, Node<'a, T, A>>,
cache: A::InternalCache,
_pin: PhantomPinned,
_a: PhantomData<A>,
}
@ -26,19 +33,20 @@ pub struct InternalNode<'a, T: Rle, A: RleTreeTrait<T>> {
#[derive(Debug)]
pub struct LeafNode<'a, T: Rle, A: RleTreeTrait<T>> {
bump: &'a Bump,
parent: &'a InternalNode<'a, T, A>,
children: BumpVec<'a, T>,
prev: Option<&'a LeafNode<'a, T, A>>,
next: Option<&'a LeafNode<'a, T, A>>,
parent: NonNull<InternalNode<'a, T, A>>,
children: FixedSizedVec<'a, T>,
prev: Option<NonNull<LeafNode<'a, T, A>>>,
next: Option<NonNull<LeafNode<'a, T, A>>>,
_pin: PhantomPinned,
_a: PhantomData<A>,
}
impl<'a, T: Rle, A: RleTreeTrait<T>> Node<'a, T, A> {
pub(super) fn insert(&mut self, index: A::Int, value: T) {
match self {
Node::Internal(node) => {}
Node::Leaf(node) => {}
}
fn new_internal(bump: &'a Bump) -> Self {
Self::Internal(BumpBox::new_in(InternalNode::new(bump, None), bump))
}
fn new_leaf(bump: &'a Bump, parent: NonNull<InternalNode<'a, T, A>>) -> Self {
Self::Leaf(BumpBox::new_in(LeafNode::new(bump, parent), bump))
}
}

View file

@ -1,13 +1,74 @@
use super::*;
impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
pub fn new(bump: &'a Bump) -> Self {
pub fn new(bump: &'a Bump, parent: Option<NonNull<Self>>) -> Self {
Self {
bump,
parent: None,
children: BumpVec::with_capacity_in(A::max_children(), bump),
parent,
children: FixedSizedVec::with_capacity(A::MAX_CHILDREN_NUM, bump),
cache: Default::default(),
_pin: PhantomPinned,
_a: PhantomData,
}
}
/// Moves the last `A::MIN_CHILDREN_NUM` children into a freshly created sibling
/// and returns that sibling.
///
/// The sibling shares `bump` and `parent` with `self`, and the moved children
/// keep their relative order.
///
/// NOTE(review): the `parent` pointers stored inside the moved children are not
/// rewritten here. The returned node is still a by-value temporary without a
/// final address, so they have to be re-pointed once it has been placed.
///
/// # Panics
/// Panics if `self` holds fewer than `A::MIN_CHILDREN_NUM` children.
#[inline]
fn _split(&mut self) -> Self {
    let mut ans = Self::new(self.bump, self.parent);
    for _ in 0..A::MIN_CHILDREN_NUM {
        ans.children.push(self.children.pop().unwrap());
    }
    // `pop` hands the children over back-to-front; reverse them so the sibling
    // holds them in their original left-to-right order.
    ans.children.reverse();
    ans
}
/// Inserts `value` at position `index` somewhere below this node.
///
/// Returns `Ok(())` when this node absorbed the insertion. Returns
/// `Err(sibling)` when this node was full and had to split; `sibling` holds the
/// upper half of the children and must be placed directly after `self` by the
/// caller.
///
/// An empty node (only expected for a parent-less root) first gets a leaf child,
/// and `value` is stored in that leaf.
///
/// NOTE(review): when a child splits, the `parent` pointers inside the split-off
/// internal node's children still refer to the node they came from, and a
/// sibling that ends up in the `Err` node keeps `self` as its parent. Nothing in
/// this function reads those pointers, but they need fixing before they are used.
pub fn insert(&mut self, index: A::Int, value: T) -> Result<(), Self> {
    if self.children.is_empty() {
        debug_assert!(self.parent.is_none());
        let ptr = NonNull::from(&mut *self);
        let mut first = Node::new_leaf(self.bump, ptr);
        match first {
            Node::Leaf(ref mut leaf) => {
                // Store the value in the new leaf instead of dropping it.
                if leaf.insert(index, value).is_err() {
                    unreachable!("an empty leaf cannot overflow");
                }
            }
            Node::Internal(_) => unreachable!("`Node::new_leaf` always builds a leaf"),
        }
        self.children.push(first);
        A::update_cache_internal(self);
        return Ok(());
    }

    let insert_pos = A::find_insert_pos_internal(self, index);
    let bump = self.bump;
    let split_off = match &mut self.children[insert_pos] {
        Node::Internal(child) => match child.insert(index, value) {
            Ok(()) => None,
            Err(sibling) => Some(Node::Internal(BumpBox::new_in(sibling, bump))),
        },
        Node::Leaf(child) => match child.insert(index, value) {
            Ok(()) => None,
            Err(sibling) => {
                // The sibling only gets a stable address once it is boxed, so
                // the leaf list is linked up here rather than during the split.
                let mut sibling = BumpBox::new_in(sibling, bump);
                let sibling_ptr = NonNull::from(&mut *sibling);
                sibling.prev = Some(NonNull::from(&mut **child));
                child.next = Some(sibling_ptr);
                if let Some(after) = sibling.next {
                    // SAFETY: `after` was copied from a leaf's `next` link.
                    // Assuming such links only ever point at leaves boxed in the
                    // bump arena, the target is still allocated, and it is a
                    // different leaf from the two borrowed in this scope.
                    unsafe { (*after.as_ptr()).prev = Some(sibling_ptr) };
                }
                Some(Node::Leaf(sibling))
            }
        },
    };

    let new = match split_off {
        Some(new) => new,
        None => {
            // The subtree changed even though no split happened.
            A::update_cache_internal(self);
            return Ok(());
        }
    };

    // The split-off node is the upper half of `children[insert_pos]`, so it
    // belongs directly after that child.
    let new_pos = insert_pos + 1;
    if self.children.len() == A::MAX_CHILDREN_NUM {
        let mut ans = self._split();
        let kept = self.children.len();
        if new_pos <= kept {
            self.children.insert(new_pos, new);
        } else {
            ans.children.insert(new_pos - kept, new);
        }
        A::update_cache_internal(self);
        A::update_cache_internal(&mut ans);
        return Err(ans);
    }

    self.children.insert(new_pos, new);
    A::update_cache_internal(self);
    Ok(())
}
}

View file

@ -1,15 +1,117 @@
use super::*;
impl<'a, T: Rle, A: RleTreeTrait<T>> LeafNode<'a, T, A> {
pub fn new(bump: &'a Bump, parent: &'a InternalNode<'a, T, A>) -> Self {
#[inline]
pub fn new(bump: &'a Bump, parent: NonNull<InternalNode<'a, T, A>>) -> Self {
Self {
bump,
parent,
children: BumpVec::with_capacity_in(A::max_children(), bump),
children: FixedSizedVec::with_capacity(A::MAX_CHILDREN_NUM, bump),
prev: None,
next: None,
_pin: PhantomPinned,
_a: PhantomData,
}
}
/// Moves the last `A::MIN_CHILDREN_NUM` elements into a freshly created leaf and
/// returns that leaf.
///
/// The new leaf shares `bump` and `parent` with `self`, keeps the moved elements
/// in their original order, and has `prev` pointing at `self` and `next` copied
/// from `self.next`.
///
/// `self.next` is deliberately left unchanged: the returned leaf is a by-value
/// temporary, so any pointer to it taken here would dangle as soon as it is
/// returned. The forward link (and the `prev` link of the following leaf) has to
/// be set once the returned leaf has been placed at its final address.
///
/// # Panics
/// Panics if `self` holds fewer than `A::MIN_CHILDREN_NUM` elements.
#[inline]
fn _split(&mut self) -> Self {
    let mut ans = Self::new(self.bump, self.parent);
    for _ in 0..A::MIN_CHILDREN_NUM {
        ans.children.push(self.children.pop().unwrap());
    }
    // `pop` hands the elements over back-to-front; reverse them so the new leaf
    // holds them in their original order.
    ans.children.reverse();
    ans.next = self.next;
    ans.prev = Some(NonNull::from(&mut *self));
    ans
}
/// Appends `value` after the last element of this leaf.
///
/// If the current last element reports that it can merge with `value`, the two
/// are merged in place. Otherwise `value` is pushed as a new element. When the
/// leaf is already at `A::MAX_CHILDREN_NUM`, it is split first, `value` goes
/// into the split-off leaf, and that leaf is returned as `Err`.
pub fn push_child(&mut self, value: T) -> Result<(), Self> {
    if let Some(tail) = self.children.last_mut() {
        if tail.is_mergable(&value, &()) {
            tail.merge(&value, &());
            A::update_cache_leaf(self);
            return Ok(());
        }
    }

    // Common case: there is still room in this leaf.
    if self.children.len() != A::MAX_CHILDREN_NUM {
        self.children.push(value);
        A::update_cache_leaf(self);
        return Ok(());
    }

    // Full: split and append to the new right-hand leaf.
    let mut right = self._split();
    right.push_child(value);
    A::update_cache_leaf(self);
    A::update_cache_leaf(&mut right);
    Err(right)
}
/// Inserts `value` at position `raw_index` of this leaf.
///
/// `A::find_insert_pos_leaf` translates `raw_index` into `(index, offset)`:
/// - A position on an element boundary (`offset` is `0` or the element's `len()`)
///   is a clean cut. `value` is merged into the element before the cut when
///   that element reports it is mergeable, otherwise it is inserted as a new
///   element.
/// - A position strictly inside an element splits that element into a head and
///   a tail, and `value` is placed between them.
///
/// Returns `Ok(())` when everything fits into this leaf, or `Err(sibling)` when
/// the leaf had to split; `sibling` holds the upper part of the elements and
/// must be placed directly after `self` by the caller.
///
/// NOTE(review): splitting an element needs two free slots. With
/// `A::MAX_CHILDREN_NUM < 4` the two halves produced by a leaf split may not
/// have that much room — confirm the minimum supported fan-out.
pub fn insert(&mut self, raw_index: A::Int, value: T) -> Result<(), Self> {
    if self.children.is_empty() {
        self.children.push(value);
        A::update_cache_leaf(self);
        return Ok(());
    }

    let (mut index, offset) = A::find_insert_pos_leaf(self, raw_index);
    // Normalise "at the end of element `index`" to "before element `index + 1`".
    let clean_cut = if offset == 0 {
        true
    } else if offset == self.children[index].len() {
        index += 1;
        true
    } else {
        false
    };

    if clean_cut {
        // There is only a preceding element to merge into when the cut is not
        // at the very front of the leaf.
        if index > 0 {
            let prev = &mut self.children[index - 1];
            if prev.is_mergable(&value, &()) {
                prev.merge(&value, &());
                A::update_cache_leaf(self);
                return Ok(());
            }
        }

        if self.children.len() == A::MAX_CHILDREN_NUM {
            let mut ans = self._split();
            let kept = self.children.len();
            if index <= kept {
                self.children.insert(index, value);
            } else {
                ans.children.insert(index - kept, value);
            }
            A::update_cache_leaf(self);
            A::update_cache_leaf(&mut ans);
            return Err(ans);
        }

        self.children.insert(index, value);
        A::update_cache_leaf(self);
        return Ok(());
    }

    // The position falls inside element `index`: cut it into `a | b` and place
    // `value` between the two pieces, giving `a, value, b`.
    let a = self.children[index].slice(0, offset);
    let b = self.children[index].slice(offset, self.children[index].len());
    self.children[index] = a;

    // Two new elements (`value` and `b`) have to be stored.
    if self.children.len() + 2 > A::MAX_CHILDREN_NUM {
        let mut ans = self._split();
        // Insert `b` first and `value` second at the same logical position, so
        // `value` ends up in front of `b`. Each piece goes to whichever half
        // contains that position.
        for piece in [b, value] {
            let kept = self.children.len();
            if index + 1 <= kept {
                self.children.insert(index + 1, piece);
            } else {
                ans.children.insert(index + 1 - kept, piece);
            }
        }
        A::update_cache_leaf(self);
        A::update_cache_leaf(&mut ans);
        return Err(ans);
    }

    self.children.insert(index + 1, b);
    self.children.insert(index + 1, value);
    A::update_cache_leaf(self);
    Ok(())
}
}

View file

@ -1,19 +1,19 @@
use std::fmt::Debug;
use crate::Rle;
use super::node::{InternalNode, Node};
use super::node::{InternalNode, LeafNode, Node};
pub trait RleTreeTrait<T: Rle>: Sized {
type Int: num::Integer;
type InternalCache;
const MAX_CHILDREN_NUM: usize;
const MIN_CHILDREN_NUM: usize = Self::MAX_CHILDREN_NUM / 2;
type Int: num::Integer + Copy;
type InternalCache: Default + Debug;
fn update_cache();
fn min_children() -> usize;
#[inline]
fn max_children() -> usize {
Self::min_children() * 2
}
fn before_insert_internal(node: InternalNode<'_, T, Self>);
fn find_insert_pos_internal(node: InternalNode<'_, T, Self>, index: Self::Int) -> usize;
fn update_cache_leaf(node: &mut LeafNode<'_, T, Self>);
fn update_cache_internal(node: &mut InternalNode<'_, T, Self>);
fn find_insert_pos_internal(node: &mut InternalNode<'_, T, Self>, index: Self::Int) -> usize;
/// returns (index, offset)
/// if 0 < offset < children[index].len(), we need to split the node
fn find_insert_pos_leaf(node: &mut LeafNode<'_, T, Self>, index: Self::Int) -> (usize, usize);
}