fix: tree balance issue

This commit is contained in:
Zixuan Chen 2022-10-12 20:29:56 +08:00
parent 8406b182ae
commit d36c41b7cd
7 changed files with 553 additions and 75 deletions

View file

@ -121,6 +121,7 @@ impl Sliceable for YSpan {
Some(self.id.inc(from as i32 - 1))
};
// origin_right should be the same
let origin_right = self.origin_right;
YSpan {
origin_left,

View file

@ -107,7 +107,6 @@ impl ListCrdt for YataImpl {
fn integrate(container: &mut Self::Container, op: Self::OpUnit) {
container.vv.set_end(op.id.inc(op.len as i32));
// dbg!(&container);
// SAFETY: we know this is safe because in [YataImpl::insert_after] there is no access to shared elements
unsafe { crdt_list::yata::integrate::<Self>(container, op) };
container.check_consistency();
@ -301,42 +300,218 @@ pub mod fuzz {
use Action::*;
#[test]
fn issue_0() {
fn issue_1() {
crdt_list::test::test_with_actions::<YataImpl>(
2,
5,
5,
vec![
vec! [
NewOp {
client_id: 1,
pos: 0,
client_id: 18446743798824736406,
pos: 18446744073699196927,
},
Sync { from: 1, to: 0 },
NewOp {
client_id: 0,
Delete {
client_id: 18446744073709551615,
pos: 0,
len: 7411535208244772857,
},
Delete {
client_id: 18446540664058413055,
pos: 10873349650923257855,
len: 18446603336204419555,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744073702670335,
len: 18446744073709486335,
},
Delete {
client_id: 18446744073709551615,
pos: 11719107999768421119,
len: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768945314,
pos: 11719107999768421026,
},
Delete {
client_id: 10851025925718409122,
pos: 540508742418326,
len: 18446504380166307839,
},
Delete {
client_id: 18446744070052118527,
pos: 18446744073709551615,
len: 18446744073709524735,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744073709551615,
len: 11719107999768421026,
},
NewOp {
client_id: 11719107997996196514,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11739092723114877602,
pos: 10880594044147376802,
},
Delete {
client_id: 18446743137406648319,
pos: 18446744073709551615,
len: 18446744073702670335,
},
Sync {
from: 18374686479688400896,
to: 18446744073709551615,
},
Delete {
client_id: 11719107999768421119,
pos: 11719107999768421026,
len: 18446744071947943842,
},
Delete {
client_id: 4294967297,
pos: 18446744073709551615,
len: 11719210655348162559,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 18446743672711193250,
},
Delete {
client_id: 11745387828182253567,
pos: 11719107999768421026,
len: 11719107999768421538,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
Delete {
client_id: 18417073299693934335,
pos: 18446744073709551510,
len: 18446744073709551615,
},
Delete {
client_id: 15914838024966373375,
pos: 15914838024376868060,
len: 15914635714237357276,
},
Sync {
from: 18374686479671623680,
to: 18446744073709551615,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744073709551615,
len: 18446744073709551615,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744073695461375,
len: 18446744073709551615,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744073709551615,
len: 18446744073709551615,
},
Delete {
client_id: 18446744073558556672,
pos: 18446744073642442557,
len: 18446744073709551615,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744073709551614,
len: 18446744073709551615,
},
Delete {
client_id: 0,
pos: 0,
len: 2,
len: 0,
},
],
)
}
#[test]
fn issue_1() {
crdt_list::test::test_with_actions::<YataImpl>(
3,
5,
vec![
NewOp {
client_id: 0,
pos: 4,
Sync {
from: 0,
to: 0,
},
Sync {
from: 0,
to: 0,
},
Sync {
from: 0,
to: 0,
},
Sync {
from: 0,
to: 0,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744073709551615,
len: 1099511627775,
},
Sync {
from: 11719107999774539526,
to: 11719107999768421026,
},
NewOp {
client_id: 1,
pos: 4,
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 18446744072143151778,
pos: 18446744073709551615,
},
Delete {
client_id: 17144620962624171493,
pos: 16550640557684026861,
len: 18391177001763530213,
},
Delete {
client_id: 12659530246668681215,
pos: 12659530246663417775,
len: 17144611899198910383,
},
Delete {
client_id: 12659589887623556589,
pos: 4221573655528072677,
len: 18446744073707847679,
},
Delete {
client_id: 12659530246663417775,
pos: 17127101077014949807,
len: 17144620962624171501,
},
],
)
@ -346,12 +521,232 @@ pub mod fuzz {
fn normalize() {
let mut actions = vec![
NewOp {
client_id: 10489325084848624384,
pos: 10490853853016199569,
client_id: 18446743798824736406,
pos: 18446744073709551615,
},
Delete {
client_id: 18446744069431361535,
pos: 18446744073709551615,
len: 18446744073709496575,
},
Delete {
client_id: 255,
pos: 1098353998080,
len: 18446744069414584320,
},
Delete {
client_id: 13093571490658779135,
pos: 18374688556288311293,
len: 12659530248010825727,
},
Delete {
client_id: 18446744073709551535,
pos: 10880696699727118335,
len: 18374967954648334335,
},
Delete {
client_id: 18417189201154932735,
pos: 10880696699727118335,
len: 10851025326177714175,
},
Delete {
client_id: 18402271027389267903,
pos: 18446743150291582975,
len: 18446744073709551615,
},
Sync {
from: 18427322270251745280,
to: 18374686481397256192,
},
Delete {
client_id: 16565928279328900863,
pos: 18374688556672476645,
len: 18446743137406648319,
},
Delete {
client_id: 18417189201154932735,
pos: 18446463698244468735,
len: 18446744073709551615,
},
Delete {
client_id: 11719108400766779391,
pos: 11719107999768421026,
len: 11719107999768421026,
},
NewOp {
client_id: 10490853404242804625,
pos: 10489325061612240659,
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 12872029504375268002,
},
NewOp {
client_id: 18417188748414850710,
pos: 18410715272395746198,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744071947943935,
len: 8573222911,
},
Delete {
client_id: 18446744073709551615,
pos: 11719107999768444927,
len: 16565928279328924322,
},
NewOp {
client_id: 18446603336204419555,
pos: 18446744073709551397,
},
Delete {
client_id: 18446744073702670335,
pos: 18446744073709486335,
len: 18446744073709551615,
},
Delete {
client_id: 11719107999768421119,
pos: 11719107999768421026,
len: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719108000959603362,
},
NewOp {
client_id: 11719107999768421026,
pos: 18446641486849286818,
},
NewOp {
client_id: 136118438245406358,
pos: 18385382526639144704,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 18446744072143151778,
pos: 18446744073709551615,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
Delete {
client_id: 10880580798266119830,
pos: 18446744073709551615,
len: 18446744073709551615,
},
Delete {
client_id: 18436853815706648575,
pos: 18446744073709551615,
len: 18446744073709551615,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744073709551615,
len: 17798225731663167487,
},
Delete {
client_id: 18446744073642442557,
pos: 18446744073709551615,
len: 18446744073709551615,
},
Delete {
client_id: 18446744073709551614,
pos: 11719108400766779391,
len: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 18446744073709527714,
pos: 18446744073709551615,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 18446744073709551522,
},
Delete {
client_id: 18446744073709551400,
pos: 18446744073709524480,
len: 18391293503297552383,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744073709551615,
len: 18446744073709551615,
},
Delete {
client_id: 18446744035054846207,
pos: 72057576858009087,
len: 18383412203949654016,
},
Delete {
client_id: 18446744073709551615,
pos: 18446744073709551615,
len: 18446744073709551615,
},
Delete {
client_id: 11719107999768421026,
pos: 11719107999768421026,
len: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 18446743672711193250,
},
Delete {
client_id: 11745387828182253567,
pos: 11719107999768421026,
len: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
NewOp {
client_id: 11719107999768421026,
pos: 11719107999768421026,
},
];

View file

@ -296,10 +296,6 @@ impl<T: Rle, A: RleTreeTrait<T>> RleTree<T, A> {
for (mut node, updates) in updates_map {
// SAFETY: we have the exclusive reference to the tree and the cursor is valid
let node = unsafe { node.as_mut() };
if updates.is_empty() {
A::update_cache_internal(node);
continue;
}
if let Err(new) = node.apply_updates(updates) {
internal_updates_map
@ -316,6 +312,8 @@ impl<T: Rle, A: RleTreeTrait<T>> RleTree<T, A> {
}
}
}
self.debug_check();
}
pub fn update_range<U, F>(

View file

@ -9,6 +9,7 @@ use crate::Rle;
use super::{cursor::SafeCursor, tree_trait::RleTreeTrait, BumpVec};
use bumpalo::Bump;
use enum_as_inner::EnumAsInner;
mod utils;
mod internal_impl;
mod leaf_impl;
pub(crate) mod node_trait;

View file

@ -3,7 +3,10 @@ use std::{
fmt::{Debug, Error, Formatter},
};
use crate::rle_tree::tree_trait::{FindPosResult, Position};
use crate::rle_tree::{
node::utils::distribute,
tree_trait::{FindPosResult, Position},
};
use super::*;
@ -19,22 +22,29 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
}
}
/// return result need to update cache
#[inline]
fn _split(&mut self) -> &'a mut Node<'a, T, A> {
let ans = self
.bump
.alloc(Node::Internal(Self::new(self.bump, self.parent)));
let inner_ptr = NonNull::new(&mut *ans.as_internal_mut().unwrap()).unwrap();
let inner = ans.as_internal_mut().unwrap();
for child in self
.children
.drain(self.children.len() - A::MIN_CHILDREN_NUM..self.children.len())
{
child.set_parent(inner_ptr);
inner.children.push(child);
self._balance(inner);
ans
}
ans
/// Evens out the number of children held by `self` and `other` by moving the tail of
/// `self.children` into `other`, so that `self` keeps half (rounded down) of the combined total.
///
/// The moved children are re-parented to `other` and placed *in front of* the children `other`
/// already owns, preserving their relative order.
/// NOTE(review): this assumes `other` is the sibling immediately to the right of `self`, which
/// is the case at both visible call sites (`_split` and the root overflow path of
/// `apply_updates`) — confirm for any new caller.
///
/// This function does not refresh any cache; callers need to update the cache of both nodes
/// afterwards.
///
/// # Panics
///
/// `drain` panics if `self` holds fewer children than the computed half, i.e. when `other` is
/// the larger of the two nodes.
#[inline]
fn _balance(&mut self, other: &mut Self) {
    let keep_num = (self.children.len() + other.children.len()) / 2;
    debug_assert!(keep_num >= A::MIN_CHILDREN_NUM);
    // Appending the moved children after `other`'s existing ones would put them behind
    // their successors whenever `other` is non-empty, so insert them at the front instead.
    // With an empty `other` this is identical to pushing.
    let moved: Vec<_> = self.children.drain(keep_num..).collect();
    for (offset, child) in moved.into_iter().enumerate() {
        child.set_parent(other.into());
        other.children.insert(offset, child);
    }

    debug_assert!(self.children.len() >= A::MIN_CHILDREN_NUM);
    debug_assert!(other.children.len() >= A::MIN_CHILDREN_NUM);
}
#[inline]
@ -59,6 +69,23 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
}
/// Validates this node and then every node below it.
///
/// The node's own balance and parent links are checked first, each child is then checked
/// recursively, and finally the node is handed to `A::check_cache_internal`.
pub(crate) fn check(&mut self) {
    self.check_balance();
    self.check_children_parent_link();

    self.children.iter_mut().for_each(|child| match child {
        Node::Internal(inner) => {
            inner.check();
        }
        Node::Leaf(leaf) => {
            leaf.check();
        }
    });

    A::check_cache_internal(self);
}
fn check_balance(&mut self) {
if !self.is_root() {
assert!(
self.children.len() >= A::MIN_CHILDREN_NUM,
@ -66,28 +93,46 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
self.children.len()
);
}
assert!(
self.children.len() <= A::MAX_CHILDREN_NUM,
"children.len() = {}",
self.children.len()
);
}
fn check_balance_recursively(&self) {
if !self.is_root() {
assert!(
self.children.len() >= A::MIN_CHILDREN_NUM,
"children.len() = {}",
self.children.len()
);
}
assert!(
self.children.len() <= A::MAX_CHILDREN_NUM,
"children.len() = {}",
self.children.len()
);
let self_ptr = self as *const _;
for child in self.children.iter_mut() {
match child {
Node::Internal(node) => {
node.check();
assert!(std::ptr::eq(node.parent.unwrap().as_ptr(), self_ptr));
}
Node::Leaf(node) => {
node.check();
assert!(std::ptr::eq(node.parent.as_ptr(), self_ptr));
for child in self.children.iter() {
if let Some(child) = child.as_internal() {
child.check_balance_recursively();
}
}
}
A::check_cache_internal(self);
/// Asserts that the parent pointer stored in every direct child refers to this node.
///
/// Only the immediate children are inspected; panics on the first child whose parent pointer
/// differs from the address of `self`.
fn check_children_parent_link(&mut self) {
    let self_ptr = self as *const _;
    self.children.iter_mut().for_each(|child| match child {
        // Internal nodes store an optional parent, so it is unwrapped before comparing.
        Node::Internal(node) => {
            assert!(std::ptr::eq(node.parent.unwrap().as_ptr(), self_ptr));
        }
        Node::Leaf(node) => {
            assert!(std::ptr::eq(node.parent.as_ptr(), self_ptr));
        }
    });
}
// TODO: simplify this func?
@ -222,12 +267,19 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
&mut self,
mut updates: Vec<(usize, Vec<&'a mut Node<'a, T, A>>)>,
) -> Result<(), Vec<&'a mut Node<'a, T, A>>> {
if updates.is_empty() {
A::update_cache_internal(self);
return Ok(());
}
self.check_balance_recursively();
self.check_children_parent_link();
updates.sort_by_key(|x| x.0);
let mut new_children: Vec<&'a mut Node<'a, T, A>> = Vec::new();
let mut self_children = std::mem::replace(&mut self.children, BumpVec::new_in(self.bump));
let mut last_end = 0;
let mut saved_end = 0;
for (index, replace) in updates {
for child in self_children.drain(0..index - last_end + 1) {
for child in self_children.drain(0..index + 1 - saved_end) {
new_children.push(child);
}
@ -235,7 +287,7 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
new_children.push(element);
}
last_end = index;
saved_end = index + 1;
}
for child in self_children.drain(..) {
@ -252,13 +304,15 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
A::update_cache_internal(self);
Ok(())
} else {
for child in
new_children.drain(0..(std::cmp::min(A::MAX_CHILDREN_NUM, new_children.len())))
{
let children_nums =
distribute(new_children.len(), A::MIN_CHILDREN_NUM, A::MAX_CHILDREN_NUM);
let mut index = 0;
for child in new_children.drain(0..children_nums[index]) {
child.set_parent(self_ptr);
self.children.push(child);
}
index += 1;
A::update_cache_internal(self);
let mut ans_vec = Vec::new();
while !new_children.is_empty() {
@ -266,13 +320,12 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
.bump
.alloc(Node::Internal(InternalNode::new(self.bump, self.parent)));
let new_internal = new_internal_node.as_internal_mut().unwrap();
for child in
new_children.drain(0..(std::cmp::min(A::MAX_CHILDREN_NUM, new_children.len())))
{
for child in new_children.drain(..children_nums[index]) {
child.set_parent(new_internal.into());
new_internal.children.push(child);
}
index += 1;
A::update_cache_internal(new_internal);
ans_vec.push(new_internal_node);
}
@ -280,11 +333,19 @@ impl<'a, T: Rle, A: RleTreeTrait<T>> InternalNode<'a, T, A> {
Err(ans_vec)
};
self.check_children_parent_link();
self.check_balance_recursively();
if result.is_err() && self.is_root() {
let mut new = result.unwrap_err();
assert!(new.len() == 1);
let v = new.pop().unwrap();
self._create_level(v);
let new = new.pop().unwrap();
let inner = new.as_internal_mut().unwrap();
self._balance(inner);
A::update_cache_internal(self);
A::update_cache_internal(inner);
self._create_level(new);
self.check();
self.check_balance_recursively();
Ok(())
} else {
result

View file

@ -4,7 +4,7 @@ use crate::rle_tree::{
};
use std::fmt::{Debug, Error, Formatter};
use super::*;
use super::{utils::distribute, *};
impl<'bump, T: Rle, A: RleTreeTrait<T>> LeafNode<'bump, T, A> {
#[inline]
@ -473,13 +473,15 @@ impl<'bump, T: Rle, A: RleTreeTrait<T>> LeafNode<'bump, T, A> {
A::update_cache_leaf(self);
Ok(())
} else {
for child in
new_children.drain(0..std::cmp::min(A::MAX_CHILDREN_NUM, new_children.len()))
{
let children_nums =
distribute(new_children.len(), A::MIN_CHILDREN_NUM, A::MAX_CHILDREN_NUM);
let mut index = 0;
for child in new_children.drain(..children_nums[index]) {
notify(child, self);
self.children.push(child);
}
index += 1;
A::update_cache_leaf(self);
let mut leaf_vec = Vec::new();
while !new_children.is_empty() {
@ -487,13 +489,12 @@ impl<'bump, T: Rle, A: RleTreeTrait<T>> LeafNode<'bump, T, A> {
.bump
.alloc(Node::Leaf(LeafNode::new(self.bump, self.parent)));
let new_leaf = new_leaf_node.as_leaf_mut().unwrap();
for child in
new_children.drain(0..std::cmp::min(A::MAX_CHILDREN_NUM, new_children.len()))
{
for child in new_children.drain(..children_nums[index]) {
notify(child, new_leaf);
new_leaf.children.push(child);
}
index += 1;
A::update_cache_leaf(new_leaf);
leaf_vec.push(new_leaf_node);
}

View file

@ -0,0 +1,21 @@
/// Splits `num` items into `num / min` groups whose sizes sum to `num`, with every size in the
/// range `[min, max]`.
///
/// Each group starts with `min` items. The remainder (`num % min`) is then handed out one item
/// at a time from the first group to the last, wrapping around as needed, so group sizes differ
/// by at most one and the larger groups come first. `distribute(10, 3, 6)` yields `[4, 3, 3]`.
///
/// Returns an empty vector when `num` is zero (only reachable when debug assertions are off).
///
/// # Panics
///
/// Panics when no valid split exists, instead of looping forever:
/// - `min` is zero;
/// - `num` is non-zero but smaller than `min`;
/// - the remainder does not fit into the groups without exceeding `max`
///   (for example `min == max` while `num` is not a multiple of `min`).
pub(super) fn distribute(num: usize, min: usize, max: usize) -> Vec<usize> {
    assert!(min > 0, "distribute: `min` must be greater than zero");
    debug_assert!(num >= min);

    let groups = num / min;
    let extra = num % min;
    if groups == 0 {
        // Nothing can be placed in a group of at least `min` items.
        assert!(
            num == 0,
            "distribute: cannot split {} items into groups of at least {}",
            num,
            min
        );
        return Vec::new();
    }

    // Total room left above `min` across all groups; the remainder has to fit in there.
    let headroom = groups.saturating_mul(max.saturating_sub(min));
    assert!(
        extra <= headroom,
        "distribute: cannot split {} items into groups sized within [{}, {}]",
        num,
        min,
        max
    );

    // Closed form of the round-robin hand-out: every group receives `extra / groups` additional
    // items and the first `extra % groups` groups receive one more.
    let base = min + extra / groups;
    let bumped = extra % groups;
    (0..groups)
        .map(|i| if i < bumped { base + 1 } else { base })
        .collect()
}