perf: binary search for global tree trait

This commit is contained in:
Zixuan Chen 2022-11-10 01:17:22 +08:00
parent 592199ab65
commit 5027da1e2e
3 changed files with 359 additions and 52 deletions

View file

@ -315,6 +315,147 @@ mod test {
use super::Action::*;
use super::*;
// Regression case: replays one fixed action list through `test_multi_sites`,
// whose first argument is 8 (presumably the number of sites — confirm against
// the helper's signature).
//
// The list mixes `Ins`, `Del`, `Sync { from, to }` and `SyncAll` actions. Many
// `pos`, `len` and `site` values are far outside what 8 sites / a short text
// could address, so the helper presumably normalizes them before applying —
// TODO confirm. The values look machine-generated (e.g. from a fuzzer); keep
// them and their order byte-for-byte, since the sequence itself is the test.
#[test]
fn test_16() {
// retreat failed
test_multi_sites(
8,
vec![
Ins {
content: "\0\0\0\0".into(),
pos: 46183951198975,
site: 0,
},
Ins {
content: "hhhh\0\u{15}".into(),
pos: 18446463180827232135,
site: 255,
},
Ins {
content: "\u{1}\0\0\0".into(),
pos: 17872543955649300641,
site: 247,
},
SyncAll,
Ins {
content: "I".into(),
pos: 17144359356472099242,
site: 237,
},
Ins {
content: "\u{1}\0\0\u{14}\0".into(),
pos: 17149707381026848749,
site: 64,
},
Ins {
content: "\0\0".into(),
pos: 576460752320727201,
site: 8,
},
Ins {
content: "\u{8}\u{8}\u{8}".into(),
pos: 74451787075125633,
site: 7,
},
Ins {
content: "\0DD*\u{1}\0\n\0".into(),
pos: 1100718403053,
site: 161,
},
SyncAll,
SyncAll,
// Empty insertion: exercises the zero-length content path.
Ins {
content: "".into(),
pos: 18444210798919163626,
site: 255,
},
Del {
pos: 7523377975159973992,
len: 7523377975159973992,
site: 104,
},
Del {
pos: 90728552,
len: 7523377975153131520,
site: 104,
},
Del {
pos: 6842472,
len: 8259072,
site: 48,
},
Ins {
content: "0002".into(),
pos: 60868003017,
site: 0,
},
Ins {
content: "\0\0\0\0hhhh\0\u{15}".into(),
pos: 201792722274183,
site: 0,
},
Ins {
content: "\t\0,`".into(),
pos: 2882585236494125450,
site: 136,
},
Ins {
content: "\u{4}\u{4}\u{4}".into(),
pos: 6929914515027398276,
site: 138,
},
// The only pairwise sync in this case; every other sync is `SyncAll`.
Sync { from: 137, to: 4 },
Ins {
content: "\u{4}".into(),
pos: 2879109919149700,
site: 4,
},
Del {
pos: 8680820740569200760,
len: 18446744071435745400,
site: 255,
},
SyncAll,
SyncAll,
Del {
pos: 144116287587483658,
len: 8646911284728217599,
site: 120,
},
Ins {
content: "\0\0SSSSSS\0".into(),
pos: 4755847789890108906,
site: 38,
},
Ins {
content: "\0\u{10}I\u{4}\u{4}\u{4}\u{4}\u{4}\0\0\u{1}\0".into(),
pos: 10768900350821001,
site: 0,
},
Ins {
content: "I\u{4}\u{4}".into(),
pos: 73387494324306057,
site: 0,
},
Ins {
content: "\u{4}\u{4}\u{4}\u{4}\u{4}".into(),
pos: 47890328526783742,
site: 0,
},
Ins {
content: "\0\0\0\0\0\0\0\0\0\0\0\0\0\0".into(),
pos: 792926255917040777,
site: 0,
},
Ins {
content: "\u{4}\0\0\0\0\u{4}".into(),
pos: 3497652043353424901,
site: 48,
},
],
)
}
#[test]
fn test_15() {
// retreat failed

View file

@ -53,7 +53,7 @@ impl<Value: Rle, Index: GlobalIndex> HasIndex for WithIndex<Value, Index> {
}
type RangeMapTrait<Index, Value, TreeArena> =
GlobalTreeTrait<WithIndex<Value, Index>, 16, TreeArena>;
GlobalTreeTrait<WithIndex<Value, Index>, 32, TreeArena>;
#[repr(transparent)]
#[derive(Debug)]

View file

@ -1,4 +1,4 @@
use std::{fmt::Debug, ops::Deref};
use std::{cmp::Ordering, fmt::Debug, ops::Deref};
use bumpalo::Bump;
use num::{traits::AsPrimitive, FromPrimitive, Integer};
@ -42,6 +42,7 @@ impl Position {
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct FindPosResult<I> {
pub child_index: usize,
pub offset: I,
@ -279,6 +280,7 @@ fn get_cache<T: Rle + HasIndex, const MAX_CHILD: usize, TreeArena: Arena>(
}
}
// Child-count cutoff consulted by the `node.children.len() < BINARY_SEARCH_THRESHOLD`
// checks in the position-lookup functions below: when the check is true a linear
// scan over the children is used, otherwise `binary_search_by`.
// With the value 0 that comparison can never be true for a `usize` length, so the
// binary-search branch is always taken and the linear branch is effectively dead.
const BINARY_SEARCH_THRESHOLD: usize = 0;
impl<T: Rle + HasIndex, const MAX_CHILD: usize, TreeArena: Arena> RleTreeTrait<T>
for GlobalTreeTrait<T, MAX_CHILD, TreeArena>
{
@ -316,65 +318,225 @@ impl<T: Rle + HasIndex, const MAX_CHILD: usize, TreeArena: Arena> RleTreeTrait<T
node: &InternalNode<'_, T, Self>,
index: Self::Int,
) -> FindPosResult<Self::Int> {
for (i, child) in node.children().iter().enumerate() {
let cache = get_cache(child);
if index <= cache.end {
if index < cache.start {
return FindPosResult::new_not_found(i, index, Position::Before);
}
// prefer Start than End
if index == cache.end
&& i + 1 < node.children.len()
&& index == get_cache(&node.children[i + 1]).start
{
return FindPosResult::new(i + 1, index, Position::Start);
}
return FindPosResult::new(i, index, get_pos_global(index, cache));
}
if node.children.is_empty() || index > node.cache.end {
return FindPosResult::new_not_found(
node.children.len().saturating_sub(1),
index,
Position::After,
);
}
FindPosResult::new_not_found(
node.children.len().saturating_sub(1),
index,
Position::After,
)
if index < node.cache.start {
return FindPosResult::new_not_found(0, index, Position::Before);
}
if node.children.len() < BINARY_SEARCH_THRESHOLD {
for (i, child) in node.children().iter().enumerate() {
let cache = get_cache(child);
if index <= cache.end {
if index < cache.start {
return FindPosResult::new_not_found(i, index, Position::Before);
}
// prefer Start than End
if index == cache.end
&& i + 1 < node.children.len()
&& index == get_cache(&node.children[i + 1]).start
{
return FindPosResult::new(i + 1, index, Position::Start);
}
return FindPosResult::new(i, index, get_pos_global(index, cache));
}
}
unreachable!()
} else {
let ans = node
.children
.binary_search_by(|x| {
let cache = get_cache(x);
if index < cache.start {
Ordering::Greater
} else if index > cache.end {
Ordering::Less
} else {
Ordering::Equal
}
})
.map_or_else(
|x| {
FindPosResult::new_not_found(
x,
index,
get_pos_global(index, get_cache(&node.children[x])),
)
},
|x| {
FindPosResult::new(
x,
index,
get_pos_global(index, get_cache(&node.children[x])),
)
},
);
if ans.pos == Position::End {
if ans.child_index + 1 < node.children.len()
&& index == get_cache(&node.children[ans.child_index + 1]).start
{
FindPosResult::new(ans.child_index + 1, index, Position::Start)
} else {
ans
}
} else {
ans
}
}
}
// NOTE(review): this span appears to interleave the removed and the added lines
// of a diff hunk. The early-`return` loop directly below the signature and the
// bare `FindPosResult::new_not_found(..)` just before the final `new` look like
// the previous implementation; the `'new` / `'old` labeled blocks look like its
// replacement. Confirm against the real file before editing any code here.
//
// Locates `index` among the children of a leaf node and reports the child
// index, an offset inside that child, and a `Position`.
//
// Replacement logic as written in this span:
// * empty leaf, or `index > node.cache.end`: not found, child index is
//   `len - 1` saturating at 0, offset is the last child's `atom_len()` (0 when
//   there is no child), `Position::After`;
// * `index < node.cache.start`: not found at child 0, `Position::Before`;
// * otherwise a linear scan when `children.len() < BINARY_SEARCH_THRESHOLD`,
//   else `binary_search_by` over each child's `[start, end]` index range;
// * a hit on a child's end that equals the next child's start is reported as
//   the next child with offset 0 and `Position::Start`.
//
// When `cfg!(test)` is true the linear-scan answer (`old`) is recomputed and
// asserted equal to `new`.
fn find_pos_leaf(node: &LeafNode<'_, T, Self>, index: Self::Int) -> FindPosResult<usize> {
for (i, child) in node.children().iter().enumerate() {
let cache = Cache {
start: child.get_start_index(),
end: child.get_end_index(),
};
if index <= cache.end {
if index < cache.start {
return FindPosResult::new_not_found(i, 0, Position::Before);
}
// prefer Start than End
if index == cache.end
&& i + 1 < node.children.len()
&& index == node.children[i + 1].get_start_index()
{
return FindPosResult::new(i + 1, 0, Position::Start);
}
return FindPosResult::new(
i,
(index - cache.start).as_(),
get_pos_global(index, cache),
// The labeled block yields the lookup result through `break 'new <value>`.
let new = 'new: {
if node.children.is_empty() || index > node.cache.end {
break 'new FindPosResult::new_not_found(
node.children.len().saturating_sub(1),
node.children.last().map(|x| x.atom_len()).unwrap_or(0),
Position::After,
);
}
if index < node.cache.start {
break 'new FindPosResult::new_not_found(0, 0, Position::Before);
}
if node.children.len() < BINARY_SEARCH_THRESHOLD {
for (i, child) in node.children().iter().enumerate() {
let cache = Cache {
start: child.get_start_index(),
end: child.get_end_index(),
};
if index <= cache.end {
if index < cache.start {
break 'new FindPosResult::new_not_found(i, 0, Position::Before);
}
// prefer Start than End
if index == cache.end
&& i + 1 < node.children.len()
&& index == node.children[i + 1].get_start_index()
{
break 'new FindPosResult::new(i + 1, 0, Position::Start);
}
break 'new FindPosResult::new(
i,
(index - cache.start).as_(),
get_pos_global(index, cache),
);
}
}
unreachable!()
} else {
let ans = node
.children
.binary_search_by(|x| {
let cache = Cache {
start: x.get_start_index(),
end: x.get_end_index(),
};
// Orders the child relative to `index`: a child starting after `index`
// is Greater, one ending before it is Less, one containing it is Equal.
if index < cache.start {
Ordering::Greater
} else if index > cache.end {
Ordering::Less
} else {
Ordering::Equal
}
})
.map_or_else(
// Not-found case: `x` is the insertion point from `binary_search_by`.
// NOTE(review): `node.children[x]` presumably relies on the `node.cache`
// range checks above to keep `x < children.len()` — confirm the cache
// always matches the children.
|x| {
FindPosResult::new_not_found(
x,
0,
get_pos_global(
index,
Cache {
start: node.children[x].get_start_index(),
end: node.children[x].get_end_index(),
},
),
)
},
// Found case: offset is the distance from the child's start index.
|x| {
FindPosResult::new(
x,
(index - node.children[x].get_start_index()).as_(),
get_pos_global(
index,
Cache {
start: node.children[x].get_start_index(),
end: node.children[x].get_end_index(),
},
),
)
},
);
// Same "prefer Start than End" rule as the linear scan, applied after the search.
if ans.pos == Position::End {
if ans.child_index + 1 < node.children.len()
&& index == node.children[ans.child_index + 1].get_start_index()
{
FindPosResult::new(ans.child_index + 1, 0, Position::Start)
} else {
ans
}
} else {
ans
}
}
};
// Test-build cross-check: recompute the answer with the linear scan and compare.
// NOTE(review): the fallback below calls `last().unwrap()`, which panics on an
// empty leaf unless `index < node.cache.start` returned first, whereas `new`
// handles an empty leaf — verify empty leaves cannot reach this under test.
if cfg!(test) {
let old = 'old: {
if index < node.cache.start {
break 'old FindPosResult::new_not_found(0, 0, Position::Before);
}
for (i, child) in node.children().iter().enumerate() {
let cache = Cache {
start: child.get_start_index(),
end: child.get_end_index(),
};
if index <= cache.end {
if index < cache.start {
break 'old FindPosResult::new_not_found(i, 0, Position::Before);
}
// prefer Start than End
if index == cache.end
&& i + 1 < node.children.len()
&& index == node.children[i + 1].get_start_index()
{
break 'old FindPosResult::new(i + 1, 0, Position::Start);
}
break 'old FindPosResult::new(
i,
(index - cache.start).as_(),
get_pos_global(index, cache),
);
}
}
FindPosResult::new_not_found(
node.children.len().saturating_sub(1),
node.children().last().unwrap().atom_len(),
Position::After,
)
};
assert_eq!(old, new);
}
FindPosResult::new_not_found(
node.children.len().saturating_sub(1),
node.children().last().unwrap().atom_len(),
Position::After,
)
new
}
fn len_leaf(node: &LeafNode<'_, T, Self>) -> Self::Int {
@ -424,6 +586,10 @@ fn get_pos_global<I: Integer>(index: I, cache: Cache<I>) -> Position {
Position::Start
} else if index == cache.end {
Position::End
} else if index < cache.start {
Position::Before
} else if index > cache.end {
Position::After
} else {
Position::Middle
}