mirror of
https://github.com/loro-dev/loro.git
synced 2025-01-23 05:24:51 +00:00
399 lines
10 KiB
Rust
399 lines
10 KiB
Rust
use std::{
|
|
ops::{Deref, Range},
|
|
vec,
|
|
};
|
|
|
|
use num::Integer;
|
|
|
|
use crate::{HasLength, Mergable, Slice, Sliceable};
|
|
|
|
/// `RleVecWithIndex<T, Cfg>` is a vector whose elements can be compressed using
/// run-length encoding.
///
/// A `T` value may be merged with its neighbor: when a new element is pushed, it may be
/// merged into the last element of the array. Each element has a length, so there are two
/// kinds of indexes:
/// 1. (merged) The index of a merged element inside the underlying `Vec<T>`.
/// 2. (atom) The index of a substantial (atom) element, counted across all merged elements.
///
/// Atom indexes are used by:
/// - `get(index)`, which finds the merged element containing the atom at `index`.
/// - `slice_iter(from, to)` and `slice(from, to)`, which cover the atoms between `from` and `to`.
/// - `content_len()` / `atom_len()` (from `HasLength`), which return the number of atoms.
///
/// Merged indexes are used by `merged_len()`, `get_merged(index)` and `slice_merged(range)`.
///
/// Note: this file defines no inherent `len()`. The type dereferences to `[T]`, so a `len()`
/// call resolved through that deref counts merged elements, not atoms.
#[derive(Debug, Clone)]
pub struct RleVecWithIndex<T, Cfg = ()> {
    /// The merged elements, in insertion order.
    vec: Vec<T>,
    /// Sum of `content_len()` over every value pushed so far.
    atom_len: usize,
    /// Atom offset at which each merged element starts, followed by one trailing entry equal
    /// to `atom_len`. Empty while `vec` is empty.
    index: Vec<usize>,
    /// Configuration handed to `Mergable::is_mergable` and `Mergable::merge`.
    cfg: Cfg,
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct SearchResult<'a, T, I: Integer> {
|
|
pub element: &'a T,
|
|
pub merged_index: usize,
|
|
pub offset: I,
|
|
}
|
|
|
|
impl<T: Eq + PartialEq> PartialEq for RleVecWithIndex<T> {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
self.vec == other.vec
|
|
}
|
|
}
|
|
|
|
// Marker impl: the `PartialEq` relation on the merged elements is a full equivalence
// whenever `T: Eq`.
impl<T> Eq for RleVecWithIndex<T> where T: Eq + PartialEq {}
|
|
|
|
impl<T: Mergable<Cfg> + HasLength, Cfg> RleVecWithIndex<T, Cfg> {
|
|
/// push a new element to the end of the array. It may be merged with last element.
|
|
pub fn push(&mut self, value: T) {
|
|
self.atom_len += value.content_len();
|
|
if self.vec.is_empty() {
|
|
self.vec.push(value);
|
|
self.index.push(0);
|
|
self.index.push(self.atom_len);
|
|
return;
|
|
}
|
|
|
|
let last = self.vec.last_mut().unwrap();
|
|
if last.is_mergable(&value, &self.cfg) {
|
|
last.merge(&value, &self.cfg);
|
|
*self.index.last_mut().unwrap() = self.atom_len;
|
|
return;
|
|
}
|
|
self.vec.push(value);
|
|
self.index.push(self.atom_len);
|
|
}
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
self.vec.is_empty()
|
|
}
|
|
|
|
/// get the element at the given atom index.
|
|
/// return: (element, merged_index, offset)
|
|
pub fn get(&self, index: usize) -> Option<SearchResult<'_, T, usize>> {
|
|
if index > self.atom_len {
|
|
return None;
|
|
}
|
|
|
|
let mut start = self.index.binary_search(&index).unwrap_or_else(|x| x);
|
|
|
|
if index < self.index[start] {
|
|
start -= 1;
|
|
}
|
|
|
|
if start >= self.vec.len() {
|
|
start -= 1;
|
|
}
|
|
|
|
let value = &self.vec[start];
|
|
Some(SearchResult {
|
|
element: value,
|
|
merged_index: start,
|
|
offset: index - self.index[start],
|
|
})
|
|
}
|
|
|
|
/// get a slice from `from` to `to` with atom indexes
|
|
pub fn slice_iter(&self, from: usize, to: usize) -> SliceIterator<'_, T> {
|
|
if from == to || self.merged_len() == 0 {
|
|
return SliceIterator::new_empty();
|
|
}
|
|
|
|
let from_result = self.get(from);
|
|
if from_result.is_none() {
|
|
return SliceIterator::new_empty();
|
|
}
|
|
|
|
let from_result = from_result.unwrap();
|
|
let to_result = if to == self.atom_len {
|
|
None
|
|
} else {
|
|
self.get(to)
|
|
};
|
|
if let Some(to_result) = to_result {
|
|
SliceIterator {
|
|
vec: &self.vec,
|
|
cur_index: from_result.merged_index,
|
|
cur_offset: from_result.offset,
|
|
end_index: Some(to_result.merged_index),
|
|
end_offset: Some(to_result.offset),
|
|
}
|
|
} else {
|
|
SliceIterator {
|
|
vec: &self.vec,
|
|
cur_index: from_result.merged_index,
|
|
cur_offset: from_result.offset,
|
|
end_index: None,
|
|
end_offset: None,
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn slice_merged(&self, range: Range<usize>) -> &[T] {
|
|
&self.vec[range]
|
|
}
|
|
}
|
|
|
|
impl<T, Conf: Default> RleVecWithIndex<T, Conf> {
|
|
pub fn new() -> Self {
|
|
RleVecWithIndex {
|
|
vec: Vec::new(),
|
|
atom_len: 0,
|
|
index: Vec::new(),
|
|
cfg: Default::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<T, Cfg> RleVecWithIndex<T, Cfg> {
|
|
pub fn new_with_conf(cfg: Cfg) -> Self {
|
|
RleVecWithIndex {
|
|
vec: Vec::new(),
|
|
atom_len: 0,
|
|
index: Vec::new(),
|
|
cfg,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<T, Conf> RleVecWithIndex<T, Conf> {
|
|
pub fn with_capacity(&mut self, capacity: usize) -> &mut Self {
|
|
self.vec.reserve(capacity);
|
|
self.index.reserve(capacity + 1);
|
|
self
|
|
}
|
|
}
|
|
|
|
impl<T: Mergable<Conf> + HasLength, Conf: Default> From<Vec<T>> for RleVecWithIndex<T, Conf> {
    /// Builds a vector by pushing every element of `vec` in order, so neighboring elements
    /// are merged according to the same rule `push` applies.
    fn from(vec: Vec<T>) -> Self {
        let mut merged = Self::new();
        merged.with_capacity(vec.len());
        vec.into_iter().for_each(|item| merged.push(item));
        merged
    }
}
|
|
|
|
impl<T, Conf> RleVecWithIndex<T, Conf> {
|
|
#[inline]
|
|
pub fn new_cfg(cfg: Conf) -> Self {
|
|
RleVecWithIndex {
|
|
vec: Vec::new(),
|
|
atom_len: 0,
|
|
index: Vec::new(),
|
|
cfg,
|
|
}
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn merged_len(&self) -> usize {
|
|
self.vec.len()
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn to_vec(self) -> Vec<T> {
|
|
self.vec
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn vec(&self) -> &Vec<T> {
|
|
&self.vec
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn iter(&self) -> std::slice::Iter<'_, T> {
|
|
self.vec.iter()
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn vec_mut(&mut self) -> &mut Vec<T> {
|
|
&mut self.vec
|
|
}
|
|
|
|
#[inline(always)]
|
|
pub fn get_merged(&self, index: usize) -> Option<&T> {
|
|
self.vec.get(index)
|
|
}
|
|
}
|
|
|
|
impl<T, Cfg> IntoIterator for RleVecWithIndex<T, Cfg> {
|
|
type Item = T;
|
|
|
|
type IntoIter = vec::IntoIter<T>;
|
|
|
|
fn into_iter(self) -> Self::IntoIter {
|
|
self.vec.into_iter()
|
|
}
|
|
}
|
|
|
|
impl<T> Default for RleVecWithIndex<T> {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
impl<T: Mergable<Cfg> + HasLength, Cfg: Default> FromIterator<T> for RleVecWithIndex<T, Cfg> {
|
|
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
|
|
let mut vec = RleVecWithIndex::new_with_conf(Default::default());
|
|
for item in iter {
|
|
vec.push(item);
|
|
}
|
|
vec
|
|
}
|
|
}
|
|
|
|
pub struct SliceIterator<'a, T> {
|
|
pub(super) vec: &'a [T],
|
|
pub(super) cur_index: usize,
|
|
pub(super) cur_offset: usize,
|
|
pub(super) end_index: Option<usize>,
|
|
pub(super) end_offset: Option<usize>,
|
|
}
|
|
|
|
impl<'a, T> SliceIterator<'a, T> {
|
|
pub(super) fn new_empty() -> Self {
|
|
Self {
|
|
vec: &[],
|
|
cur_index: 0,
|
|
cur_offset: 0,
|
|
end_index: None,
|
|
end_offset: None,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a, T: HasLength> Iterator for SliceIterator<'a, T> {
|
|
type Item = Slice<'a, T>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
if self.vec.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
let end_index = self.end_index.unwrap_or(self.vec.len() - 1);
|
|
if self.cur_index == end_index {
|
|
let elem = &self.vec[self.cur_index];
|
|
let end = self.end_offset.unwrap_or_else(|| elem.atom_len());
|
|
if self.cur_offset == end {
|
|
return None;
|
|
}
|
|
|
|
let ans = Slice {
|
|
value: elem,
|
|
start: self.cur_offset,
|
|
end,
|
|
};
|
|
self.cur_offset = end;
|
|
return Some(ans);
|
|
}
|
|
|
|
let ans = Slice {
|
|
value: &self.vec[self.cur_index],
|
|
start: self.cur_offset,
|
|
end: self.vec[self.cur_index].atom_len(),
|
|
};
|
|
|
|
self.cur_index += 1;
|
|
self.cur_offset = 0;
|
|
Some(ans)
|
|
}
|
|
}
|
|
|
|
impl<T: Mergable<Cfg> + HasLength + Sliceable + Clone, Cfg> Mergable<Cfg>
|
|
for RleVecWithIndex<T, Cfg>
|
|
{
|
|
fn is_mergable(&self, _: &Self, _: &Cfg) -> bool {
|
|
true
|
|
}
|
|
|
|
fn merge(&mut self, other: &Self, _: &Cfg) {
|
|
for item in other.vec.iter() {
|
|
self.push(item.clone());
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<T: Mergable<Cfg> + HasLength + Sliceable, Cfg: Clone> Sliceable for RleVecWithIndex<T, Cfg> {
|
|
fn slice(&self, start: usize, end: usize) -> Self {
|
|
let mut ans = RleVecWithIndex::new_with_conf(self.cfg.clone());
|
|
for value in self.slice_iter(start, end).map(|x| x.into_inner()) {
|
|
ans.push(value);
|
|
}
|
|
|
|
ans
|
|
}
|
|
}
|
|
|
|
impl<T, Cfg> HasLength for RleVecWithIndex<T, Cfg> {
    /// Total number of atoms stored, as tracked by `push`.
    fn content_len(&self) -> usize {
        let Self { atom_len, .. } = self;
        *atom_len
    }

    /// Same value as `content_len`: the tracked atom count.
    fn atom_len(&self) -> usize {
        let Self { atom_len, .. } = self;
        *atom_len
    }
}
|
|
|
|
impl<T, Cfg> Deref for RleVecWithIndex<T, Cfg> {
|
|
type Target = [T];
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
self.vec()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    mod prime_value {
        use crate::{Mergable, RleVecWithIndex, Sliceable};

        // Test-only merge rule: a string accepts more content while it is shorter than
        // eight bytes.
        impl Mergable for String {
            fn is_mergable(&self, _other: &Self, _conf: &()) -> bool {
                self.len() < 8
            }

            fn merge(&mut self, other: &Self, _conf: &()) {
                self.push_str(other);
            }
        }

        impl Sliceable for String {
            fn slice(&self, start: usize, end: usize) -> Self {
                self[start..end].to_string()
            }
        }

        /// "1234" and "5678" merge into one element; the next string starts a second one.
        #[test]
        fn get_at_atom_index() {
            let mut vec: RleVecWithIndex<String> = RleVecWithIndex::new();
            for piece in ["1234", "5678", "12345678"].iter() {
                vec.push(piece.to_string());
            }

            let inside_first = vec.get(4).unwrap();
            assert_eq!(inside_first.element, "12345678");
            assert_eq!(inside_first.merged_index, 0);
            assert_eq!(inside_first.offset, 4);

            let start_of_second = vec.get(8).unwrap();
            assert_eq!(start_of_second.element, "12345678");
            assert_eq!(start_of_second.merged_index, 1);
            assert_eq!(start_of_second.offset, 0);
        }

        /// A range spanning both merged elements yields one slice per element.
        #[test]
        fn slice() {
            let mut vec: RleVecWithIndex<String> = RleVecWithIndex::new();
            for piece in ["1234", "56", "78", "12345678"].iter() {
                vec.push(piece.to_string());
            }

            let mut pieces = vec.slice_iter(4, 12);

            let head = pieces.next().unwrap();
            assert_eq!(head.value, "12345678");
            assert_eq!(head.start, 4);
            assert_eq!(head.end, 8);

            let tail = pieces.next().unwrap();
            assert_eq!(tail.value, "12345678");
            assert_eq!(tail.start, 0);
            assert_eq!(tail.end, 4);
        }
    }
}
|