Refactor rm unused code (#328)

* chore: init

* fix: fuzz config

* refactor: rm unused code
This commit is contained in:
Zixuan Chen 2024-04-22 21:20:00 +08:00 committed by GitHub
parent 4700ead1c1
commit f99bfd8e21
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 136 additions and 853 deletions

18
Cargo.lock generated
View file

@ -175,12 +175,6 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bytes"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
[[package]]
name = "cast"
version = "0.3.0"
@ -567,6 +561,7 @@ dependencies = [
"enum_dispatch",
"fxhash",
"loro",
"loro-internal",
"tabled 0.10.0",
]
@ -834,7 +829,6 @@ dependencies = [
"js-sys",
"leb128",
"loro-common",
"loro-preload",
"loro-rle",
"md5",
"miniz_oxide",
@ -861,16 +855,6 @@ dependencies = [
"zstd",
]
[[package]]
name = "loro-preload"
version = "0.4.0"
dependencies = [
"bytes",
"loro-common",
"serde",
"serde_columnar",
]
[[package]]
name = "loro-rle"
version = "0.4.0"

View file

@ -6,7 +6,6 @@ members = [
"crates/rle",
"crates/loro-common",
"crates/loro-internal",
"crates/loro-preload",
"crates/loro-wasm",
"crates/fuzz",
]

View file

@ -7,7 +7,8 @@ publish = false
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
loro = { path = "../loro", features = ["test_utils"] }
loro = { path = "../loro" }
loro-internal = { path = "../loro-internal", features = ["test_utils"] }
arbitrary = "1"
tabled = "0.10"
debug-log = { workspace = true }

View file

@ -65,12 +65,6 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bytes"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
[[package]]
name = "cc"
version = "1.0.86"
@ -217,6 +211,7 @@ dependencies = [
"enum_dispatch",
"fxhash",
"loro",
"loro-internal",
"tabled",
]
@ -360,7 +355,7 @@ dependencies = [
[[package]]
name = "loro"
version = "0.3.0"
version = "0.4.0"
dependencies = [
"either",
"enum-as-inner 0.6.0",
@ -369,7 +364,7 @@ dependencies = [
[[package]]
name = "loro-common"
version = "0.2.0"
version = "0.4.0"
dependencies = [
"arbitrary",
"enum-as-inner 0.6.0",
@ -384,7 +379,7 @@ dependencies = [
[[package]]
name = "loro-internal"
version = "0.3.0"
version = "0.4.0"
dependencies = [
"append-only-bytes",
"arbitrary",
@ -398,7 +393,6 @@ dependencies = [
"itertools",
"leb128",
"loro-common",
"loro-preload",
"loro-rle",
"md5",
"num",
@ -416,19 +410,9 @@ dependencies = [
"tracing",
]
[[package]]
name = "loro-preload"
version = "0.2.0"
dependencies = [
"bytes",
"loro-common",
"serde",
"serde_columnar",
]
[[package]]
name = "loro-rle"
version = "0.2.0"
version = "0.4.0"
dependencies = [
"append-only-bytes",
"arref",
@ -755,9 +739,9 @@ dependencies = [
[[package]]
name = "serde_columnar"
version = "0.3.3"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a41a9a14c8a221abb13091da4d1075699999e6a12213283c452680a70376efd0"
checksum = "a5d54dd7e7a1ec134c842f8a3bdb5a1fc662d002682e0457f976f3046cf9ccf8"
dependencies = [
"itertools",
"postcard",
@ -768,9 +752,9 @@ dependencies = [
[[package]]
name = "serde_columnar_derive"
version = "0.3.3"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0f77bad2a9b92970e7e1f8004fac293328ac9a05f92f751ae293644d764ede4"
checksum = "3e5eaacabbc55a397ffbb1ee32523f40f86fdefea8a8d9db19630d8b7c00edd1"
dependencies = [
"darling",
"proc-macro2",

View file

@ -93,7 +93,7 @@ impl Actionable for TreeAction {
match action {
TreeActionInner::Create => {
let id = tree.next_tree_id();
let id = tree.__internal__next_tree_id();
*target = (id.peer, id.counter);
}
TreeActionInner::Delete => {

View file

@ -450,3 +450,44 @@ fn random_fuzz_1s_5sites_1() {
fn random_fuzz_1s_5sites_2() {
arbtest::builder().budget_ms(1000).run(|u| prop(u, 5));
}
// Replays a fixed two-action sequence through `test_multi_sites` against the
// Map, List, Tree and Text fuzz targets.
// NOTE(review): the literal values look like a captured fuzzer case — confirm the
// origin and consider a more descriptive name than `test_unknown`.
#[test]
fn test_unknown() {
test_multi_sites(
5,
vec![
FuzzTarget::Map,
FuzzTarget::List,
FuzzTarget::Tree,
FuzzTarget::Text,
],
&mut [
Handle {
site: 34,
target: 115,
container: 4,
action: Generic(GenericAction {
value: I32(62063364),
bool: false,
key: 771987715,
pos: 217020518514230019,
length: 217234923281646339,
prop: 6234107865851074949,
}),
},
// Second action: every generic field is zero.
Handle {
site: 3,
target: 3,
container: 0,
action: Generic(GenericAction {
value: I32(0),
bool: false,
key: 0,
pos: 0,
length: 0,
prop: 0,
}),
},
],
)
}

View file

@ -491,62 +491,8 @@ impl From<ID> for IdSpan {
#[cfg(test)]
mod test_id_span {
use rle::RleVecWithIndex;
use super::*;
macro_rules! id_spans {
($([$peer:expr, $from:expr, $to:expr]),*) => {
{
let mut id_spans = RleVecWithIndex::new();
$(
id_spans.push(IdSpan {
peer: $peer,
counter: CounterSpan::new($from, $to),
});
)*
id_spans
}
};
}
#[test]
fn test_id_span_rle_vec() {
let mut id_span_vec = RleVecWithIndex::new();
id_span_vec.push(IdSpan {
peer: 0,
counter: CounterSpan::new(0, 2),
});
assert_eq!(id_span_vec.merged_len(), 1);
assert_eq!(id_span_vec.atom_len(), 2);
id_span_vec.push(IdSpan {
peer: 0,
counter: CounterSpan::new(2, 4),
});
assert_eq!(id_span_vec.merged_len(), 1);
assert_eq!(id_span_vec.atom_len(), 4);
id_span_vec.push(IdSpan {
peer: 2,
counter: CounterSpan::new(2, 4),
});
assert_eq!(id_span_vec.merged_len(), 2);
assert_eq!(id_span_vec.atom_len(), 6);
}
#[test]
fn slice() {
let id_span_vec = id_spans!([0, 0, 2], [0, 2, 4], [2, 2, 4]);
let slice: Vec<IdSpan> = id_span_vec.slice_iter(2, 5).map(|x| x.into()).collect();
assert_eq!(slice, id_spans!([0, 2, 4], [2, 2, 3]).to_vec());
}
#[test]
fn backward() {
let id_span_vec = id_spans!([0, 100, 98], [0, 98, 90], [2, 2, 4], [2, 8, 4]);
let slice: Vec<IdSpan> = id_span_vec.slice_iter(5, 14).map(|x| x.into()).collect();
assert_eq!(slice, id_spans!([0, 95, 90], [2, 2, 4], [2, 8, 6]).to_vec());
}
#[test]
fn merge() {
let mut a = CounterSpan::new(0, 2);

View file

@ -14,7 +14,6 @@ keywords = ["crdt", "local-first"]
[dependencies]
rle = { path = "../rle", version = "0.4.0", package = "loro-rle" }
loro-preload = { path = "../loro-preload", version = "0.4.0" }
loro-common = { path = "../loro-common", version = "0.4.0" }
smallvec = { version = "1.8.0", features = ["serde"] }
postcard = "1"

View file

@ -1,3 +1,5 @@
use std::time::Instant;
use bench_utils::TextAction;
use loro_internal::LoroDoc;
@ -37,8 +39,15 @@ fn main() {
txn.commit().unwrap();
}
let start = Instant::now();
let snapshot = loro.export_snapshot();
println!("Snapshot time {}ms", start.elapsed().as_millis());
let output = miniz_oxide::deflate::compress_to_vec(&snapshot, 6);
println!(
"Snapshot+compression time {}ms",
start.elapsed().as_millis()
);
println!(
"snapshot size {} after compression {}",
snapshot.len(),

View file

@ -2335,8 +2335,8 @@ impl TreeHandler {
}
}
#[cfg(feature = "test_utils")]
pub fn next_tree_id(&self) -> TreeID {
#[allow(non_snake_case)]
pub fn __internal__next_tree_id(&self) -> TreeID {
match &self.inner {
MaybeDetached::Detached(d) => {
let d = d.try_lock().unwrap();

View file

@ -1,19 +0,0 @@
[package]
name = "loro-preload"
version = "0.4.0"
edition = "2021"
license = "MIT"
description = "Loro internal lib for loading data"
documentation = "https://docs.rs/loro/"
homepage = "https://loro.dev"
authors = ["Zixuan Chen", "Liang Zhao"]
categories = ["data-structures", "crdt", "collaborative-editing", "local-first"]
keywords = ["crdt", "local-first"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
serde = { version = "1", features = ["derive"] }
serde_columnar = { workspace = true }
loro-common = { path = "../loro-common", version = "0.4.0" }
bytes = "1.4.0"

View file

@ -1,5 +0,0 @@
# loro-preload
This crate contains a small part of the functionality of the Loro project.
It aims to be a smaller library that can be loaded quickly, ahead of the rest of Loro, to provide Loro's basic functionality.
It can decode the Loro binary data and will provide a basic mechanism for recording ops in Loro.

View file

@ -1,326 +0,0 @@
use bytes::{BufMut, BytesMut};
use loro_common::{ContainerID, InternalString, LoroError, LoroResult, LoroValue, ID};
use serde_columnar::{columnar, to_vec, ColumnarError};
use std::borrow::Cow;
use serde::{Deserialize, Serialize};
/// The final phase of the encoding process. It's also the first phase of the decoding process.
///
/// Each field holds the raw bytes of one separately encoded section, which is what
/// allows users to only load the state or the oplog.
///
/// - When only the state is needed, the `oplog` and `oplog_extra_arena` can be ignored.
/// - When only the oplog is needed, the `app_state` can be ignored. (state_arena is still needed).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FinalPhase<'a> {
/// Bytes that `CommonArena::decode` reads.
#[serde(borrow)]
pub common: Cow<'a, [u8]>, // -> CommonArena
/// Bytes that `EncodedAppState::decode` reads.
#[serde(borrow)]
pub app_state: Cow<'a, [u8]>, // -> EncodedAppState
/// Bytes that `TempArena::decode_state_arena` reads.
#[serde(borrow)]
pub state_arena: Cow<'a, [u8]>, // -> TempArena<'a>
/// Bytes that `TempArena::decode_additional_arena` reads.
#[serde(borrow)]
pub oplog_extra_arena: Cow<'a, [u8]>, // -> TempArena<'a>. Cannot have full history if this is dropped
/// Encoded oplog bytes.
#[serde(borrow)]
pub oplog: Cow<'a, [u8]>, // -> OpLog. Can be ignored if we only need state
}
impl<'a> FinalPhase<'a> {
#[inline(always)]
pub fn encode(&self) -> Vec<u8> {
let mut bytes = BytesMut::with_capacity(
self.common.len()
+ self.app_state.len()
+ self.state_arena.len()
+ self.oplog_extra_arena.len()
+ self.oplog.len()
+ 10,
);
leb::write_unsigned(&mut bytes, self.common.len() as u64);
bytes.put_slice(&self.common);
leb::write_unsigned(&mut bytes, self.app_state.len() as u64);
bytes.put_slice(&self.app_state);
leb::write_unsigned(&mut bytes, self.state_arena.len() as u64);
bytes.put_slice(&self.state_arena);
leb::write_unsigned(&mut bytes, self.oplog_extra_arena.len() as u64);
bytes.put_slice(&self.oplog_extra_arena);
leb::write_unsigned(&mut bytes, self.oplog.len() as u64);
bytes.put_slice(&self.oplog);
bytes.to_vec()
}
#[inline(always)]
pub fn decode(bytes: &'a [u8]) -> Result<Self, LoroError> {
let mut index = 0;
let len = leb::read_unsigned(bytes, &mut index) as usize;
let common = &bytes[index..index + len];
index += len;
let len = leb::read_unsigned(bytes, &mut index) as usize;
let app_state = &bytes[index..index + len];
index += len;
let len = leb::read_unsigned(bytes, &mut index) as usize;
let state_arena = &bytes[index..index + len];
index += len;
let len = leb::read_unsigned(bytes, &mut index) as usize;
let additional_arena = &bytes[index..index + len];
index += len;
let len = leb::read_unsigned(bytes, &mut index) as usize;
let oplog = &bytes[index..index + len];
Ok(FinalPhase {
common: Cow::Borrowed(common),
app_state: Cow::Borrowed(app_state),
state_arena: Cow::Borrowed(state_arena),
oplog_extra_arena: Cow::Borrowed(additional_arena),
oplog: Cow::Borrowed(oplog),
})
}
pub fn diagnose_size(&self) {
println!("common: {}", self.common.len());
println!("app_state: {}", self.app_state.len());
println!("state_arena: {}", self.state_arena.len());
println!("additional_arena: {}", self.oplog_extra_arena.len());
println!("oplog: {}", self.oplog.len());
}
}
/// Arena decoded from `FinalPhase::common`: peer ids and container ids.
/// NOTE(review): presumably other sections refer to these by index — confirm against the encoder.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct CommonArena<'a> {
/// Peer ids; may borrow from the decoded input.
#[serde(borrow)]
pub peer_ids: Cow<'a, [u64]>,
/// Container ids, always owned.
pub container_ids: Vec<ContainerID>,
}
impl<'a> CommonArena<'a> {
    /// Serializes the arena with `serde_columnar`.
    ///
    /// # Panics
    ///
    /// Panics if serialization fails.
    pub fn encode(&self) -> Vec<u8> {
        let encoded = to_vec(self);
        encoded.unwrap()
    }

    /// Deserializes the arena from the `common` section of `data`, mapping any
    /// failure to `LoroError::DecodeError` carrying the error's message.
    pub fn decode(data: &'a FinalPhase) -> Result<Self, LoroError> {
        match serde_columnar::from_bytes(&data.common) {
            Ok(arena) => Ok(arena),
            Err(err) => Err(LoroError::DecodeError(err.to_string().into_boxed_str())),
        }
    }
}
/// Application state decoded from `FinalPhase::app_state`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EncodedAppState<'a> {
/// Frontier ids. NOTE(review): presumably the version the state corresponds to — confirm.
pub frontiers: Vec<ID>,
/// container states
#[serde(borrow)]
pub states: Vec<EncodedContainerState<'a>>,
/// containers' parents
// NOTE(review): looks like an index into the container list, `None` for roots — confirm.
pub parents: Vec<Option<u32>>,
}
impl<'a> EncodedAppState<'a> {
    /// Serializes the state with `serde_columnar`.
    ///
    /// # Panics
    ///
    /// Panics if serialization fails.
    pub fn encode(&self) -> Vec<u8> {
        let encoded = to_vec(self);
        encoded.unwrap()
    }

    /// Deserializes the state from the `app_state` section of `data`, mapping
    /// any failure to `LoroError::DecodeError` carrying the error's message.
    pub fn decode(data: &'a FinalPhase) -> Result<EncodedAppState<'a>, LoroError> {
        match serde_columnar::from_bytes(&data.app_state) {
            Ok(state) => Ok(state),
            Err(err) => Err(LoroError::DecodeError(err.to_string().into_boxed_str())),
        }
    }
}
/// Encoded state of a single container, with one variant per container type
/// (Map, List, Text, Tree).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum EncodedContainerState<'a> {
/// Map state as a list of entries.
Map(Vec<MapEntry>),
/// List state.
/// NOTE(review): the meaning of `elem_idx` / `elem_ids` is not visible here — confirm against the encoder.
List {
elem_idx: Vec<usize>,
elem_ids: Vec<ID>,
},
/// Rich text state, boxed; may borrow from the decoded input.
#[serde(borrow)]
Richtext(Box<EncodedRichtextState<'a>>),
/// Tree state: the nodes plus a list of indices.
/// NOTE(review): the role of the second `Vec<usize>` is not visible here — confirm.
Tree((Vec<EncodedTreeNode>, Vec<usize>)),
}
/// One node of an encoded tree container.
/// NOTE(review): `node_idx` and `parent` look like indices into an arena of tree ids — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncodedTreeNode {
pub node_idx: usize,
// `None` presumably marks a node without a parent — confirm.
pub parent: Option<usize>,
pub id: ID,
}
/// Encoded state of a rich text container.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct EncodedRichtextState<'a> {
/// It's composed of interleaved:
///
/// - len of text ranges
/// - len of styles anchors
pub len: Vec<u32>,
/// The encoded bytes of a [TextRanges] value; may borrow from the decoded input.
#[serde(borrow)]
pub text_bytes: Cow<'a, [u8]>,
// NOTE(review): the meaning of the two `u32`s is not visible here — confirm.
pub ids: Vec<(u32, u32)>,
/// Style anchor index in the style arena
// TODO: can be optimized
pub styles: Vec<CompactStyleOp>,
/// It is a start or end anchor. It's indexed by bit position.
pub is_style_start: Vec<u8>,
}
/// A `start` / `len` pair describing one range of text.
/// Both columns are encoded with the `DeltaRle` strategy.
#[columnar(vec, ser, de, iterable)]
#[derive(Debug, Clone, Copy)]
pub struct TextRange {
#[columnar(strategy = "DeltaRle")]
pub start: usize,
#[columnar(strategy = "DeltaRle")]
pub len: usize,
}
/// Columnar container for a list of [`TextRange`]s.
#[columnar(ser, de)]
#[derive(Debug, Default)]
pub struct TextRanges {
/// The ranges, iterable lazily through `TextRanges::decode_iter`.
#[columnar(class = "vec", iter = "TextRange")]
pub ranges: Vec<TextRange>,
}
impl TextRanges {
    /// Returns an iterator that decodes the ranges stored in `bytes` one at a
    /// time; a failure to set up the iterator is returned as the error.
    #[inline]
    pub fn decode_iter(
        bytes: &[u8],
    ) -> LoroResult<impl Iterator<Item = Result<TextRange, ColumnarError>> + '_> {
        match serde_columnar::iter_from_bytes::<TextRanges>(bytes) {
            Ok(decoded) => Ok(decoded.ranges),
            Err(err) => Err(err.into()),
        }
    }

    /// Serializes the ranges with `serde_columnar`.
    ///
    /// # Panics
    ///
    /// Panics if serialization fails.
    #[inline]
    pub fn encode(&self) -> Vec<u8> {
        let encoded = to_vec(self);
        encoded.unwrap()
    }
}
impl<'a> EncodedContainerState<'a> {
pub fn container_type(&self) -> loro_common::ContainerType {
match self {
EncodedContainerState::Map(_) => loro_common::ContainerType::Map,
EncodedContainerState::List { .. } => loro_common::ContainerType::List,
EncodedContainerState::Tree(_) => loro_common::ContainerType::Tree,
EncodedContainerState::Richtext { .. } => loro_common::ContainerType::Text,
}
}
}
/// One encoded map entry; key and value are stored as indices into the state arena.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MapEntry {
pub key: usize, // index to the state arena
pub value: usize, // index to the state arena + 1. 0 means None
pub peer: u32, // index to the peer ids
pub counter: u32, // NOTE(review): the original comment repeated `peer`'s; presumably the op counter — confirm
pub lamport: u32,
}
/// Compact encoding of a single rich text style operation.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct CompactStyleOp {
/// index to the peer idx
pub peer_idx: u32,
/// index to the keywords idx
pub key_idx: u32,
pub counter: u32,
pub lamport: u32,
// NOTE(review): the bit layout of `style_info` is not visible here — confirm.
pub style_info: u8,
pub value: LoroValue,
}
/// Arena decoded from either `FinalPhase::state_arena` or
/// `FinalPhase::oplog_extra_arena`.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct TempArena<'a> {
/// Text bytes; may borrow from the decoded input.
#[serde(borrow)]
pub text: Cow<'a, [u8]>,
// PERF: can we use a Cow here?
pub keywords: Vec<InternalString>,
pub values: Vec<LoroValue>,
// NOTE(review): presumably (peer index, counter) pairs — confirm against the encoder.
pub tree_ids: Vec<(u32, i32)>,
}
impl<'a> TempArena<'a> {
    /// Serializes the arena with `serde_columnar`.
    ///
    /// # Panics
    ///
    /// Panics if serialization fails.
    pub fn encode(&self) -> Vec<u8> {
        let encoded = to_vec(self);
        encoded.unwrap()
    }

    /// Decodes the arena stored in the `state_arena` section of `data`.
    pub fn decode_state_arena(data: &'a FinalPhase) -> Result<Self, LoroError> {
        Self::from_section(&data.state_arena)
    }

    /// Decodes the arena stored in the `oplog_extra_arena` section of `data`.
    pub fn decode_additional_arena(data: &'a FinalPhase) -> Result<Self, LoroError> {
        Self::from_section(&data.oplog_extra_arena)
    }

    /// Shared decode path: any failure becomes `LoroError::DecodeError`
    /// carrying the error's message.
    fn from_section(section: &'a [u8]) -> Result<Self, LoroError> {
        match serde_columnar::from_bytes(section) {
            Ok(arena) => Ok(arena),
            Err(err) => Err(LoroError::DecodeError(err.to_string().into_boxed_str())),
        }
    }
}
/// Intended to return a deep LoroValue that wraps the whole state.
///
/// # Panics
///
/// Not implemented yet: every call panics via `unimplemented!()`.
pub fn decode_state(_bytes: &[u8]) -> LoroValue {
unimplemented!()
}
/// Minimal unsigned LEB128 reader and writer used for section length prefixes.
mod leb {
    use bytes::{BufMut, BytesMut};

    /// High bit of an encoded byte; set when more bytes follow.
    pub const CONTINUATION_BIT: u8 = 1 << 7;

    /// Appends `val` to `w` as unsigned LEB128 and returns the number of bytes
    /// written (always at least one).
    pub fn write_unsigned(w: &mut BytesMut, mut val: u64) -> usize {
        let mut bytes_written = 0;
        loop {
            let mut byte = low_bits_of_u64(val);
            val >>= 7;
            if val != 0 {
                // More bytes to come, so set the continuation bit.
                byte |= CONTINUATION_BIT;
            }

            w.put_u8(byte);
            bytes_written += 1;

            if val == 0 {
                return bytes_written;
            }
        }
    }

    /// Clears the continuation bit, keeping the 7 payload bits.
    #[doc(hidden)]
    #[inline]
    pub fn low_bits_of_byte(byte: u8) -> u8 {
        byte & !CONTINUATION_BIT
    }

    /// Returns the lowest 7 bits of `val`.
    #[doc(hidden)]
    #[inline]
    pub fn low_bits_of_u64(val: u64) -> u8 {
        let byte = val & u64::from(u8::MAX);
        low_bits_of_byte(byte as u8)
    }

    /// Decodes one unsigned LEB128 value from `r` starting at `*index` and
    /// advances `*index` past the bytes consumed.
    ///
    /// # Panics
    ///
    /// Panics with "overflow" when the value does not fit in a `u64`, and on an
    /// out-of-bounds index when `r` ends before the value terminates.
    pub fn read_unsigned(r: &[u8], index: &mut usize) -> u64 {
        let mut result = 0;
        let mut shift = 0;

        loop {
            let byte = r[*index];
            *index += 1;

            // The 10th byte (shift 63) may only contribute a single bit.
            // Panic right away: draining the remaining continuation bytes
            // first has no observable effect and could itself run past the
            // end of `r`, hiding the real cause.
            if shift == 63 && byte != 0x00 && byte != 0x01 {
                panic!("overflow");
            }

            result |= u64::from(low_bits_of_byte(byte)) << shift;

            if byte & CONTINUATION_BIT == 0 {
                return result;
            }

            shift += 7;
        }
    }
}

View file

@ -1,2 +0,0 @@
mod encode;
pub use encode::*;

View file

@ -21,4 +21,3 @@ either = "1.9.0"
serde_json = "1.0.87"
[features]
test_utils = ["loro-internal/test_utils"]

View file

@ -1160,9 +1160,11 @@ impl LoroTree {
self.handler.get_deep_value()
}
#[cfg(feature = "test_utils")]
pub fn next_tree_id(&self) -> TreeID {
self.handler.next_tree_id()
// This method is used for testing only.
#[doc(hidden)]
#[allow(non_snake_case)]
pub fn __internal__next_tree_id(&self) -> TreeID {
self.handler.__internal__next_tree_id()
}
}

View file

@ -22,10 +22,74 @@
#![deny(clippy::undocumented_unsafe_blocks)]
mod rle_trait;
mod rle_vec;
mod rle_vec_old;
pub use crate::rle_trait::{
HasIndex, HasLength, Mergable, Rle, RleCollection, RlePush, Slice, Sliceable, ZeroElement,
};
pub use crate::rle_vec::{slice_vec_by, RleVec, RleVecWithLen};
pub use crate::rle_vec_old::{RleVecWithIndex, SearchResult, SliceIterator};
pub mod rle_impl;
use num::Integer;
/// Result of looking up a position inside a run-length encoded collection.
/// NOTE(review): the producer is not visible in this file — confirm the field contracts against it.
#[derive(Clone)]
pub struct SearchResult<'a, T, I: Integer> {
/// The element that was found.
pub element: &'a T,
/// Index of `element` among the merged elements.
pub merged_index: usize,
/// Offset of the looked-up position inside `element`.
pub offset: I,
}
/// Iterator over a range of a merged-element slice, yielding one `Slice` per
/// element it touches.
pub struct SliceIterator<'a, T> {
// Elements being iterated.
vec: &'a [T],
// Index of the element that the next call to `next` reads.
cur_index: usize,
// Start offset inside the current element.
cur_offset: usize,
// Index of the last element to visit; `None` means the last element of `vec`.
end_index: Option<usize>,
// End offset inside the last element; `None` means that element's `atom_len()`.
end_offset: Option<usize>,
}
impl<'a, T> SliceIterator<'a, T> {
fn new_empty() -> Self {
Self {
vec: &[],
cur_index: 0,
cur_offset: 0,
end_index: None,
end_offset: None,
}
}
}
impl<'a, T: HasLength> Iterator for SliceIterator<'a, T> {
    type Item = Slice<'a, T>;

    /// Yields the next `Slice`: the tail of the current element while the end
    /// element has not been reached, then the `[cur_offset, end_offset)` part
    /// of the end element, then `None`.
    fn next(&mut self) -> Option<Self::Item> {
        // An empty slice has no last index and yields nothing.
        let last_index = self.vec.len().checked_sub(1)?;
        let end_index = self.end_index.unwrap_or(last_index);

        // An inverted range (start past the end) used to walk off the end of
        // `vec` and panic; treat it as exhausted instead.
        if self.cur_index > end_index {
            return None;
        }
        let elem = self.vec.get(self.cur_index)?;

        if self.cur_index == end_index {
            let end = self.end_offset.unwrap_or_else(|| elem.atom_len());
            // `>=` also stops an inverted range inside one element, which
            // would otherwise produce a slice with `start > end`.
            if self.cur_offset >= end {
                return None;
            }

            let ans = Slice {
                value: elem,
                start: self.cur_offset,
                end,
            };
            self.cur_offset = end;
            return Some(ans);
        }

        let ans = Slice {
            value: elem,
            start: self.cur_offset,
            end: elem.atom_len(),
        };

        self.cur_index += 1;
        self.cur_offset = 0;
        Some(ans)
    }
}

View file

@ -1,393 +0,0 @@
use std::{
ops::{Deref, Range},
vec,
};
use num::Integer;
use crate::{HasLength, Mergable, Slice, Sliceable};
/// RleVecWithIndex<T> is a vector that can be compressed using run-length encoding.
///
/// A T value may be merged with its neighbors. When we push new element, the new value
/// may be merged with the last element in the array. Each value has a length, so there
/// are two types of indexes:
/// 1. (merged) It refers to the index of the merged element.
/// 2. (atom) The index of substantial elements. It refers to the index of the atom element.
///
/// By default, we use atom index in RleVecWithIndex.
/// - atom_len() returns the number of atom elements in the array.
/// - get(index) returns the merged element that contains the atom index.
/// - slice_iter(from, to) iterates over the atom elements from the index from to the index to.
#[derive(Debug, Clone)]
pub struct RleVecWithIndex<T, Cfg = ()> {
// The merged elements.
vec: Vec<T>,
// Sum of `content_len()` over everything pushed so far.
atom_len: usize,
// Cumulative atom offsets: `index[i]` is the atom position where `vec[i]` starts,
// followed by one trailing entry equal to `atom_len`. Empty while `vec` is empty.
index: Vec<usize>,
// Configuration passed to `is_mergable` / `merge`.
cfg: Cfg,
}
/// Result of `RleVecWithIndex::get`.
#[derive(Clone)]
pub struct SearchResult<'a, T, I: Integer> {
/// The merged element that contains the requested atom index.
pub element: &'a T,
/// Index of `element` among the merged elements.
pub merged_index: usize,
/// Distance from the start of `element` to the requested atom index.
pub offset: I,
}
/// Two vectors are equal when their merged elements are equal; the cached
/// lengths and offsets are not compared.
impl<T: Eq + PartialEq> PartialEq for RleVecWithIndex<T> {
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (&self.vec, &other.vec);
        lhs == rhs
    }
}

impl<T: Eq + PartialEq> Eq for RleVecWithIndex<T> {}
impl<T: Mergable<Cfg> + HasLength, Cfg> RleVecWithIndex<T, Cfg> {
    /// push a new element to the end of the array. It may be merged with last element.
    pub fn push(&mut self, value: T) {
        self.atom_len += value.content_len();
        if self.vec.is_empty() {
            self.vec.push(value);
            // First element: record its start (0) and the trailing end offset.
            self.index.push(0);
            self.index.push(self.atom_len);
            return;
        }

        let last = self.vec.last_mut().unwrap();
        if last.is_mergable(&value, &self.cfg) {
            last.merge(&value, &self.cfg);
            // The merged element grew, so only the trailing end offset moves.
            *self.index.last_mut().unwrap() = self.atom_len;
            return;
        }

        self.vec.push(value);
        self.index.push(self.atom_len);
    }

    /// Returns `true` when no element has been pushed.
    pub fn is_empty(&self) -> bool {
        self.vec.is_empty()
    }

    /// get the element at the given atom index.
    /// return: (element, merged_index, offset)
    ///
    /// Returns `None` when the vector is empty or `index` is greater than the
    /// atom length. `index == atom_len` resolves to the last element, with an
    /// offset equal to that element's length.
    pub fn get(&self, index: usize) -> Option<SearchResult<'_, T, usize>> {
        // Without the emptiness check, `self.index[start]` below would panic
        // for an empty vector, because `index` has no entries yet.
        if self.vec.is_empty() || index > self.atom_len {
            return None;
        }

        let mut start = self.index.binary_search(&index).unwrap_or_else(|x| x);
        // A miss lands on the insertion point, one past the containing element.
        if index < self.index[start] {
            start -= 1;
        }
        // A hit on the trailing end offset belongs to the last element.
        if start >= self.vec.len() {
            start -= 1;
        }

        let value = &self.vec[start];
        Some(SearchResult {
            element: value,
            merged_index: start,
            offset: index - self.index[start],
        })
    }

    /// get a slice from `from` to `to` with atom indexes
    ///
    /// Yields nothing when `from == to`, when the vector is empty, or when
    /// `from` is out of range. A `to` that equals the atom length, or is out
    /// of range, means "until the end".
    pub fn slice_iter(&self, from: usize, to: usize) -> SliceIterator<'_, T> {
        if from == to || self.vec.is_empty() {
            return SliceIterator::new_empty();
        }

        let from_result = match self.get(from) {
            Some(found) => found,
            None => return SliceIterator::new_empty(),
        };

        let to_result = if to == self.atom_len {
            None
        } else {
            self.get(to)
        };
        let (end_index, end_offset) = match to_result {
            Some(found) => (Some(found.merged_index), Some(found.offset)),
            None => (None, None),
        };

        SliceIterator {
            vec: &self.vec,
            cur_index: from_result.merged_index,
            cur_offset: from_result.offset,
            end_index,
            end_offset,
        }
    }

    /// Returns the merged elements in `range` (merged indexes, not atom indexes).
    ///
    /// # Panics
    ///
    /// Panics when `range` is out of bounds.
    pub fn slice_merged(&self, range: Range<usize>) -> &[T] {
        &self.vec[range]
    }
}
impl<T, Conf: Default> RleVecWithIndex<T, Conf> {
pub fn new() -> Self {
RleVecWithIndex {
vec: Vec::new(),
atom_len: 0,
index: Vec::new(),
cfg: Default::default(),
}
}
}
impl<T, Cfg> RleVecWithIndex<T, Cfg> {
pub fn new_with_conf(cfg: Cfg) -> Self {
RleVecWithIndex {
vec: Vec::new(),
atom_len: 0,
index: Vec::new(),
cfg,
}
}
}
impl<T, Conf> RleVecWithIndex<T, Conf> {
    /// Reserves room for `capacity` more merged elements on an existing
    /// vector and returns `self` for chaining. Despite the name, this is not
    /// a constructor.
    pub fn with_capacity(&mut self, capacity: usize) -> &mut Self {
        let Self { vec, index, .. } = self;
        vec.reserve(capacity);
        // The offset table carries one trailing entry beyond the elements.
        index.reserve(capacity + 1);
        self
    }
}
/// Builds a run-length encoded vector by pushing every element of `vec` in
/// order, merging neighbours where possible.
impl<T: Mergable<Conf> + HasLength, Conf: Default> From<Vec<T>> for RleVecWithIndex<T, Conf> {
    fn from(vec: Vec<T>) -> Self {
        let mut merged = RleVecWithIndex::<T, Conf>::new();
        merged.with_capacity(vec.len());
        vec.into_iter().for_each(|item| merged.push(item));
        merged
    }
}
impl<T, Conf> RleVecWithIndex<T, Conf> {
#[inline]
pub fn new_cfg(cfg: Conf) -> Self {
RleVecWithIndex {
vec: Vec::new(),
atom_len: 0,
index: Vec::new(),
cfg,
}
}
#[inline(always)]
pub fn merged_len(&self) -> usize {
self.vec.len()
}
#[inline(always)]
pub fn to_vec(self) -> Vec<T> {
self.vec
}
#[inline(always)]
pub fn vec(&self) -> &Vec<T> {
&self.vec
}
#[inline(always)]
pub fn iter(&self) -> std::slice::Iter<'_, T> {
self.vec.iter()
}
#[inline(always)]
pub fn vec_mut(&mut self) -> &mut Vec<T> {
&mut self.vec
}
#[inline(always)]
pub fn get_merged(&self, index: usize) -> Option<&T> {
self.vec.get(index)
}
}
/// Consuming iteration yields the merged elements in order.
impl<T, Cfg> IntoIterator for RleVecWithIndex<T, Cfg> {
    type Item = T;
    type IntoIter = vec::IntoIter<T>;

    fn into_iter(self) -> Self::IntoIter {
        let Self { vec: merged, .. } = self;
        merged.into_iter()
    }
}
impl<T> Default for RleVecWithIndex<T> {
fn default() -> Self {
Self::new()
}
}
/// Collects items by pushing them one by one, merging neighbours where possible.
impl<T: Mergable<Cfg> + HasLength, Cfg: Default> FromIterator<T> for RleVecWithIndex<T, Cfg> {
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
        let mut merged = RleVecWithIndex::new_with_conf(Cfg::default());
        iter.into_iter().for_each(|item| merged.push(item));
        merged
    }
}
/// Iterator returned by `RleVecWithIndex::slice_iter`, yielding one `Slice`
/// per merged element that the range touches.
pub struct SliceIterator<'a, T> {
// Merged elements being iterated.
pub(super) vec: &'a [T],
// Index of the element that the next call to `next` reads.
pub(super) cur_index: usize,
// Start offset inside the current element.
pub(super) cur_offset: usize,
// Index of the last element to visit; `None` means the last element of `vec`.
pub(super) end_index: Option<usize>,
// End offset inside the last element; `None` means that element's `atom_len()`.
pub(super) end_offset: Option<usize>,
}
impl<'a, T> SliceIterator<'a, T> {
pub(super) fn new_empty() -> Self {
Self {
vec: &[],
cur_index: 0,
cur_offset: 0,
end_index: None,
end_offset: None,
}
}
}
impl<'a, T: HasLength> Iterator for SliceIterator<'a, T> {
    type Item = Slice<'a, T>;

    /// Yields the next `Slice`: the tail of the current element while the end
    /// element has not been reached, then the `[cur_offset, end_offset)` part
    /// of the end element, then `None`.
    fn next(&mut self) -> Option<Self::Item> {
        // An empty slice has no last index and yields nothing.
        let last_index = self.vec.len().checked_sub(1)?;
        let end_index = self.end_index.unwrap_or(last_index);

        // An inverted range (`from` past `to`) used to walk off the end of
        // `vec` and panic; treat it as exhausted instead.
        if self.cur_index > end_index {
            return None;
        }
        let elem = self.vec.get(self.cur_index)?;

        if self.cur_index == end_index {
            let end = self.end_offset.unwrap_or_else(|| elem.atom_len());
            // `>=` also stops an inverted range inside one element, which
            // would otherwise produce a slice with `start > end`.
            if self.cur_offset >= end {
                return None;
            }

            let ans = Slice {
                value: elem,
                start: self.cur_offset,
                end,
            };
            self.cur_offset = end;
            return Some(ans);
        }

        let ans = Slice {
            value: elem,
            start: self.cur_offset,
            end: elem.atom_len(),
        };

        self.cur_index += 1;
        self.cur_offset = 0;
        Some(ans)
    }
}
/// Any two vectors can be merged: merging appends a clone of every merged
/// element of `other`, letting `push` combine neighbours where possible.
impl<T: Mergable<Cfg> + HasLength + Sliceable + Clone, Cfg> Mergable<Cfg>
    for RleVecWithIndex<T, Cfg>
{
    fn is_mergable(&self, _other: &Self, _cfg: &Cfg) -> bool {
        true
    }

    fn merge(&mut self, other: &Self, _cfg: &Cfg) {
        other
            .vec
            .iter()
            .cloned()
            .for_each(|item| self.push(item));
    }
}
/// Slicing by atom indexes builds a new vector with a cloned configuration
/// from the pieces produced by `slice_iter`.
impl<T: Mergable<Cfg> + HasLength + Sliceable, Cfg: Clone> Sliceable for RleVecWithIndex<T, Cfg> {
    fn slice(&self, start: usize, end: usize) -> Self {
        let mut sliced = RleVecWithIndex::new_with_conf(self.cfg.clone());
        self.slice_iter(start, end)
            .for_each(|piece| sliced.push(piece.into_inner()));
        sliced
    }
}
/// Both lengths report the cached atom count accumulated by `push`.
impl<T, Cfg> HasLength for RleVecWithIndex<T, Cfg> {
    fn content_len(&self) -> usize {
        let Self { atom_len, .. } = self;
        *atom_len
    }

    fn atom_len(&self) -> usize {
        let Self { atom_len, .. } = self;
        *atom_len
    }
}
/// Dereferences to the slice of merged elements.
impl<T, Cfg> Deref for RleVecWithIndex<T, Cfg> {
    type Target = [T];

    fn deref(&self) -> &Self::Target {
        self.vec.as_slice()
    }
}
#[cfg(test)]
mod test {
mod prime_value {
use crate::{Mergable, RleVecWithIndex};
// Test-only merge rule: a string accepts another push while it is shorter
// than 8 bytes, and merging concatenates the two.
impl Mergable for String {
fn is_mergable(&self, _: &Self, _: &()) -> bool {
self.len() < 8
}
fn merge(&mut self, other: &Self, _: &()) {
self.push_str(other);
}
}
// "1234" and "5678" merge into one 8-byte element; the third push starts a
// second element, so atom index 8 is the start of merged element 1.
#[test]
fn get_at_atom_index() {
let mut vec: RleVecWithIndex<String> = RleVecWithIndex::new();
vec.push("1234".to_string());
vec.push("5678".to_string());
vec.push("12345678".to_string());
assert_eq!(vec.get(4).unwrap().element, "12345678");
assert_eq!(vec.get(4).unwrap().merged_index, 0);
assert_eq!(vec.get(4).unwrap().offset, 4);
assert_eq!(vec.get(8).unwrap().element, "12345678");
assert_eq!(vec.get(8).unwrap().merged_index, 1);
assert_eq!(vec.get(8).unwrap().offset, 0);
}
// The first three pushes merge into "12345678"; slicing atoms 4..12 covers
// the second half of that element and the first half of the next one.
#[test]
fn slice() {
let mut vec: RleVecWithIndex<String> = RleVecWithIndex::new();
vec.push("1234".to_string());
vec.push("56".to_string());
vec.push("78".to_string());
vec.push("12345678".to_string());
let mut iter = vec.slice_iter(4, 12);
let first = iter.next().unwrap();
assert_eq!(first.value, "12345678");
assert_eq!(first.start, 4);
assert_eq!(first.end, 8);
let second = iter.next().unwrap();
assert_eq!(second.value, "12345678");
assert_eq!(second.start, 0);
assert_eq!(second.end, 4);
}
}
}