diff --git a/Cargo.lock b/Cargo.lock index b513829f..b1d8d777 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -175,12 +175,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" -[[package]] -name = "bytes" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" - [[package]] name = "cast" version = "0.3.0" @@ -567,6 +561,7 @@ dependencies = [ "enum_dispatch", "fxhash", "loro", + "loro-internal", "tabled 0.10.0", ] @@ -834,7 +829,6 @@ dependencies = [ "js-sys", "leb128", "loro-common", - "loro-preload", "loro-rle", "md5", "miniz_oxide", @@ -861,16 +855,6 @@ dependencies = [ "zstd", ] -[[package]] -name = "loro-preload" -version = "0.4.0" -dependencies = [ - "bytes", - "loro-common", - "serde", - "serde_columnar", -] - [[package]] name = "loro-rle" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 49f8b740..6128296b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,6 @@ members = [ "crates/rle", "crates/loro-common", "crates/loro-internal", - "crates/loro-preload", "crates/loro-wasm", "crates/fuzz", ] diff --git a/crates/fuzz/Cargo.toml b/crates/fuzz/Cargo.toml index c1c6ac27..9ef533b2 100644 --- a/crates/fuzz/Cargo.toml +++ b/crates/fuzz/Cargo.toml @@ -7,7 +7,8 @@ publish = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -loro = { path = "../loro", features = ["test_utils"] } +loro = { path = "../loro" } +loro-internal = { path = "../loro-internal", features = ["test_utils"] } arbitrary = "1" tabled = "0.10" debug-log = { workspace = true } diff --git a/crates/fuzz/fuzz/Cargo.lock b/crates/fuzz/fuzz/Cargo.lock index 993aa7f1..edb73f75 100644 --- a/crates/fuzz/fuzz/Cargo.lock +++ b/crates/fuzz/fuzz/Cargo.lock @@ -65,12 +65,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" -[[package]] -name = "bytes" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" - [[package]] name = "cc" version = "1.0.86" @@ -217,6 +211,7 @@ dependencies = [ "enum_dispatch", "fxhash", "loro", + "loro-internal", "tabled", ] @@ -360,7 +355,7 @@ dependencies = [ [[package]] name = "loro" -version = "0.3.0" +version = "0.4.0" dependencies = [ "either", "enum-as-inner 0.6.0", @@ -369,7 +364,7 @@ dependencies = [ [[package]] name = "loro-common" -version = "0.2.0" +version = "0.4.0" dependencies = [ "arbitrary", "enum-as-inner 0.6.0", @@ -384,7 +379,7 @@ dependencies = [ [[package]] name = "loro-internal" -version = "0.3.0" +version = "0.4.0" dependencies = [ "append-only-bytes", "arbitrary", @@ -398,7 +393,6 @@ dependencies = [ "itertools", "leb128", "loro-common", - "loro-preload", "loro-rle", "md5", "num", @@ -416,19 +410,9 @@ dependencies = [ "tracing", ] -[[package]] -name = "loro-preload" -version = "0.2.0" -dependencies = [ - "bytes", - "loro-common", - "serde", - "serde_columnar", -] - [[package]] name = "loro-rle" -version = "0.2.0" +version = "0.4.0" dependencies = [ "append-only-bytes", "arref", @@ -755,9 +739,9 @@ dependencies = [ [[package]] name = "serde_columnar" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41a9a14c8a221abb13091da4d1075699999e6a12213283c452680a70376efd0" +checksum = "a5d54dd7e7a1ec134c842f8a3bdb5a1fc662d002682e0457f976f3046cf9ccf8" dependencies = [ "itertools", "postcard", @@ -768,9 +752,9 @@ dependencies = [ [[package]] name = "serde_columnar_derive" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0f77bad2a9b92970e7e1f8004fac293328ac9a05f92f751ae293644d764ede4" +checksum = "3e5eaacabbc55a397ffbb1ee32523f40f86fdefea8a8d9db19630d8b7c00edd1" dependencies = [ "darling", "proc-macro2", diff --git a/crates/fuzz/src/container/tree.rs b/crates/fuzz/src/container/tree.rs index 7d119980..f5d72018 100644 --- a/crates/fuzz/src/container/tree.rs +++ b/crates/fuzz/src/container/tree.rs @@ -93,7 +93,7 @@ impl Actionable for TreeAction { match action { TreeActionInner::Create => { - let id = tree.next_tree_id(); + let id = tree.__internal__next_tree_id(); *target = (id.peer, id.counter); } TreeActionInner::Delete => { diff --git a/crates/fuzz/tests/test.rs b/crates/fuzz/tests/test.rs index 48ab98dc..f4dcb530 100644 --- a/crates/fuzz/tests/test.rs +++ b/crates/fuzz/tests/test.rs @@ -450,3 +450,44 @@ fn random_fuzz_1s_5sites_1() { fn random_fuzz_1s_5sites_2() { arbtest::builder().budget_ms(1000).run(|u| prop(u, 5)); } + +#[test] +fn test_unknown() { + test_multi_sites( + 5, + vec![ + FuzzTarget::Map, + FuzzTarget::List, + FuzzTarget::Tree, + FuzzTarget::Text, + ], + &mut [ + Handle { + site: 34, + target: 115, + container: 4, + action: Generic(GenericAction { + value: I32(62063364), + bool: false, + key: 771987715, + pos: 217020518514230019, + length: 217234923281646339, + prop: 6234107865851074949, + }), + }, + Handle { + site: 3, + target: 3, + container: 0, + action: Generic(GenericAction { + value: I32(0), + bool: false, + key: 0, + pos: 0, + length: 0, + prop: 0, + }), + }, + ], + ) +} diff --git a/crates/loro-common/src/span.rs b/crates/loro-common/src/span.rs index 081d0b5c..dec470ec 100644 --- a/crates/loro-common/src/span.rs +++ b/crates/loro-common/src/span.rs @@ -491,62 +491,8 @@ impl From for IdSpan { #[cfg(test)] mod test_id_span { - use rle::RleVecWithIndex; - use super::*; - macro_rules! id_spans { - ($([$peer:expr, $from:expr, $to:expr]),*) => { - { - let mut id_spans = RleVecWithIndex::new(); - $( - id_spans.push(IdSpan { - peer: $peer, - counter: CounterSpan::new($from, $to), - }); - )* - id_spans - } - }; - } - - #[test] - fn test_id_span_rle_vec() { - let mut id_span_vec = RleVecWithIndex::new(); - id_span_vec.push(IdSpan { - peer: 0, - counter: CounterSpan::new(0, 2), - }); - assert_eq!(id_span_vec.merged_len(), 1); - assert_eq!(id_span_vec.atom_len(), 2); - id_span_vec.push(IdSpan { - peer: 0, - counter: CounterSpan::new(2, 4), - }); - assert_eq!(id_span_vec.merged_len(), 1); - assert_eq!(id_span_vec.atom_len(), 4); - id_span_vec.push(IdSpan { - peer: 2, - counter: CounterSpan::new(2, 4), - }); - assert_eq!(id_span_vec.merged_len(), 2); - assert_eq!(id_span_vec.atom_len(), 6); - } - - #[test] - fn slice() { - let id_span_vec = id_spans!([0, 0, 2], [0, 2, 4], [2, 2, 4]); - let slice: Vec = id_span_vec.slice_iter(2, 5).map(|x| x.into()).collect(); - assert_eq!(slice, id_spans!([0, 2, 4], [2, 2, 3]).to_vec()); - } - - #[test] - fn backward() { - let id_span_vec = id_spans!([0, 100, 98], [0, 98, 90], [2, 2, 4], [2, 8, 4]); - let slice: Vec = id_span_vec.slice_iter(5, 14).map(|x| x.into()).collect(); - assert_eq!(slice, id_spans!([0, 95, 90], [2, 2, 4], [2, 8, 6]).to_vec()); - } - #[test] fn merge() { let mut a = CounterSpan::new(0, 2); diff --git a/crates/loro-internal/Cargo.toml b/crates/loro-internal/Cargo.toml index 329eadfd..c7466de8 100644 --- a/crates/loro-internal/Cargo.toml +++ b/crates/loro-internal/Cargo.toml @@ -14,7 +14,6 @@ keywords = ["crdt", "local-first"] [dependencies] rle = { path = "../rle", version = "0.4.0", package = "loro-rle" } -loro-preload = { path = "../loro-preload", version = "0.4.0" } loro-common = { path = "../loro-common", version = "0.4.0" } smallvec = { version = "1.8.0", features = ["serde"] } postcard = "1" diff --git a/crates/loro-internal/examples/encoding.rs b/crates/loro-internal/examples/encoding.rs index bc39594f..0565d390 100644 --- a/crates/loro-internal/examples/encoding.rs +++ b/crates/loro-internal/examples/encoding.rs @@ -1,3 +1,5 @@ +use std::time::Instant; + use bench_utils::TextAction; use loro_internal::LoroDoc; @@ -37,8 +39,15 @@ fn main() { txn.commit().unwrap(); } + let start = Instant::now(); let snapshot = loro.export_snapshot(); + println!("Snapshot time {}ms", start.elapsed().as_millis()); let output = miniz_oxide::deflate::compress_to_vec(&snapshot, 6); + println!( + "Snapshot+compression time {}ms", + start.elapsed().as_millis() + ); + println!( "snapshot size {} after compression {}", snapshot.len(), diff --git a/crates/loro-internal/src/handler.rs b/crates/loro-internal/src/handler.rs index 7eba5d9a..594b4e3e 100644 --- a/crates/loro-internal/src/handler.rs +++ b/crates/loro-internal/src/handler.rs @@ -2335,8 +2335,8 @@ impl TreeHandler { } } - #[cfg(feature = "test_utils")] - pub fn next_tree_id(&self) -> TreeID { + #[allow(non_snake_case)] + pub fn __internal__next_tree_id(&self) -> TreeID { match &self.inner { MaybeDetached::Detached(d) => { let d = d.try_lock().unwrap(); diff --git a/crates/loro-preload/Cargo.toml b/crates/loro-preload/Cargo.toml deleted file mode 100644 index 11ab7a8e..00000000 --- a/crates/loro-preload/Cargo.toml +++ /dev/null @@ -1,19 +0,0 @@ -[package] -name = "loro-preload" -version = "0.4.0" -edition = "2021" -license = "MIT" -description = "Loro internal lib for loading data" -documentation = "https://docs.rs/loro/" -homepage = "https://loro.dev" -authors = ["Zixuan Chen", "Liang Zhao"] -categories = ["data-structures", "crdt", "collaborative-editing", "local-first"] -keywords = ["crdt", "local-first"] - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -serde = { version = "1", features = ["derive"] } -serde_columnar = { workspace = true } -loro-common = { path = "../loro-common", version = "0.4.0" } -bytes = "1.4.0" diff --git a/crates/loro-preload/README.md b/crates/loro-preload/README.md deleted file mode 100644 index 7d97914e..00000000 --- a/crates/loro-preload/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# loro-preload - -This crate contains a small part of the functionality of the Loro project. -It aims to provide a smaller lib that can be loaded fast first to provide the basic functionality of Loro. -It can decode the Loro binary data and will have basic mechanism to record the ops in Loro. diff --git a/crates/loro-preload/src/encode.rs b/crates/loro-preload/src/encode.rs deleted file mode 100644 index 54a5701a..00000000 --- a/crates/loro-preload/src/encode.rs +++ /dev/null @@ -1,326 +0,0 @@ -use bytes::{BufMut, BytesMut}; -use loro_common::{ContainerID, InternalString, LoroError, LoroResult, LoroValue, ID}; -use serde_columnar::{columnar, to_vec, ColumnarError}; -use std::borrow::Cow; - -use serde::{Deserialize, Serialize}; - -/// The final phase of the encoding process. It's also the first phase of the decoding process. -/// -/// This data structure allows users to only load the state or the oplog. -/// -/// - When only the state is needed, the `oplog` and `oplog_extra_arena` can be ignored. -/// - When only the oplog is needed, the `app_state` can be ignored. (state_arena is still needed). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FinalPhase<'a> { - #[serde(borrow)] - pub common: Cow<'a, [u8]>, // -> CommonArena - #[serde(borrow)] - pub app_state: Cow<'a, [u8]>, // -> EncodedAppState - #[serde(borrow)] - pub state_arena: Cow<'a, [u8]>, // -> TempArena<'a> - #[serde(borrow)] - pub oplog_extra_arena: Cow<'a, [u8]>, // -> TempArena<'a>. Cannot have full history if this is dropped - #[serde(borrow)] - pub oplog: Cow<'a, [u8]>, // -> OpLog. Can be ignored if we only need state -} - -impl<'a> FinalPhase<'a> { - #[inline(always)] - pub fn encode(&self) -> Vec { - let mut bytes = BytesMut::with_capacity( - self.common.len() - + self.app_state.len() - + self.state_arena.len() - + self.oplog_extra_arena.len() - + self.oplog.len() - + 10, - ); - - leb::write_unsigned(&mut bytes, self.common.len() as u64); - bytes.put_slice(&self.common); - leb::write_unsigned(&mut bytes, self.app_state.len() as u64); - bytes.put_slice(&self.app_state); - leb::write_unsigned(&mut bytes, self.state_arena.len() as u64); - bytes.put_slice(&self.state_arena); - leb::write_unsigned(&mut bytes, self.oplog_extra_arena.len() as u64); - bytes.put_slice(&self.oplog_extra_arena); - leb::write_unsigned(&mut bytes, self.oplog.len() as u64); - bytes.put_slice(&self.oplog); - bytes.to_vec() - } - - #[inline(always)] - pub fn decode(bytes: &'a [u8]) -> Result { - let mut index = 0; - let len = leb::read_unsigned(bytes, &mut index) as usize; - let common = &bytes[index..index + len]; - index += len; - - let len = leb::read_unsigned(bytes, &mut index) as usize; - let app_state = &bytes[index..index + len]; - index += len; - - let len = leb::read_unsigned(bytes, &mut index) as usize; - let state_arena = &bytes[index..index + len]; - index += len; - - let len = leb::read_unsigned(bytes, &mut index) as usize; - let additional_arena = &bytes[index..index + len]; - index += len; - - let len = leb::read_unsigned(bytes, &mut index) as usize; - let oplog = &bytes[index..index + len]; - - Ok(FinalPhase { - common: Cow::Borrowed(common), - app_state: Cow::Borrowed(app_state), - state_arena: Cow::Borrowed(state_arena), - oplog_extra_arena: Cow::Borrowed(additional_arena), - oplog: Cow::Borrowed(oplog), - }) - } - - pub fn diagnose_size(&self) { - println!("common: {}", self.common.len()); - println!("app_state: {}", self.app_state.len()); - println!("state_arena: {}", self.state_arena.len()); - println!("additional_arena: {}", self.oplog_extra_arena.len()); - println!("oplog: {}", self.oplog.len()); - } -} - -#[derive(Debug, Default, Clone, Serialize, Deserialize)] -pub struct CommonArena<'a> { - #[serde(borrow)] - pub peer_ids: Cow<'a, [u64]>, - pub container_ids: Vec, -} - -impl<'a> CommonArena<'a> { - pub fn encode(&self) -> Vec { - to_vec(self).unwrap() - } - - pub fn decode(data: &'a FinalPhase) -> Result { - serde_columnar::from_bytes(&data.common) - .map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str())) - } -} - -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct EncodedAppState<'a> { - pub frontiers: Vec, - /// container states - #[serde(borrow)] - pub states: Vec>, - /// containers' parents - pub parents: Vec>, -} - -impl<'a> EncodedAppState<'a> { - pub fn encode(&self) -> Vec { - to_vec(self).unwrap() - } - - pub fn decode(data: &'a FinalPhase) -> Result, LoroError> { - serde_columnar::from_bytes(&data.app_state) - .map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str())) - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum EncodedContainerState<'a> { - Map(Vec), - List { - elem_idx: Vec, - elem_ids: Vec, - }, - #[serde(borrow)] - Richtext(Box>), - Tree((Vec, Vec)), -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct EncodedTreeNode { - pub node_idx: usize, - pub parent: Option, - pub id: ID, -} - -#[derive(Debug, Default, Clone, Serialize, Deserialize)] -pub struct EncodedRichtextState<'a> { - /// It's composed of interleaved: - /// - /// - len of text ranges - /// - len of styles anchors - pub len: Vec, - /// This is encoded [TextRanges] - #[serde(borrow)] - pub text_bytes: Cow<'a, [u8]>, - pub ids: Vec<(u32, u32)>, - /// Style anchor index in the style arena - // TODO: can be optimized - pub styles: Vec, - /// It is a start or end anchor. It's indexed by bit position. - pub is_style_start: Vec, -} - -#[columnar(vec, ser, de, iterable)] -#[derive(Debug, Clone, Copy)] -pub struct TextRange { - #[columnar(strategy = "DeltaRle")] - pub start: usize, - #[columnar(strategy = "DeltaRle")] - pub len: usize, -} - -#[columnar(ser, de)] -#[derive(Debug, Default)] -pub struct TextRanges { - #[columnar(class = "vec", iter = "TextRange")] - pub ranges: Vec, -} - -impl TextRanges { - #[inline] - pub fn decode_iter( - bytes: &[u8], - ) -> LoroResult> + '_> { - let iter = serde_columnar::iter_from_bytes::(bytes)?; - Ok(iter.ranges) - } - - #[inline] - pub fn encode(&self) -> Vec { - to_vec(self).unwrap() - } -} - -impl<'a> EncodedContainerState<'a> { - pub fn container_type(&self) -> loro_common::ContainerType { - match self { - EncodedContainerState::Map(_) => loro_common::ContainerType::Map, - EncodedContainerState::List { .. } => loro_common::ContainerType::List, - EncodedContainerState::Tree(_) => loro_common::ContainerType::Tree, - EncodedContainerState::Richtext { .. } => loro_common::ContainerType::Text, - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct MapEntry { - pub key: usize, // index to the state arena - pub value: usize, // index to the state arena + 1. 0 means None - pub peer: u32, // index to the peer ids - pub counter: u32, // index to the peer ids - pub lamport: u32, -} - -#[derive(Debug, Default, Clone, Serialize, Deserialize)] -pub struct CompactStyleOp { - /// index to the peer idx - pub peer_idx: u32, - /// index to the keywords idx - pub key_idx: u32, - pub counter: u32, - pub lamport: u32, - pub style_info: u8, - pub value: LoroValue, -} - -#[derive(Debug, Default, Clone, Serialize, Deserialize)] -pub struct TempArena<'a> { - #[serde(borrow)] - pub text: Cow<'a, [u8]>, - // PERF: can we use a Cow here? - pub keywords: Vec, - pub values: Vec, - pub tree_ids: Vec<(u32, i32)>, -} - -impl<'a> TempArena<'a> { - pub fn encode(&self) -> Vec { - to_vec(self).unwrap() - } - - pub fn decode_state_arena(data: &'a FinalPhase) -> Result { - serde_columnar::from_bytes(&data.state_arena) - .map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str())) - } - - pub fn decode_additional_arena(data: &'a FinalPhase) -> Result { - serde_columnar::from_bytes(&data.oplog_extra_arena) - .map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str())) - } -} - -/// returns a deep LoroValue that wraps the whole state -pub fn decode_state(_bytes: &[u8]) -> LoroValue { - unimplemented!() -} - -mod leb { - use bytes::{BufMut, BytesMut}; - pub const CONTINUATION_BIT: u8 = 1 << 7; - - pub fn write_unsigned(w: &mut BytesMut, mut val: u64) -> usize { - let mut bytes_written = 0; - loop { - let mut byte = low_bits_of_u64(val); - val >>= 7; - if val != 0 { - // More bytes to come, so set the continuation bit. - byte |= CONTINUATION_BIT; - } - - w.put_u8(byte); - bytes_written += 1; - - if val == 0 { - return bytes_written; - } - } - } - - #[doc(hidden)] - #[inline] - pub fn low_bits_of_byte(byte: u8) -> u8 { - byte & !CONTINUATION_BIT - } - - #[doc(hidden)] - #[inline] - pub fn low_bits_of_u64(val: u64) -> u8 { - let byte = val & (std::u8::MAX as u64); - low_bits_of_byte(byte as u8) - } - - pub fn read_unsigned(r: &[u8], index: &mut usize) -> u64 { - let mut result = 0; - let mut shift = 0; - - loop { - let mut buf = [r[*index]]; - *index += 1; - - if shift == 63 && buf[0] != 0x00 && buf[0] != 0x01 { - while buf[0] & CONTINUATION_BIT != 0 { - buf = [r[*index]]; - *index += 1; - } - - panic!("overflow"); - } - - let low_bits = low_bits_of_byte(buf[0]) as u64; - result |= low_bits << shift; - - if buf[0] & CONTINUATION_BIT == 0 { - return result; - } - - shift += 7; - } - } -} diff --git a/crates/loro-preload/src/lib.rs b/crates/loro-preload/src/lib.rs deleted file mode 100644 index 3021ed79..00000000 --- a/crates/loro-preload/src/lib.rs +++ /dev/null @@ -1,2 +0,0 @@ -mod encode; -pub use encode::*; diff --git a/crates/loro/Cargo.toml b/crates/loro/Cargo.toml index 5dbcaa1e..d5ed1753 100644 --- a/crates/loro/Cargo.toml +++ b/crates/loro/Cargo.toml @@ -21,4 +21,3 @@ either = "1.9.0" serde_json = "1.0.87" [features] -test_utils = ["loro-internal/test_utils"] diff --git a/crates/loro/src/lib.rs b/crates/loro/src/lib.rs index c0fca119..8374c337 100644 --- a/crates/loro/src/lib.rs +++ b/crates/loro/src/lib.rs @@ -1160,9 +1160,11 @@ impl LoroTree { self.handler.get_deep_value() } - #[cfg(feature = "test_utils")] - pub fn next_tree_id(&self) -> TreeID { - self.handler.next_tree_id() + // This method is used for testing only. + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn __internal__next_tree_id(&self) -> TreeID { + self.handler.__internal__next_tree_id() } } diff --git a/crates/rle/src/lib.rs b/crates/rle/src/lib.rs index 3bce2072..65ac8cdd 100644 --- a/crates/rle/src/lib.rs +++ b/crates/rle/src/lib.rs @@ -22,10 +22,74 @@ #![deny(clippy::undocumented_unsafe_blocks)] mod rle_trait; mod rle_vec; -mod rle_vec_old; pub use crate::rle_trait::{ HasIndex, HasLength, Mergable, Rle, RleCollection, RlePush, Slice, Sliceable, ZeroElement, }; pub use crate::rle_vec::{slice_vec_by, RleVec, RleVecWithLen}; -pub use crate::rle_vec_old::{RleVecWithIndex, SearchResult, SliceIterator}; pub mod rle_impl; + +use num::Integer; + +#[derive(Clone)] +pub struct SearchResult<'a, T, I: Integer> { + pub element: &'a T, + pub merged_index: usize, + pub offset: I, +} + +pub struct SliceIterator<'a, T> { + vec: &'a [T], + cur_index: usize, + cur_offset: usize, + end_index: Option, + end_offset: Option, +} + +impl<'a, T> SliceIterator<'a, T> { + fn new_empty() -> Self { + Self { + vec: &[], + cur_index: 0, + cur_offset: 0, + end_index: None, + end_offset: None, + } + } +} + +impl<'a, T: HasLength> Iterator for SliceIterator<'a, T> { + type Item = Slice<'a, T>; + + fn next(&mut self) -> Option { + if self.vec.is_empty() { + return None; + } + + let end_index = self.end_index.unwrap_or(self.vec.len() - 1); + if self.cur_index == end_index { + let elem = &self.vec[self.cur_index]; + let end = self.end_offset.unwrap_or_else(|| elem.atom_len()); + if self.cur_offset == end { + return None; + } + + let ans = Slice { + value: elem, + start: self.cur_offset, + end, + }; + self.cur_offset = end; + return Some(ans); + } + + let ans = Slice { + value: &self.vec[self.cur_index], + start: self.cur_offset, + end: self.vec[self.cur_index].atom_len(), + }; + + self.cur_index += 1; + self.cur_offset = 0; + Some(ans) + } +} diff --git a/crates/rle/src/rle_vec_old.rs b/crates/rle/src/rle_vec_old.rs deleted file mode 100644 index d0b8a7b4..00000000 --- a/crates/rle/src/rle_vec_old.rs +++ /dev/null @@ -1,393 +0,0 @@ -use std::{ - ops::{Deref, Range}, - vec, -}; - -use num::Integer; - -use crate::{HasLength, Mergable, Slice, Sliceable}; - -/// RleVec is a vector that can be compressed using run-length encoding. -/// -/// A T value may be merged with its neighbors. When we push new element, the new value -/// may be merged with the last element in the array. Each value has a length, so there -/// are two types of indexes: -/// 1. (merged) It refers to the index of the merged element. -/// 2. (atom) The index of substantial elements. It refers to the index of the atom element. -/// -/// By default, we use atom index in RleVec. -/// - len() returns the number of atom elements in the array. -/// - get(index) returns the atom element at the index. -/// - slice(from, to) returns a slice of atom elements from the index from to the index to. -#[derive(Debug, Clone)] -pub struct RleVecWithIndex { - vec: Vec, - atom_len: usize, - index: Vec, - cfg: Cfg, -} - -#[derive(Clone)] -pub struct SearchResult<'a, T, I: Integer> { - pub element: &'a T, - pub merged_index: usize, - pub offset: I, -} - -impl PartialEq for RleVecWithIndex { - fn eq(&self, other: &Self) -> bool { - self.vec == other.vec - } -} - -impl Eq for RleVecWithIndex {} - -impl + HasLength, Cfg> RleVecWithIndex { - /// push a new element to the end of the array. It may be merged with last element. - pub fn push(&mut self, value: T) { - self.atom_len += value.content_len(); - if self.vec.is_empty() { - self.vec.push(value); - self.index.push(0); - self.index.push(self.atom_len); - return; - } - - let last = self.vec.last_mut().unwrap(); - if last.is_mergable(&value, &self.cfg) { - last.merge(&value, &self.cfg); - *self.index.last_mut().unwrap() = self.atom_len; - return; - } - self.vec.push(value); - self.index.push(self.atom_len); - } - - pub fn is_empty(&self) -> bool { - self.vec.is_empty() - } - - /// get the element at the given atom index. - /// return: (element, merged_index, offset) - pub fn get(&self, index: usize) -> Option> { - if index > self.atom_len { - return None; - } - - let mut start = self.index.binary_search(&index).unwrap_or_else(|x| x); - - if index < self.index[start] { - start -= 1; - } - - if start >= self.vec.len() { - start -= 1; - } - - let value = &self.vec[start]; - Some(SearchResult { - element: value, - merged_index: start, - offset: index - self.index[start], - }) - } - - /// get a slice from `from` to `to` with atom indexes - pub fn slice_iter(&self, from: usize, to: usize) -> SliceIterator<'_, T> { - if from == to || self.merged_len() == 0 { - return SliceIterator::new_empty(); - } - - let from_result = self.get(from); - if from_result.is_none() { - return SliceIterator::new_empty(); - } - - let from_result = from_result.unwrap(); - let to_result = if to == self.atom_len { - None - } else { - self.get(to) - }; - if let Some(to_result) = to_result { - SliceIterator { - vec: &self.vec, - cur_index: from_result.merged_index, - cur_offset: from_result.offset, - end_index: Some(to_result.merged_index), - end_offset: Some(to_result.offset), - } - } else { - SliceIterator { - vec: &self.vec, - cur_index: from_result.merged_index, - cur_offset: from_result.offset, - end_index: None, - end_offset: None, - } - } - } - - pub fn slice_merged(&self, range: Range) -> &[T] { - &self.vec[range] - } -} - -impl RleVecWithIndex { - pub fn new() -> Self { - RleVecWithIndex { - vec: Vec::new(), - atom_len: 0, - index: Vec::new(), - cfg: Default::default(), - } - } -} - -impl RleVecWithIndex { - pub fn new_with_conf(cfg: Cfg) -> Self { - RleVecWithIndex { - vec: Vec::new(), - atom_len: 0, - index: Vec::new(), - cfg, - } - } -} - -impl RleVecWithIndex { - pub fn with_capacity(&mut self, capacity: usize) -> &mut Self { - self.vec.reserve(capacity); - self.index.reserve(capacity + 1); - self - } -} - -impl + HasLength, Conf: Default> From> for RleVecWithIndex { - fn from(vec: Vec) -> Self { - let mut ans: RleVecWithIndex = RleVecWithIndex::new(); - ans.with_capacity(vec.len()); - for v in vec { - ans.push(v); - } - ans - } -} - -impl RleVecWithIndex { - #[inline] - pub fn new_cfg(cfg: Conf) -> Self { - RleVecWithIndex { - vec: Vec::new(), - atom_len: 0, - index: Vec::new(), - cfg, - } - } - - #[inline(always)] - pub fn merged_len(&self) -> usize { - self.vec.len() - } - - #[inline(always)] - pub fn to_vec(self) -> Vec { - self.vec - } - - #[inline(always)] - pub fn vec(&self) -> &Vec { - &self.vec - } - - #[inline(always)] - pub fn iter(&self) -> std::slice::Iter<'_, T> { - self.vec.iter() - } - - #[inline(always)] - pub fn vec_mut(&mut self) -> &mut Vec { - &mut self.vec - } - - #[inline(always)] - pub fn get_merged(&self, index: usize) -> Option<&T> { - self.vec.get(index) - } -} - -impl IntoIterator for RleVecWithIndex { - type Item = T; - - type IntoIter = vec::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.vec.into_iter() - } -} - -impl Default for RleVecWithIndex { - fn default() -> Self { - Self::new() - } -} - -impl + HasLength, Cfg: Default> FromIterator for RleVecWithIndex { - fn from_iter>(iter: I) -> Self { - let mut vec = RleVecWithIndex::new_with_conf(Default::default()); - for item in iter { - vec.push(item); - } - vec - } -} - -pub struct SliceIterator<'a, T> { - pub(super) vec: &'a [T], - pub(super) cur_index: usize, - pub(super) cur_offset: usize, - pub(super) end_index: Option, - pub(super) end_offset: Option, -} - -impl<'a, T> SliceIterator<'a, T> { - pub(super) fn new_empty() -> Self { - Self { - vec: &[], - cur_index: 0, - cur_offset: 0, - end_index: None, - end_offset: None, - } - } -} - -impl<'a, T: HasLength> Iterator for SliceIterator<'a, T> { - type Item = Slice<'a, T>; - - fn next(&mut self) -> Option { - if self.vec.is_empty() { - return None; - } - - let end_index = self.end_index.unwrap_or(self.vec.len() - 1); - if self.cur_index == end_index { - let elem = &self.vec[self.cur_index]; - let end = self.end_offset.unwrap_or_else(|| elem.atom_len()); - if self.cur_offset == end { - return None; - } - - let ans = Slice { - value: elem, - start: self.cur_offset, - end, - }; - self.cur_offset = end; - return Some(ans); - } - - let ans = Slice { - value: &self.vec[self.cur_index], - start: self.cur_offset, - end: self.vec[self.cur_index].atom_len(), - }; - - self.cur_index += 1; - self.cur_offset = 0; - Some(ans) - } -} - -impl + HasLength + Sliceable + Clone, Cfg> Mergable - for RleVecWithIndex -{ - fn is_mergable(&self, _: &Self, _: &Cfg) -> bool { - true - } - - fn merge(&mut self, other: &Self, _: &Cfg) { - for item in other.vec.iter() { - self.push(item.clone()); - } - } -} - -impl + HasLength + Sliceable, Cfg: Clone> Sliceable for RleVecWithIndex { - fn slice(&self, start: usize, end: usize) -> Self { - let mut ans = RleVecWithIndex::new_with_conf(self.cfg.clone()); - for value in self.slice_iter(start, end).map(|x| x.into_inner()) { - ans.push(value); - } - - ans - } -} - -impl HasLength for RleVecWithIndex { - fn content_len(&self) -> usize { - self.atom_len - } - - fn atom_len(&self) -> usize { - self.atom_len - } -} - -impl Deref for RleVecWithIndex { - type Target = [T]; - - fn deref(&self) -> &Self::Target { - self.vec() - } -} - -#[cfg(test)] -mod test { - mod prime_value { - use crate::{Mergable, RleVecWithIndex}; - - impl Mergable for String { - fn is_mergable(&self, _: &Self, _: &()) -> bool { - self.len() < 8 - } - - fn merge(&mut self, other: &Self, _: &()) { - self.push_str(other); - } - } - - #[test] - fn get_at_atom_index() { - let mut vec: RleVecWithIndex = RleVecWithIndex::new(); - vec.push("1234".to_string()); - vec.push("5678".to_string()); - vec.push("12345678".to_string()); - assert_eq!(vec.get(4).unwrap().element, "12345678"); - assert_eq!(vec.get(4).unwrap().merged_index, 0); - assert_eq!(vec.get(4).unwrap().offset, 4); - - assert_eq!(vec.get(8).unwrap().element, "12345678"); - assert_eq!(vec.get(8).unwrap().merged_index, 1); - assert_eq!(vec.get(8).unwrap().offset, 0); - } - - #[test] - fn slice() { - let mut vec: RleVecWithIndex = RleVecWithIndex::new(); - vec.push("1234".to_string()); - vec.push("56".to_string()); - vec.push("78".to_string()); - vec.push("12345678".to_string()); - let mut iter = vec.slice_iter(4, 12); - let first = iter.next().unwrap(); - assert_eq!(first.value, "12345678"); - assert_eq!(first.start, 4); - assert_eq!(first.end, 8); - let second = iter.next().unwrap(); - assert_eq!(second.value, "12345678"); - assert_eq!(second.start, 0); - assert_eq!(second.end, 4); - } - } -}