From 2c7e2de7639b14b3be1dbb8caeec21d4615bd792 Mon Sep 17 00:00:00 2001 From: Zixuan Chen Date: Wed, 13 Jul 2022 00:47:41 +0800 Subject: [PATCH] feat: rle --- .editorconfig | 2 + .gitignore | 2 + Cargo.toml | 2 + README.md | 0 crates/loro-core/Cargo.toml | 9 ++ crates/loro-core/src/id.rs | 7 + crates/loro-core/src/lib.rs | 12 ++ crates/loro-core/src/rle.rs | 3 + crates/loro-core/src/store.rs | 11 ++ crates/rle/Cargo.toml | 8 ++ crates/rle/src/lib.rs | 2 + crates/rle/src/rle.rs | 237 ++++++++++++++++++++++++++++++++++ 12 files changed, 295 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 crates/loro-core/Cargo.toml create mode 100644 crates/loro-core/src/id.rs create mode 100644 crates/loro-core/src/lib.rs create mode 100644 crates/loro-core/src/rle.rs create mode 100644 crates/loro-core/src/store.rs create mode 100644 crates/rle/Cargo.toml create mode 100644 crates/rle/src/lib.rs create mode 100644 crates/rle/src/rle.rs diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..1c9705cb --- /dev/null +++ b/.editorconfig @@ -0,0 +1,2 @@ +[*.rs] +indent_size = 4 diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..4fffb2f8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..c66a4d73 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,2 @@ +[workspace] +members = ["crates/*"] diff --git a/README.md b/README.md new file mode 100644 index 00000000..e69de29b diff --git a/crates/loro-core/Cargo.toml b/crates/loro-core/Cargo.toml new file mode 100644 index 00000000..bdb3c7ac --- /dev/null +++ b/crates/loro-core/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "loro-core" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +string_cache = "0.8.3" diff 
--git a/crates/loro-core/src/id.rs b/crates/loro-core/src/id.rs new file mode 100644 index 00000000..eda131a9 --- /dev/null +++ b/crates/loro-core/src/id.rs @@ -0,0 +1,7 @@ +pub type ClientID = u64; + +#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, PartialOrd, Ord)] +pub struct ID { + client_id: u64, + counter: u32, +} diff --git a/crates/loro-core/src/lib.rs b/crates/loro-core/src/lib.rs new file mode 100644 index 00000000..b31042e6 --- /dev/null +++ b/crates/loro-core/src/lib.rs @@ -0,0 +1,12 @@ +#![allow(dead_code, unused_imports)] +mod id; +mod store; + +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + let result = 2 + 2; + assert_eq!(result, 4); + } +} diff --git a/crates/loro-core/src/rle.rs b/crates/loro-core/src/rle.rs new file mode 100644 index 00000000..8db75e71 --- /dev/null +++ b/crates/loro-core/src/rle.rs @@ -0,0 +1,3 @@ +struct RleArray { + data: Vec, +} diff --git a/crates/loro-core/src/store.rs b/crates/loro-core/src/store.rs new file mode 100644 index 00000000..cdad02c0 --- /dev/null +++ b/crates/loro-core/src/store.rs @@ -0,0 +1,11 @@ +use std::collections::HashMap; +use string_cache::{Atom, DefaultAtom, EmptyStaticAtomSet}; + +use crate::id::ClientID; + +#[non_exhaustive] +struct Change {} + +struct Store { + map: HashMap>, +} diff --git a/crates/rle/Cargo.toml b/crates/rle/Cargo.toml new file mode 100644 index 00000000..19ed8d08 --- /dev/null +++ b/crates/rle/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "rle" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/crates/rle/src/lib.rs b/crates/rle/src/lib.rs new file mode 100644 index 00000000..69fc2126 --- /dev/null +++ b/crates/rle/src/lib.rs @@ -0,0 +1,2 @@ +mod rle; +pub use rle::{HasLength, Mergable, RleVec, SearchResult, Sliceable}; diff --git a/crates/rle/src/rle.rs b/crates/rle/src/rle.rs new file mode 100644 index 00000000..1b8e7567 --- /dev/null +++ 
// File: crates/rle/src/rle.rs -- contents of this patch hunk, written out as plain Rust.

/// A vector whose elements are compressed with run-length encoding.
///
/// A pushed value may be merged into the last stored element instead of being
/// appended. Every element reports a length (see [`HasLength`]), so there are
/// two kinds of indexes:
///
/// 1. *merged index*: the position of a (possibly merged) element in storage.
/// 2. *atom index*: the position of a single unit of length, counted across
///    all stored elements.
///
/// The public API uses atom indexes:
/// - [`RleVec::len`] returns the total number of atoms.
/// - [`RleVec::get`] finds the element that contains a given atom.
/// - [`RleVec::slice`] iterates over the elements covering an atom range.
#[derive(Debug, Clone)]
pub struct RleVec<T> {
    /// Stored (possibly merged) elements, in push order.
    vec: Vec<T>,
    /// Sum of the lengths of every value pushed so far.
    _len: usize,
    /// `index[i]` is the atom offset at which `vec[i]` starts; one extra trailing
    /// entry holds the total length. Empty while `vec` is empty.
    index: Vec<usize>,
}

/// A value that can absorb a following value of the same type.
pub trait Mergable {
    /// Returns `true` when `other` may be merged into `self`.
    fn is_mergable(&self, other: &Self) -> bool;
    /// Merges `other` into `self`.
    fn merge(&mut self, other: &Self);
}

/// A value from which a sub-range can be extracted.
pub trait Sliceable {
    /// Returns the part of `self` between `start` and `end`.
    fn slice(&self, start: usize, end: usize) -> Self;
}

/// A value that spans a number of atoms.
pub trait HasLength {
    /// Returns `true` when the value spans no atoms.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Number of atoms the value spans.
    fn len(&self) -> usize;
}

/// Result of looking up an atom index with [`RleVec::get`].
#[derive(Debug)]
pub struct SearchResult<'a, T> {
    /// The stored element that contains the requested atom.
    pub element: &'a T,
    /// Position of `element` among the stored elements.
    pub merged_index: usize,
    /// Position of the requested atom inside `element`.
    pub offset: usize,
}

// Only the traits the method bodies actually call are required here.
impl<T: Mergable + HasLength> RleVec<T> {
    /// Pushes a value to the end of the vector.
    ///
    /// If the current last element reports `is_mergable(&value)`, the value is
    /// merged into it; otherwise it is stored as a new element.
    pub fn push(&mut self, value: T) {
        self._len += value.len();

        if let Some(last) = self.vec.last_mut() {
            if last.is_mergable(&value) {
                last.merge(&value);
                // The last entry of `index` is the running end offset.
                *self
                    .index
                    .last_mut()
                    .expect("index is non-empty whenever vec is non-empty") = self._len;
                return;
            }
        }

        if self.vec.is_empty() {
            // The first element starts at atom offset 0.
            self.index.push(0);
        }
        self.vec.push(value);
        self.index.push(self._len);
    }

    /// Returns `true` when no element has been stored.
    pub fn is_empty(&self) -> bool {
        self.vec.is_empty()
    }

    /// Returns the total number of atoms.
    pub fn len(&self) -> usize {
        self._len
    }

    /// Finds the element containing the atom at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index >= self.len()`, which includes every call on an empty
    /// vector.
    pub fn get(&self, index: usize) -> SearchResult<'_, T> {
        assert!(
            index < self._len,
            "atom index {} out of bounds for RleVec of length {}",
            index,
            self._len
        );
        let (merged_index, offset) = self.locate(index);
        SearchResult {
            element: &self.vec[merged_index],
            merged_index,
            offset,
        }
    }

    /// Returns an iterator over the pieces of stored elements that cover the
    /// atom range `from..to`.
    ///
    /// `to` may be equal to `self.len()`, in which case the iteration runs to
    /// the end of the vector. An empty range yields nothing.
    ///
    /// # Panics
    ///
    /// Panics if `from > to` or `to > self.len()`.
    pub fn slice(&self, from: usize, to: usize) -> SliceIterator<'_, T> {
        assert!(
            from <= to,
            "slice start {} is greater than slice end {}",
            from,
            to
        );
        assert!(
            to <= self._len,
            "slice end {} out of bounds for RleVec of length {}",
            to,
            self._len
        );
        let (cur_index, cur_offset) = self.locate(from);
        let (end_index, end_offset) = self.locate(to);
        SliceIterator {
            vec: self.vec.as_slice(),
            cur_index,
            cur_offset,
            end_index,
            end_offset,
        }
    }

    /// Maps an atom index to `(merged_index, offset)`.
    ///
    /// An index at or past the end maps to `(self.vec.len(), 0)`, the position
    /// just after the last element.
    fn locate(&self, index: usize) -> (usize, usize) {
        if index >= self._len {
            return (self.vec.len(), 0);
        }
        // Search the start offsets only; the trailing end offset is excluded.
        let starts = &self.index[..self.vec.len()];
        // `starts[0] == 0 <= index`, so the partition point is at least 1.
        let merged_index = starts.partition_point(|&start| start <= index) - 1;
        (merged_index, index - starts[merged_index])
    }
}

impl<T> RleVec<T> {
    /// Creates an empty vector.
    pub fn new() -> Self {
        RleVec {
            vec: Vec::new(),
            _len: 0,
            index: Vec::new(),
        }
    }
}

impl<T> Default for RleVec<T> {
    fn default() -> Self {
        Self::new()
    }
}

/// Iterator returned by [`RleVec::slice`].
#[derive(Debug)]
pub struct SliceIterator<'a, T> {
    /// The stored elements being walked.
    vec: &'a [T],
    /// Merged index of the next element to yield.
    cur_index: usize,
    /// Offset inside that element at which the next piece starts.
    cur_offset: usize,
    /// Merged index at which the iteration stops.
    end_index: usize,
    /// Offset inside the end element at which the iteration stops.
    end_offset: usize,
}

/// A piece of one stored element, yielded by [`SliceIterator`].
#[derive(Debug, Clone, Copy)]
pub struct Slice<'a, T> {
    /// The stored element this piece belongs to.
    pub value: &'a T,
    /// Offset inside `value` where the piece starts.
    pub start: usize,
    /// Offset inside `value` where the piece ends.
    pub end: usize,
}

impl<'a, T: HasLength> Iterator for SliceIterator<'a, T> {
    type Item = Slice<'a, T>;

    fn next(&mut self) -> Option<Self::Item> {
        let items: &'a [T] = self.vec;

        if self.cur_index == self.end_index {
            if self.cur_offset == self.end_offset {
                return None;
            }
            // Final, partial piece inside the end element.
            let tail = Slice {
                value: &items[self.cur_index],
                start: self.cur_offset,
                end: self.end_offset,
            };
            self.cur_offset = self.end_offset;
            return Some(tail);
        }

        // Not at the end element yet: yield the rest of the current element.
        let value = &items[self.cur_index];
        let piece = Slice {
            value,
            start: self.cur_offset,
            end: value.len(),
        };
        self.cur_index += 1;
        self.cur_offset = 0;
        Some(piece)
    }
}

#[cfg(test)]
mod test {
    mod prime_value {
        use super::super::{HasLength, Mergable, RleVec, Sliceable};

        impl HasLength for String {
            fn len(&self) -> usize {
                // Go through `str` so the call cannot resolve back to this trait method.
                self.as_str().len()
            }
        }

        impl Mergable for String {
            fn is_mergable(&self, _: &Self) -> bool {
                self.as_str().len() < 8
            }

            fn merge(&mut self, other: &Self) {
                self.push_str(other);
            }
        }

        impl Sliceable for String {
            fn slice(&self, start: usize, end: usize) -> Self {
                self[start..end].to_string()
            }
        }

        #[test]
        fn get_at_atom_index() {
            let mut vec: RleVec<String> = RleVec::new();
            vec.push("1234".to_string());
            vec.push("5678".to_string());
            vec.push("12345678".to_string());
            assert_eq!(vec.get(4).element, "12345678");
            assert_eq!(vec.get(4).merged_index, 0);
            assert_eq!(vec.get(4).offset, 4);

            assert_eq!(vec.get(8).element, "12345678");
            assert_eq!(vec.get(8).merged_index, 1);
            assert_eq!(vec.get(8).offset, 0);
        }

        #[test]
        fn slice() {
            let mut vec: RleVec<String> = RleVec::new();
            vec.push("1234".to_string());
            vec.push("56".to_string());
            vec.push("78".to_string());
            vec.push("12345678".to_string());
            let mut iter = vec.slice(4, 12);
            let first = iter.next().unwrap();
            assert_eq!(first.value, "12345678");
            assert_eq!(first.start, 4);
            assert_eq!(first.end, 8);
            let second = iter.next().unwrap();
            assert_eq!(second.value, "12345678");
            assert_eq!(second.start, 0);
            assert_eq!(second.end, 4);
            assert!(iter.next().is_none());
        }

        #[test]
        fn slice_to_end() {
            let mut vec: RleVec<String> = RleVec::new();
            vec.push("12345678".to_string());
            vec.push("abcdefgh".to_string());
            let pieces: Vec<(usize, usize)> =
                vec.slice(6, vec.len()).map(|s| (s.start, s.end)).collect();
            assert_eq!(pieces, vec![(6, 8), (0, 8)]);
        }

        #[test]
        fn slice_on_empty() {
            let vec: RleVec<String> = RleVec::new();
            assert!(vec.slice(0, 0).next().is_none());
        }
    }
}