mirror of
https://github.com/loro-dev/loro.git
synced 2024-11-28 01:06:50 +00:00
feat: rle
This commit is contained in:
commit
2c7e2de763
12 changed files with 295 additions and 0 deletions
2
.editorconfig
Normal file
2
.editorconfig
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
[*.rs]
|
||||||
|
indent_size = 4
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
/target
|
||||||
|
/Cargo.lock
|
2
Cargo.toml
Normal file
2
Cargo.toml
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
[workspace]
|
||||||
|
members = ["crates/*"]
|
0
README.md
Normal file
0
README.md
Normal file
9
crates/loro-core/Cargo.toml
Normal file
9
crates/loro-core/Cargo.toml
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
[package]
|
||||||
|
name = "loro-core"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
string_cache = "0.8.3"
|
7
crates/loro-core/src/id.rs
Normal file
7
crates/loro-core/src/id.rs
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
/// Identifier of a client, represented as an unsigned 64-bit integer.
pub type ClientID = u64;

/// An identifier made of a `client_id` paired with a `counter`.
///
/// The derived ordering compares `client_id` first and `counter` second,
/// following the field declaration order.
#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, PartialOrd, Ord)]
pub struct ID {
    /// Client part of the identifier; uses the `ClientID` alias declared above.
    client_id: ClientID,
    /// Counter part of the identifier.
    counter: u32,
}
|
12
crates/loro-core/src/lib.rs
Normal file
12
crates/loro-core/src/lib.rs
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
#![allow(dead_code, unused_imports)]
|
||||||
|
mod id;
|
||||||
|
mod store;
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    /// Placeholder smoke test: only checks that the test harness runs.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}
|
3
crates/loro-core/src/rle.rs
Normal file
3
crates/loro-core/src/rle.rs
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
/// Generic container that owns its elements in a single `Vec<T>`.
struct RleArray<T> {
    /// Backing storage for the elements.
    data: Vec<T>,
}
|
11
crates/loro-core/src/store.rs
Normal file
11
crates/loro-core/src/store.rs
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use string_cache::{Atom, DefaultAtom, EmptyStaticAtomSet};
|
||||||
|
|
||||||
|
use crate::id::ClientID;
|
||||||
|
|
||||||
|
/// Placeholder record; it has no fields yet.
#[non_exhaustive]
struct Change {}
|
||||||
|
|
||||||
|
struct Store {
|
||||||
|
map: HashMap<ClientID, Vec<Change>>,
|
||||||
|
}
|
8
crates/rle/Cargo.toml
Normal file
8
crates/rle/Cargo.toml
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
[package]
|
||||||
|
name = "rle"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
2
crates/rle/src/lib.rs
Normal file
2
crates/rle/src/lib.rs
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
mod rle;
|
||||||
|
pub use rle::{HasLength, Mergable, RleVec, SearchResult, Sliceable};
|
237
crates/rle/src/rle.rs
Normal file
237
crates/rle/src/rle.rs
Normal file
|
@ -0,0 +1,237 @@
|
||||||
|
/// A vector whose neighbouring elements may be merged, in the manner of
/// run-length encoding.
///
/// Every stored `T` has a length, and a pushed value may be merged into the
/// last stored element instead of being appended. Two kinds of index exist:
/// 1. (merged) the position of a stored, possibly merged, element;
/// 2. (atom) the position counted in units of element length.
///
/// `RleVec` uses atom indexes by default:
/// - `len()` returns the number of atoms in the array;
/// - `get(index)` looks up the element that holds the atom at `index`;
/// - `slice(from, to)` iterates over the atoms from `from` up to `to`.
pub struct RleVec<T> {
    /// Stored (possibly merged) elements, in push order.
    vec: Vec<T>,
    /// Sum of the lengths of all pushed values, i.e. the atom count.
    _len: usize,
    /// Atom boundaries: `index[i]` is the atom offset at which `vec[i]`
    /// starts, and the final entry equals `_len`. Empty until the first push.
    index: Vec<usize>,
}
|
||||||
|
|
||||||
|
/// Values that can absorb a neighbouring value of the same type.
pub trait Mergable {
    /// Returns `true` when `other` may be merged into `self`.
    fn is_mergable(&self, other: &Self) -> bool;

    /// Merges `other` into `self` in place.
    fn merge(&mut self, other: &Self);
}
|
||||||
|
|
||||||
|
/// Values from which a sub-range can be extracted as a new value.
pub trait Sliceable {
    /// Returns a new value built from the part of `self` selected by
    /// `start` and `end`; how the bounds are interpreted is up to the
    /// implementor.
    fn slice(&self, start: usize, end: usize) -> Self;
}
|
||||||
|
|
||||||
|
/// Values that report a length.
pub trait HasLength {
    /// Returns the length of the value.
    fn len(&self) -> usize;

    /// Returns `true` when `len()` is zero.
    fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }
}
|
||||||
|
|
||||||
|
/// Result of looking up an atom index in an `RleVec`.
///
/// The fields are public so that code outside this module can read the
/// result; the type has no accessor methods.
#[derive(Debug, Clone, Copy)]
pub struct SearchResult<'a, T> {
    /// The stored (possibly merged) element that holds the requested atom.
    pub element: &'a T,
    /// Position of `element` among the stored elements.
    pub merged_index: usize,
    /// Position of the requested atom inside `element`.
    pub offset: usize,
}
|
||||||
|
|
||||||
|
impl<T: Mergable + Sliceable + HasLength> RleVec<T> {
|
||||||
|
/// push a new element to the end of the array. It may be merged with last element.
|
||||||
|
pub fn push(&mut self, value: T) {
|
||||||
|
self._len += value.len();
|
||||||
|
if self.vec.is_empty() {
|
||||||
|
self.vec.push(value);
|
||||||
|
self.index.push(0);
|
||||||
|
self.index.push(self._len);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let last = self.vec.last_mut().unwrap();
|
||||||
|
if last.is_mergable(&value) {
|
||||||
|
last.merge(&value);
|
||||||
|
*self.index.last_mut().unwrap() = self._len;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
self.vec.push(value);
|
||||||
|
self.index.push(self._len);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.vec.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self._len
|
||||||
|
}
|
||||||
|
|
||||||
|
/// get the element at the given atom index.
|
||||||
|
/// return: (element, merged_index, offset)
|
||||||
|
pub fn get(&self, index: usize) -> SearchResult<'_, T> {
|
||||||
|
let mut start = 0;
|
||||||
|
let mut end = self.index.len() - 1;
|
||||||
|
while start < end {
|
||||||
|
let mid = (start + end) / 2;
|
||||||
|
if self.index[mid] == index {
|
||||||
|
start = mid;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.index[mid] < index {
|
||||||
|
start = mid + 1;
|
||||||
|
} else {
|
||||||
|
end = mid;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if index < self.index[start] {
|
||||||
|
start -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let value = &self.vec[start];
|
||||||
|
SearchResult {
|
||||||
|
element: value,
|
||||||
|
merged_index: start,
|
||||||
|
offset: index - self.index[start],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// get a slice from `from` to `to` with atom indexes
|
||||||
|
pub fn slice(&self, from: usize, to: usize) -> SliceIterator<'_, T> {
|
||||||
|
let from_result = self.get(from);
|
||||||
|
let to_result = self.get(to);
|
||||||
|
SliceIterator {
|
||||||
|
vec: &self.vec,
|
||||||
|
cur_index: from_result.merged_index,
|
||||||
|
cur_offset: from_result.offset,
|
||||||
|
end_index: to_result.merged_index,
|
||||||
|
end_offset: to_result.offset,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> RleVec<T> {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
RleVec {
|
||||||
|
vec: Vec::new(),
|
||||||
|
_len: 0,
|
||||||
|
index: Vec::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Default for RleVec<T> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterator over the parts of stored elements that fall between a start
/// position and an end position, each given as `(merged index, offset)`.
pub struct SliceIterator<'a, T> {
    /// The stored elements being walked. Borrowed as a slice; a `&Vec<T>`
    /// coerces to it at construction.
    vec: &'a [T],
    /// Merged index of the element the next item is taken from.
    cur_index: usize,
    /// Offset inside that element at which the next item starts.
    cur_offset: usize,
    /// Merged index of the element in which the iteration stops.
    end_index: usize,
    /// Offset inside the end element at which the iteration stops (exclusive).
    end_offset: usize,
}
|
||||||
|
|
||||||
|
/// One item produced by `SliceIterator`: a stored element together with the
/// sub-range of it that belongs to the requested slice.
///
/// The fields are public so that code outside this module can read the
/// items; the type has no accessor methods.
#[derive(Debug, Clone, Copy)]
pub struct Slice<'a, T> {
    /// The stored element this item refers to.
    pub value: &'a T,
    /// Offset inside `value` at which the selected part starts.
    pub start: usize,
    /// Offset inside `value` at which the selected part ends (exclusive).
    pub end: usize,
}
|
||||||
|
|
||||||
|
impl<'a, T: HasLength> Iterator for SliceIterator<'a, T> {
|
||||||
|
type Item = Slice<'a, T>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.cur_index == self.end_index {
|
||||||
|
if self.cur_offset == self.end_offset {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ans = Slice {
|
||||||
|
value: &self.vec[self.cur_index],
|
||||||
|
start: self.cur_offset,
|
||||||
|
end: self.end_offset,
|
||||||
|
};
|
||||||
|
self.cur_offset = self.end_offset;
|
||||||
|
return Some(ans);
|
||||||
|
}
|
||||||
|
|
||||||
|
let ans = Slice {
|
||||||
|
value: &self.vec[self.cur_index],
|
||||||
|
start: self.cur_offset,
|
||||||
|
end: self.vec[self.cur_index].len(),
|
||||||
|
};
|
||||||
|
|
||||||
|
self.cur_index += 1;
|
||||||
|
self.cur_offset = 0;
|
||||||
|
Some(ans)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    /// Tests that use `String` as the element type: atoms are bytes.
    mod prime_value {
        use crate::{HasLength, Mergable, RleVec, Sliceable};

        impl HasLength for String {
            fn len(&self) -> usize {
                self.as_str().len()
            }
        }

        impl Mergable for String {
            /// A string accepts more content while it is shorter than 8 bytes.
            fn is_mergable(&self, _: &Self) -> bool {
                self.as_str().len() < 8
            }

            fn merge(&mut self, other: &Self) {
                self.push_str(other.as_str());
            }
        }

        impl Sliceable for String {
            fn slice(&self, start: usize, end: usize) -> Self {
                self[start..end].to_owned()
            }
        }

        #[test]
        fn get_at_atom_index() {
            let mut vec: RleVec<String> = RleVec::new();
            // The first two pushes merge into one 8-byte element; the third
            // is stored as a second element.
            for piece in ["1234", "5678", "12345678"] {
                vec.push(piece.to_string());
            }

            let inside_first = vec.get(4);
            assert_eq!(inside_first.element, "12345678");
            assert_eq!(inside_first.merged_index, 0);
            assert_eq!(inside_first.offset, 4);

            let start_of_second = vec.get(8);
            assert_eq!(start_of_second.element, "12345678");
            assert_eq!(start_of_second.merged_index, 1);
            assert_eq!(start_of_second.offset, 0);
        }

        #[test]
        fn slice() {
            let mut vec: RleVec<String> = RleVec::new();
            // "1234" + "56" + "78" merge into one element; the last push
            // becomes a second element.
            for piece in ["1234", "56", "78", "12345678"] {
                vec.push(piece.to_string());
            }

            let mut iter = vec.slice(4, 12);

            let first = iter.next().unwrap();
            assert_eq!(first.value, "12345678");
            assert_eq!(first.start, 4);
            assert_eq!(first.end, 8);

            let second = iter.next().unwrap();
            assert_eq!(second.value, "12345678");
            assert_eq!(second.start, 0);
            assert_eq!(second.end, 4);
        }
    }
}
|
Loading…
Reference in a new issue