mirror of
https://github.com/loro-dev/loro.git
synced 2024-11-24 04:09:42 +00:00
feat: rle
This commit is contained in:
commit
2c7e2de763
12 changed files with 295 additions and 0 deletions
2
.editorconfig
Normal file
2
.editorconfig
Normal file
|
@ -0,0 +1,2 @@
|
|||
[*.rs]
|
||||
indent_size = 4
|
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
/target
|
||||
/Cargo.lock
|
2
Cargo.toml
Normal file
2
Cargo.toml
Normal file
|
@ -0,0 +1,2 @@
|
|||
[workspace]
|
||||
members = ["crates/*"]
|
0
README.md
Normal file
0
README.md
Normal file
9
crates/loro-core/Cargo.toml
Normal file
9
crates/loro-core/Cargo.toml
Normal file
|
@ -0,0 +1,9 @@
|
|||
[package]
|
||||
name = "loro-core"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
string_cache = "0.8.3"
|
7
crates/loro-core/src/id.rs
Normal file
7
crates/loro-core/src/id.rs
Normal file
|
@ -0,0 +1,7 @@
|
|||
/// Integer type used to identify a client.
pub type ClientID = u64;

/// An identifier made of a client id and a counter.
///
/// The derived ordering is lexicographic in field order: ids are compared by
/// `client_id` first and by `counter` second.
#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, PartialOrd, Ord)]
pub struct ID {
    /// The client this id belongs to.
    client_id: ClientID,
    /// Counter component of the id.
    // NOTE(review): presumably a per-client sequence number — confirm.
    counter: u32,
}
|
12
crates/loro-core/src/lib.rs
Normal file
12
crates/loro-core/src/lib.rs
Normal file
|
@ -0,0 +1,12 @@
|
|||
#![allow(dead_code, unused_imports)]
|
||||
mod id;
|
||||
mod store;
|
||||
|
||||
#[cfg(test)]
mod tests {
    /// Placeholder smoke test: only checks that the test harness runs.
    #[test]
    fn it_works() {
        let expected = 4;
        let sum = 2 + 2;
        assert_eq!(sum, expected);
    }
}
|
3
crates/loro-core/src/rle.rs
Normal file
3
crates/loro-core/src/rle.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
/// Thin wrapper around a `Vec<T>`.
// NOTE(review): judging by the name this is meant to become a run-length
// encoded array; at the moment it only stores the elements — confirm intent.
struct RleArray<T> {
    /// The stored elements.
    data: Vec<T>,
}
|
11
crates/loro-core/src/store.rs
Normal file
11
crates/loro-core/src/store.rs
Normal file
|
@ -0,0 +1,11 @@
|
|||
use std::collections::HashMap;
|
||||
use string_cache::{Atom, DefaultAtom, EmptyStaticAtomSet};
|
||||
|
||||
use crate::id::ClientID;
|
||||
|
||||
/// A single change kept in the store.
///
/// The struct has no fields yet; `#[non_exhaustive]` signals that fields are
/// expected to be added later.
#[non_exhaustive]
struct Change {}
|
||||
|
||||
struct Store {
|
||||
map: HashMap<ClientID, Vec<Change>>,
|
||||
}
|
8
crates/rle/Cargo.toml
Normal file
8
crates/rle/Cargo.toml
Normal file
|
@ -0,0 +1,8 @@
|
|||
[package]
|
||||
name = "rle"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
2
crates/rle/src/lib.rs
Normal file
2
crates/rle/src/lib.rs
Normal file
|
@ -0,0 +1,2 @@
|
|||
mod rle;
|
||||
pub use rle::{HasLength, Mergable, RleVec, SearchResult, Sliceable};
|
237
crates/rle/src/rle.rs
Normal file
237
crates/rle/src/rle.rs
Normal file
|
@ -0,0 +1,237 @@
|
|||
/// `RleVec<T>` is a vector whose elements are compressed with run-length encoding.
///
/// A `T` value may be merged with its neighbor: when a new value is pushed it
/// is merged into the last element of the array if that element allows it.
/// Every value has a length, so there are two kinds of indexes:
///
/// 1. **merged index** – the position of a (possibly merged) element in the
///    underlying storage.
/// 2. **atom index** – the position of a single atom, counting the length of
///    every element that precedes it.
///
/// `RleVec` uses atom indexes by default:
/// - `len()` returns the number of atoms in the array.
/// - `get(index)` locates the merged element that contains the atom at `index`.
/// - `slice(from, to)` iterates over the pieces of the merged elements that
///   cover the atoms from `from` to `to`.
#[derive(Debug, Clone)]
pub struct RleVec<T> {
    /// The merged elements, in insertion order.
    vec: Vec<T>,
    /// Total number of atoms, i.e. the sum of the lengths of all pushed values.
    _len: usize,
    /// Atom-index boundaries of the merged elements: `index[i]` is the atom
    /// index at which `vec[i]` starts, followed by one trailing entry equal to
    /// `_len`. It is empty while `vec` is empty.
    index: Vec<usize>,
}
|
||||
|
||||
/// An element that can absorb the element that comes right after it.
pub trait Mergable {
    /// Returns `true` when `other` may be folded into `self` with [`Mergable::merge`].
    fn is_mergable(&self, other: &Self) -> bool;

    /// Folds `other` into `self`.
    ///
    /// `RleVec::push` only calls this after `is_mergable` returned `true`.
    fn merge(&mut self, other: &Self);
}
|
||||
|
||||
/// An element from which a sub-range can be extracted as a new value.
pub trait Sliceable {
    /// Returns a new value holding the part of `self` between `start` and `end`.
    // NOTE(review): the `String` implementation in this file's tests treats the
    // range as half-open (`start..end`); presumably that is the contract — confirm.
    fn slice(&self, start: usize, end: usize) -> Self;
}
|
||||
|
||||
/// An element that knows how many atoms it contains.
pub trait HasLength {
    /// Returns the number of atoms in `self`.
    ///
    /// `RleVec::push` adds this value to the vector's atom count.
    fn len(&self) -> usize;

    /// Returns `true` when `len()` is zero.
    fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }
}
|
||||
|
||||
/// The location of an atom inside an `RleVec`, as produced by `RleVec::get`.
///
/// The fields are public so that callers outside this module can read the
/// result. `Clone` and `Copy` are implemented by hand because the struct only
/// holds a reference: a derive would needlessly require `T: Clone` / `T: Copy`.
#[derive(Debug)]
pub struct SearchResult<'a, T> {
    /// The merged element that contains the requested atom.
    pub element: &'a T,
    /// Position of `element` among the merged elements.
    pub merged_index: usize,
    /// Offset of the requested atom from the start of `element`.
    pub offset: usize,
}

impl<'a, T> Clone for SearchResult<'a, T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<'a, T> Copy for SearchResult<'a, T> {}
|
||||
|
||||
impl<T: Mergable + Sliceable + HasLength> RleVec<T> {
|
||||
/// push a new element to the end of the array. It may be merged with last element.
|
||||
pub fn push(&mut self, value: T) {
|
||||
self._len += value.len();
|
||||
if self.vec.is_empty() {
|
||||
self.vec.push(value);
|
||||
self.index.push(0);
|
||||
self.index.push(self._len);
|
||||
return;
|
||||
}
|
||||
|
||||
let last = self.vec.last_mut().unwrap();
|
||||
if last.is_mergable(&value) {
|
||||
last.merge(&value);
|
||||
*self.index.last_mut().unwrap() = self._len;
|
||||
return;
|
||||
}
|
||||
self.vec.push(value);
|
||||
self.index.push(self._len);
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.vec.is_empty()
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self._len
|
||||
}
|
||||
|
||||
/// get the element at the given atom index.
|
||||
/// return: (element, merged_index, offset)
|
||||
pub fn get(&self, index: usize) -> SearchResult<'_, T> {
|
||||
let mut start = 0;
|
||||
let mut end = self.index.len() - 1;
|
||||
while start < end {
|
||||
let mid = (start + end) / 2;
|
||||
if self.index[mid] == index {
|
||||
start = mid;
|
||||
break;
|
||||
}
|
||||
|
||||
if self.index[mid] < index {
|
||||
start = mid + 1;
|
||||
} else {
|
||||
end = mid;
|
||||
}
|
||||
}
|
||||
|
||||
if index < self.index[start] {
|
||||
start -= 1;
|
||||
}
|
||||
|
||||
let value = &self.vec[start];
|
||||
SearchResult {
|
||||
element: value,
|
||||
merged_index: start,
|
||||
offset: index - self.index[start],
|
||||
}
|
||||
}
|
||||
|
||||
/// get a slice from `from` to `to` with atom indexes
|
||||
pub fn slice(&self, from: usize, to: usize) -> SliceIterator<'_, T> {
|
||||
let from_result = self.get(from);
|
||||
let to_result = self.get(to);
|
||||
SliceIterator {
|
||||
vec: &self.vec,
|
||||
cur_index: from_result.merged_index,
|
||||
cur_offset: from_result.offset,
|
||||
end_index: to_result.merged_index,
|
||||
end_offset: to_result.offset,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> RleVec<T> {
|
||||
pub fn new() -> Self {
|
||||
RleVec {
|
||||
vec: Vec::new(),
|
||||
_len: 0,
|
||||
index: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Default for RleVec<T> {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over the pieces of merged elements that cover a range of atoms.
///
/// It keeps a cursor `(cur_index, cur_offset)` and an end position
/// `(end_index, end_offset)`; both pairs are a merged index plus an atom
/// offset inside that merged element.
#[derive(Debug)]
pub struct SliceIterator<'a, T> {
    /// The merged elements being walked. Borrowed as a slice: the iterator
    /// only reads elements and never needs `Vec`-specific operations.
    vec: &'a [T],
    /// Merged index of the element the cursor is currently in.
    cur_index: usize,
    /// Offset of the cursor inside the current element.
    cur_offset: usize,
    /// Merged index of the element in which the range ends.
    end_index: usize,
    /// Offset inside the end element at which the range ends.
    end_offset: usize,
}
|
||||
|
||||
/// One piece yielded by `SliceIterator`: a merged element together with the
/// sub-range of it that belongs to the requested slice.
///
/// The fields are public so that code outside this module can consume the
/// iterator's items. `Clone` and `Copy` are implemented by hand because the
/// struct only holds a reference: deriving them would require `T: Clone` /
/// `T: Copy`, which made e.g. `Slice<String>` non-`Copy`.
#[derive(Debug)]
pub struct Slice<'a, T> {
    /// The merged element this piece refers to.
    pub value: &'a T,
    /// Offset inside `value` at which the piece starts.
    pub start: usize,
    /// Offset inside `value` at which the piece ends.
    pub end: usize,
}

impl<'a, T> Clone for Slice<'a, T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<'a, T> Copy for Slice<'a, T> {}
|
||||
|
||||
impl<'a, T: HasLength> Iterator for SliceIterator<'a, T> {
|
||||
type Item = Slice<'a, T>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.cur_index == self.end_index {
|
||||
if self.cur_offset == self.end_offset {
|
||||
return None;
|
||||
}
|
||||
|
||||
let ans = Slice {
|
||||
value: &self.vec[self.cur_index],
|
||||
start: self.cur_offset,
|
||||
end: self.end_offset,
|
||||
};
|
||||
self.cur_offset = self.end_offset;
|
||||
return Some(ans);
|
||||
}
|
||||
|
||||
let ans = Slice {
|
||||
value: &self.vec[self.cur_index],
|
||||
start: self.cur_offset,
|
||||
end: self.vec[self.cur_index].len(),
|
||||
};
|
||||
|
||||
self.cur_index += 1;
|
||||
self.cur_offset = 0;
|
||||
Some(ans)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    mod prime_value {
        use crate::{HasLength, Mergable, RleVec, Sliceable};

        // `String` is the element type for these tests: one byte is one atom.
        impl HasLength for String {
            fn len(&self) -> usize {
                // Resolves to the inherent `String::len`, not to this trait method.
                String::len(self)
            }
        }

        impl Mergable for String {
            /// A string accepts more content while it is shorter than 8 bytes.
            fn is_mergable(&self, _: &Self) -> bool {
                String::len(self) < 8
            }

            fn merge(&mut self, other: &Self) {
                self.push_str(other.as_str());
            }
        }

        impl Sliceable for String {
            fn slice(&self, start: usize, end: usize) -> Self {
                self[start..end].to_owned()
            }
        }

        /// "1234" and "5678" merge into one 8-byte element; the third push
        /// starts a second element.
        #[test]
        fn get_at_atom_index() {
            let mut vec: RleVec<String> = RleVec::new();
            for piece in ["1234", "5678", "12345678"] {
                vec.push(piece.to_string());
            }

            let inside_first = vec.get(4);
            assert_eq!(inside_first.element, "12345678");
            assert_eq!(inside_first.merged_index, 0);
            assert_eq!(inside_first.offset, 4);

            let start_of_second = vec.get(8);
            assert_eq!(start_of_second.element, "12345678");
            assert_eq!(start_of_second.merged_index, 1);
            assert_eq!(start_of_second.offset, 0);
        }

        /// A slice that starts in the first merged element and ends in the
        /// second one yields one piece per element.
        #[test]
        fn slice() {
            let mut vec: RleVec<String> = RleVec::new();
            for piece in ["1234", "56", "78", "12345678"] {
                vec.push(piece.to_string());
            }

            let mut pieces = vec.slice(4, 12);

            let head = pieces.next().unwrap();
            assert_eq!(head.value, "12345678");
            assert_eq!(head.start, 4);
            assert_eq!(head.end, 8);

            let tail = pieces.next().unwrap();
            assert_eq!(tail.value, "12345678");
            assert_eq!(tail.start, 0);
            assert_eq!(tail.end, 4);
        }
    }
}
|
Loading…
Reference in a new issue