mirror of
https://github.com/loro-dev/loro.git
synced 2025-02-02 11:06:14 +00:00
refactor(bytes): refine interface
This commit is contained in:
parent
8704d22750
commit
f604a89fc3
3 changed files with 62 additions and 21 deletions
8
Cargo.lock
generated
8
Cargo.lock
generated
|
@ -291,6 +291,14 @@ dependencies = [
|
|||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "compact-bytes"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"append-only-bytes",
|
||||
"fxhash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console_error_panic_hook"
|
||||
version = "0.1.7"
|
||||
|
|
|
@ -39,12 +39,11 @@ So it will break the bytes into small pieces to reuse them.
|
|||
|
||||
```rust
|
||||
use compact_bytes::CompactBytes;
|
||||
use append_only_bytes::BytesSlice;
|
||||
|
||||
let mut arena = CompactBytes::new();
|
||||
let bytes1 = arena.alloc(b"hello");
|
||||
// it breaks the bytes into 3 pieces "hi ", "hello", " world"
|
||||
let bytes2: Vec<BytesSlice> = arena.alloc_advance(b"hi hello world");
|
||||
let bytes2: Vec<Range<usize>> = arena.alloc_advance(b"hi hello world");
|
||||
```
|
||||
|
||||
Or you can use `append` to not reuse the old bytes at all.
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
#![doc = include_str!("../README.md")]
|
||||
|
||||
use std::ops::Range;
|
||||
|
||||
use append_only_bytes::{AppendOnlyBytes, BytesSlice};
|
||||
use fxhash::FxHashMap;
|
||||
|
||||
// One entry in the hashtable will take 16bytes. And we need one entry for every position in the document.
|
||||
// So the size of the hashtable will be 16 * document_size.
|
||||
// One entry in the hashtable will take 16 ~ 32 bytes. And we need one entry for every position in the document.
|
||||
// So the size of the hashtable will be (16 ~ 32) * document_size.
|
||||
pub struct CompactBytes {
|
||||
bytes: AppendOnlyBytes,
|
||||
/// map 4 bytes to position in the document
|
||||
|
@ -34,14 +36,18 @@ impl CompactBytes {
|
|||
self.append(bytes)
|
||||
}
|
||||
|
||||
pub fn alloc_advance(&mut self, bytes: &[u8]) -> Vec<BytesSlice> {
|
||||
// ans is Vec<(from_index, to_index)>
|
||||
let mut ans: Vec<(usize, usize)> = vec![];
|
||||
pub fn as_bytes(&self) -> &[u8] {
|
||||
self.bytes.as_bytes()
|
||||
}
|
||||
|
||||
fn push(ans: &mut Vec<(usize, usize)>, new: (usize, usize)) {
|
||||
pub fn alloc_advance(&mut self, bytes: &[u8]) -> Vec<Range<usize>> {
|
||||
let old_len = self.bytes.len();
|
||||
let mut ans: Vec<Range<usize>> = vec![];
|
||||
// this push will try to merge the new range with the last range in the ans
|
||||
fn push_with_merge(ans: &mut Vec<Range<usize>>, new: Range<usize>) {
|
||||
if let Some(last) = ans.last_mut() {
|
||||
if last.1 == new.0 {
|
||||
last.1 = new.1;
|
||||
if last.end == new.start {
|
||||
last.end = new.end;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -53,20 +59,20 @@ impl CompactBytes {
|
|||
while index < bytes.len() {
|
||||
match self.lookup(&bytes[index..]) {
|
||||
Some((pos, len)) => {
|
||||
push(&mut ans, (pos, pos + len));
|
||||
push_with_merge(&mut ans, pos..pos + len);
|
||||
index += len;
|
||||
}
|
||||
None => {
|
||||
push(&mut ans, (self.bytes.len(), self.bytes.len() + 1));
|
||||
push_with_merge(&mut ans, self.bytes.len()..self.bytes.len() + 1);
|
||||
self.bytes.push(bytes[index]);
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ans.into_iter()
|
||||
.map(|(from, to)| self.bytes.slice(from..to))
|
||||
.collect()
|
||||
self.append_new_entries_to_map(old_len);
|
||||
|
||||
ans
|
||||
}
|
||||
|
||||
pub fn append(&mut self, bytes: &[u8]) -> BytesSlice {
|
||||
|
@ -81,8 +87,16 @@ impl CompactBytes {
|
|||
// if old doc = "", append "0123", then we need to add "0123" entry to the map
|
||||
// if old doc = "0123", append "x", then we need to add "123x" entry to the map
|
||||
// if old doc = "0123", append "xyz", then we need to add "123x", "23xy", "3xyz" entries to the map
|
||||
let mut key = 0;
|
||||
let mut is_first = true;
|
||||
for i in old_len.saturating_sub(3)..self.bytes.len().saturating_sub(3) {
|
||||
let key = to_key(&self.bytes[i..i + 4]);
|
||||
if is_first {
|
||||
key = to_key(&self.bytes[i..i + 4]);
|
||||
is_first = false;
|
||||
} else {
|
||||
key = (key << 8) | self.bytes[i + 3] as u32;
|
||||
}
|
||||
|
||||
self.map.insert(key, i as u32);
|
||||
}
|
||||
}
|
||||
|
@ -97,7 +111,7 @@ impl CompactBytes {
|
|||
let key = to_key(bytes);
|
||||
match self.map.get(&key).copied() {
|
||||
Some(pos) => {
|
||||
let mut pos = pos as usize;
|
||||
let pos = pos as usize;
|
||||
let mut len = 4;
|
||||
while pos + len < self.bytes.len()
|
||||
&& len < bytes.len()
|
||||
|
@ -149,13 +163,33 @@ mod tests {
|
|||
let mut bytes = CompactBytes::new();
|
||||
bytes.append(b"123456789");
|
||||
let ans = bytes.alloc_advance(b"haha12345567891234");
|
||||
assert_eq!(ans.len(), 4);
|
||||
assert_eq!(ans[0].len(), 4);
|
||||
assert_eq!(ans[0].start(), 9);
|
||||
assert_eq!(ans[0].start, 9);
|
||||
assert_eq!(ans[1].len(), 5);
|
||||
assert_eq!(ans[1].start(), 0);
|
||||
assert_eq!(ans[1].start, 0);
|
||||
assert_eq!(ans[2].len(), 5);
|
||||
assert_eq!(ans[2].start(), 4);
|
||||
assert_eq!(ans[2].start, 4);
|
||||
assert_eq!(ans[3].len(), 4);
|
||||
assert_eq!(ans[3].start(), 0);
|
||||
assert_eq!(ans[3].start, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn advance_alloc_should_be_indexed_as_well() {
|
||||
let mut bytes = CompactBytes::new();
|
||||
bytes.alloc_advance(b"1234");
|
||||
let a = bytes.alloc(b"1234");
|
||||
assert_eq!(a.start(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn advance_should_use_longer_match() {
|
||||
let mut bytes = CompactBytes::new();
|
||||
bytes.append(b"1234kk 123456 1234xyz");
|
||||
let ans = bytes.alloc_advance(b"012345678");
|
||||
assert_eq!(ans.len(), 3);
|
||||
assert_eq!(ans[0].len(), 1);
|
||||
assert_eq!(ans[1].len(), 6);
|
||||
assert_eq!(ans[2].len(), 2);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue