perf: optimize encode/decode speed further

This commit is contained in:
Zixuan Chen 2024-10-08 22:53:07 +08:00
parent 27c44ba115
commit ca5da52c44
No known key found for this signature in database
9 changed files with 73 additions and 36 deletions

1
Cargo.lock generated
View file

@ -640,6 +640,7 @@ dependencies = [
"criterion 0.4.0",
"ctor 0.2.6",
"dev-utils",
"ensure-cov",
"flate2",
"loro 0.16.12",
"rand",

View file

@ -17,6 +17,7 @@ tracing = "0.1.40"
criterion = "0.4.0"
rand = "0.8.5"
zstd = "0.13.2"
ensure-cov = { workspace = true }
[dev-dependencies]
flate2 = "1.0"
@ -34,3 +35,6 @@ harness = false
[[bench]]
name = "fork"
harness = false
[profile.bench]
debug = true

View file

@ -1,20 +1,32 @@
use criterion::{criterion_group, criterion_main, Criterion};
use loro::LoroDoc;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use loro::{LoroDoc, LoroText};
fn bench_fork(c: &mut Criterion) {
{
let mut b = c.benchmark_group("fork");
b.bench_function("fork 1000 times with text edit at each fork", |b| {
b.iter(|| {
let mut doc = LoroDoc::new();
for _ in 0..1000 {
let text = doc.get_text("text");
text.insert(0, "Hi").unwrap();
doc = doc.fork();
}
});
});
}
let mut b = c.benchmark_group("fork");
b.bench_function("fork with text edit at each fork", |b| {
let snapshot = {
let doc = LoroDoc::new();
let map = doc.get_map("map");
for i in 0..10000 {
let text = map
.insert_container(&i.to_string(), LoroText::new())
.unwrap();
text.insert(0, &i.to_string()).unwrap();
}
doc.export(loro::ExportMode::Snapshot).unwrap()
};
b.iter_with_setup(
|| {
let doc = LoroDoc::new();
doc.import(&snapshot).unwrap();
doc.get_text("text").insert(0, "123").unwrap();
doc
},
|doc| {
black_box(doc.fork());
},
);
});
}
criterion_group!(benches, bench_fork);

View file

@ -1,10 +1,23 @@
use loro::LoroDoc;
use criterion::black_box;
use loro::{LoroDoc, LoroText};
fn main() {
let snapshot = {
let doc = LoroDoc::new();
let map = doc.get_map("map");
for i in 0..10000 {
let text = map
.insert_container(&i.to_string(), LoroText::new())
.unwrap();
text.insert(0, &i.to_string()).unwrap();
}
doc.export(loro::ExportMode::Snapshot).unwrap()
};
let mut doc = LoroDoc::new();
for _ in 0..10_000 {
let text = doc.get_text("text");
text.insert(0, "Hi").unwrap();
doc = doc.fork();
doc.import(&snapshot).unwrap();
for _ in 0..1000 {
doc.get_text("text").insert(0, "123").unwrap();
doc = black_box(doc.fork());
}
ensure_cov::assert_cov("kv-store::mem_store::export_with_encoded_block");
}

View file

@ -165,8 +165,8 @@ impl Block{
pub fn encode(&self, w: &mut Vec<u8>, compression_type: CompressionType)->CompressionType{
match self{
Block::Normal(block)=>block.encode(w,compression_type),
Block::Large(block)=>block.encode(w,compression_type),
Block::Normal(block) => block.encode(w,compression_type),
Block::Large(block) => block.encode(w,compression_type),
}
}

View file

@ -3,7 +3,6 @@ use crate::compress::CompressionType;
use crate::sstable::{SsTable, SsTableBuilder, SsTableIter};
use crate::{KvIterator, MergeIterator};
use bytes::Bytes;
use tracing::trace;
use std::ops::Bound;
use std::{cmp::Ordering, collections::BTreeMap};
@ -229,7 +228,6 @@ impl MemKvStore {
builder.add(k, v);
}
builder.finish_current_block();
if builder.is_empty() {
return Bytes::new();
}
@ -251,7 +249,8 @@ impl MemKvStore {
}
#[tracing::instrument(level = "debug", skip(self))]
fn export_with_encoded_block(&self) -> Bytes {
fn export_with_encoded_block(&mut self) -> Bytes {
ensure_cov::notify_cov("kv-store::mem_store::export_with_encoded_block");
let mut mem_iter = self.mem_table.iter().peekable();
let mut sstable_iter = self.ss_table[0].iter();
let mut builder = SsTableBuilder::new(
@ -267,7 +266,6 @@ impl MemKvStore {
continue 'outer;
};
if block.last_key() < next_mem_pair.0 {
builder.finish_current_block();
builder.add_new_block(block.clone());
sstable_iter.next_block();
continue;
@ -281,7 +279,6 @@ impl MemKvStore {
continue;
}
trace!("parse block one by one");
// There is an overlap between next_mem_pair and block
let mut iter = BlockIter::new(block.clone());
let mut next_mem_pair = mem_iter.peek();
@ -322,7 +319,6 @@ impl MemKvStore {
sstable_iter.next_block();
}
builder.finish_current_block();
while let Some(block) = sstable_iter.peek_next_block() {
builder.add_new_block(block.clone());
sstable_iter.next_block();
@ -332,7 +328,12 @@ impl MemKvStore {
return Bytes::new();
}
builder.build().export_all()
drop(mem_iter);
self.mem_table.clear();
let ss = builder.build();
let ans = ss.export_all();
let _ = std::mem::replace(&mut self.ss_table, vec![ss]);
ans
}
#[allow(unused)]

View file

@ -20,6 +20,7 @@ pub const SIZE_OF_U32: usize = std::mem::size_of::<u32>();
const DEFAULT_CACHE_SIZE: usize = 1 << 20;
const MAX_BLOCK_NUM: u32 = 10_000_000;
/// ```log
/// ┌──────────────────────────────────────────────────────────────────────────────────────────┐
/// │ Block Meta │
/// │┌ ─ ─ ─ ─ ─ ─ ─┌ ─ ─ ─ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ─ ─┌ ─ ─ ─ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ─ ┐ │
@ -27,6 +28,7 @@ const MAX_BLOCK_NUM: u32 = 10_000_000;
/// ││ u32 │ u16 │ bytes │ u8 │ u16(option) │bytes(option)│ │
/// │ ─ ─ ─ ─ ─ ─ ─ ┘─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
/// └──────────────────────────────────────────────────────────────────────────────────────────┘
/// ```
#[derive(Debug, Clone)]
pub(crate) struct BlockMeta {
offset: usize,
@ -188,7 +190,7 @@ impl SsTableBuilder {
}
pub fn is_empty(&self) -> bool {
self.meta.is_empty()
self.meta.is_empty() && self.block_builder.is_empty()
}
pub(crate) fn finish_current_block(&mut self) {

View file

@ -789,6 +789,8 @@ impl DocState {
/// Ensure all alive containers are created in DocState and will be encoded in the next `encode` call
pub(crate) fn ensure_all_alive_containers(&mut self) -> FxHashSet<ContainerID> {
// TODO: PERF This can be optimized because we shouldn't need to call get_value for
// all the containers every time we export
let ans = self.get_all_alive_containers();
for id in ans.iter() {
self.ensure_container(id);

View file

@ -141,11 +141,12 @@ impl InnerStore {
for (k, v) in iter {
count += 1;
let cid = ContainerID::from_bytes(&k);
let parent = ContainerWrapper::decode_parent(&v);
let c = ContainerWrapper::new_from_bytes(v);
let parent = c.parent();
let idx = self.arena.register_container(&cid);
let p = parent.as_ref().map(|p| self.arena.register_container(p));
self.arena.set_parent(idx, p);
if self.store.remove(&idx).is_some() {
if self.store.insert(idx, c).is_some() {
count -= 1;
}
}
@ -153,7 +154,7 @@ impl InnerStore {
self.len = count;
});
self.all_loaded = false;
self.all_loaded = true;
Ok(fr)
}
@ -174,11 +175,12 @@ impl InnerStore {
for (k, v) in iter {
count += 1;
let cid = ContainerID::from_bytes(&k);
let parent = ContainerWrapper::decode_parent(&v);
let c = ContainerWrapper::new_from_bytes(v);
let parent = c.parent();
let idx = self.arena.register_container(&cid);
let p = parent.as_ref().map(|p| self.arena.register_container(p));
self.arena.set_parent(idx, p);
if self.store.remove(&idx).is_some() {
if self.store.insert(idx, c).is_some() {
count -= 1;
}
}
@ -186,7 +188,7 @@ impl InnerStore {
self.len = count;
});
self.all_loaded = false;
self.all_loaded = true;
Ok(())
}