perf: optimize encode/decode speed further

This commit is contained in:
Zixuan Chen 2024-10-08 22:53:07 +08:00
parent 27c44ba115
commit ca5da52c44
No known key found for this signature in database
9 changed files with 73 additions and 36 deletions

1
Cargo.lock generated
View file

@ -640,6 +640,7 @@ dependencies = [
"criterion 0.4.0", "criterion 0.4.0",
"ctor 0.2.6", "ctor 0.2.6",
"dev-utils", "dev-utils",
"ensure-cov",
"flate2", "flate2",
"loro 0.16.12", "loro 0.16.12",
"rand", "rand",

View file

@ -17,6 +17,7 @@ tracing = "0.1.40"
criterion = "0.4.0" criterion = "0.4.0"
rand = "0.8.5" rand = "0.8.5"
zstd = "0.13.2" zstd = "0.13.2"
ensure-cov = { workspace = true }
[dev-dependencies] [dev-dependencies]
flate2 = "1.0" flate2 = "1.0"
@ -34,3 +35,6 @@ harness = false
[[bench]] [[bench]]
name = "fork" name = "fork"
harness = false harness = false
[profile.bench]
debug = true

View file

@ -1,20 +1,32 @@
use criterion::{criterion_group, criterion_main, Criterion}; use criterion::{black_box, criterion_group, criterion_main, Criterion};
use loro::LoroDoc; use loro::{LoroDoc, LoroText};
fn bench_fork(c: &mut Criterion) { fn bench_fork(c: &mut Criterion) {
{ let mut b = c.benchmark_group("fork");
let mut b = c.benchmark_group("fork"); b.bench_function("fork with text edit at each fork", |b| {
b.bench_function("fork 1000 times with text edit at each fork", |b| { let snapshot = {
b.iter(|| { let doc = LoroDoc::new();
let mut doc = LoroDoc::new(); let map = doc.get_map("map");
for _ in 0..1000 { for i in 0..10000 {
let text = doc.get_text("text"); let text = map
text.insert(0, "Hi").unwrap(); .insert_container(&i.to_string(), LoroText::new())
doc = doc.fork(); .unwrap();
} text.insert(0, &i.to_string()).unwrap();
}); }
}); doc.export(loro::ExportMode::Snapshot).unwrap()
} };
b.iter_with_setup(
|| {
let doc = LoroDoc::new();
doc.import(&snapshot).unwrap();
doc.get_text("text").insert(0, "123").unwrap();
doc
},
|doc| {
black_box(doc.fork());
},
);
});
} }
criterion_group!(benches, bench_fork); criterion_group!(benches, bench_fork);

View file

@ -1,10 +1,23 @@
use loro::LoroDoc; use criterion::black_box;
use loro::{LoroDoc, LoroText};
fn main() { fn main() {
let snapshot = {
let doc = LoroDoc::new();
let map = doc.get_map("map");
for i in 0..10000 {
let text = map
.insert_container(&i.to_string(), LoroText::new())
.unwrap();
text.insert(0, &i.to_string()).unwrap();
}
doc.export(loro::ExportMode::Snapshot).unwrap()
};
let mut doc = LoroDoc::new(); let mut doc = LoroDoc::new();
for _ in 0..10_000 { doc.import(&snapshot).unwrap();
let text = doc.get_text("text"); for _ in 0..1000 {
text.insert(0, "Hi").unwrap(); doc.get_text("text").insert(0, "123").unwrap();
doc = doc.fork(); doc = black_box(doc.fork());
} }
ensure_cov::assert_cov("kv-store::mem_store::export_with_encoded_block");
} }

View file

@ -165,8 +165,8 @@ impl Block{
pub fn encode(&self, w: &mut Vec<u8>, compression_type: CompressionType)->CompressionType{ pub fn encode(&self, w: &mut Vec<u8>, compression_type: CompressionType)->CompressionType{
match self{ match self{
Block::Normal(block)=>block.encode(w,compression_type), Block::Normal(block) => block.encode(w,compression_type),
Block::Large(block)=>block.encode(w,compression_type), Block::Large(block) => block.encode(w,compression_type),
} }
} }

View file

@ -3,7 +3,6 @@ use crate::compress::CompressionType;
use crate::sstable::{SsTable, SsTableBuilder, SsTableIter}; use crate::sstable::{SsTable, SsTableBuilder, SsTableIter};
use crate::{KvIterator, MergeIterator}; use crate::{KvIterator, MergeIterator};
use bytes::Bytes; use bytes::Bytes;
use tracing::trace;
use std::ops::Bound; use std::ops::Bound;
use std::{cmp::Ordering, collections::BTreeMap}; use std::{cmp::Ordering, collections::BTreeMap};
@ -229,7 +228,6 @@ impl MemKvStore {
builder.add(k, v); builder.add(k, v);
} }
builder.finish_current_block();
if builder.is_empty() { if builder.is_empty() {
return Bytes::new(); return Bytes::new();
} }
@ -251,7 +249,8 @@ impl MemKvStore {
} }
#[tracing::instrument(level = "debug", skip(self))] #[tracing::instrument(level = "debug", skip(self))]
fn export_with_encoded_block(&self) -> Bytes { fn export_with_encoded_block(&mut self) -> Bytes {
ensure_cov::notify_cov("kv-store::mem_store::export_with_encoded_block");
let mut mem_iter = self.mem_table.iter().peekable(); let mut mem_iter = self.mem_table.iter().peekable();
let mut sstable_iter = self.ss_table[0].iter(); let mut sstable_iter = self.ss_table[0].iter();
let mut builder = SsTableBuilder::new( let mut builder = SsTableBuilder::new(
@ -267,7 +266,6 @@ impl MemKvStore {
continue 'outer; continue 'outer;
}; };
if block.last_key() < next_mem_pair.0 { if block.last_key() < next_mem_pair.0 {
builder.finish_current_block();
builder.add_new_block(block.clone()); builder.add_new_block(block.clone());
sstable_iter.next_block(); sstable_iter.next_block();
continue; continue;
@ -281,7 +279,6 @@ impl MemKvStore {
continue; continue;
} }
trace!("parse block one by one");
// There is an overlap between next_mem_pair and block // There is an overlap between next_mem_pair and block
let mut iter = BlockIter::new(block.clone()); let mut iter = BlockIter::new(block.clone());
let mut next_mem_pair = mem_iter.peek(); let mut next_mem_pair = mem_iter.peek();
@ -322,7 +319,6 @@ impl MemKvStore {
sstable_iter.next_block(); sstable_iter.next_block();
} }
builder.finish_current_block();
while let Some(block) = sstable_iter.peek_next_block() { while let Some(block) = sstable_iter.peek_next_block() {
builder.add_new_block(block.clone()); builder.add_new_block(block.clone());
sstable_iter.next_block(); sstable_iter.next_block();
@ -332,7 +328,12 @@ impl MemKvStore {
return Bytes::new(); return Bytes::new();
} }
builder.build().export_all() drop(mem_iter);
self.mem_table.clear();
let ss = builder.build();
let ans = ss.export_all();
let _ = std::mem::replace(&mut self.ss_table, vec![ss]);
ans
} }
#[allow(unused)] #[allow(unused)]

View file

@ -20,6 +20,7 @@ pub const SIZE_OF_U32: usize = std::mem::size_of::<u32>();
const DEFAULT_CACHE_SIZE: usize = 1 << 20; const DEFAULT_CACHE_SIZE: usize = 1 << 20;
const MAX_BLOCK_NUM: u32 = 10_000_000; const MAX_BLOCK_NUM: u32 = 10_000_000;
/// ```log
/// ┌──────────────────────────────────────────────────────────────────────────────────────────┐ /// ┌──────────────────────────────────────────────────────────────────────────────────────────┐
/// │ Block Meta │ /// │ Block Meta │
/// │┌ ─ ─ ─ ─ ─ ─ ─┌ ─ ─ ─ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ─ ─┌ ─ ─ ─ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ─ ┐ │ /// │┌ ─ ─ ─ ─ ─ ─ ─┌ ─ ─ ─ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ─ ─┌ ─ ─ ─ ─ ─ ─ ─ ┬ ─ ─ ─ ─ ─ ─ ┐ │
@ -27,6 +28,7 @@ const MAX_BLOCK_NUM: u32 = 10_000_000;
/// ││ u32 │ u16 │ bytes │ u8 │ u16(option) │bytes(option)│ │ /// ││ u32 │ u16 │ bytes │ u8 │ u16(option) │bytes(option)│ │
/// │ ─ ─ ─ ─ ─ ─ ─ ┘─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ /// │ ─ ─ ─ ─ ─ ─ ─ ┘─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
/// └──────────────────────────────────────────────────────────────────────────────────────────┘ /// └──────────────────────────────────────────────────────────────────────────────────────────┘
/// ```
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub(crate) struct BlockMeta { pub(crate) struct BlockMeta {
offset: usize, offset: usize,
@ -188,7 +190,7 @@ impl SsTableBuilder {
} }
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
self.meta.is_empty() self.meta.is_empty() && self.block_builder.is_empty()
} }
pub(crate) fn finish_current_block(&mut self) { pub(crate) fn finish_current_block(&mut self) {

View file

@ -789,6 +789,8 @@ impl DocState {
/// Ensure all alive containers are created in DocState and will be encoded in the next `encode` call /// Ensure all alive containers are created in DocState and will be encoded in the next `encode` call
pub(crate) fn ensure_all_alive_containers(&mut self) -> FxHashSet<ContainerID> { pub(crate) fn ensure_all_alive_containers(&mut self) -> FxHashSet<ContainerID> {
// TODO: PERF This can be optimized because we shouldn't need to call get_value for
// all the containers every time we export
let ans = self.get_all_alive_containers(); let ans = self.get_all_alive_containers();
for id in ans.iter() { for id in ans.iter() {
self.ensure_container(id); self.ensure_container(id);

View file

@ -141,11 +141,12 @@ impl InnerStore {
for (k, v) in iter { for (k, v) in iter {
count += 1; count += 1;
let cid = ContainerID::from_bytes(&k); let cid = ContainerID::from_bytes(&k);
let parent = ContainerWrapper::decode_parent(&v); let c = ContainerWrapper::new_from_bytes(v);
let parent = c.parent();
let idx = self.arena.register_container(&cid); let idx = self.arena.register_container(&cid);
let p = parent.as_ref().map(|p| self.arena.register_container(p)); let p = parent.as_ref().map(|p| self.arena.register_container(p));
self.arena.set_parent(idx, p); self.arena.set_parent(idx, p);
if self.store.remove(&idx).is_some() { if self.store.insert(idx, c).is_some() {
count -= 1; count -= 1;
} }
} }
@ -153,7 +154,7 @@ impl InnerStore {
self.len = count; self.len = count;
}); });
self.all_loaded = false; self.all_loaded = true;
Ok(fr) Ok(fr)
} }
@ -174,11 +175,12 @@ impl InnerStore {
for (k, v) in iter { for (k, v) in iter {
count += 1; count += 1;
let cid = ContainerID::from_bytes(&k); let cid = ContainerID::from_bytes(&k);
let parent = ContainerWrapper::decode_parent(&v); let c = ContainerWrapper::new_from_bytes(v);
let parent = c.parent();
let idx = self.arena.register_container(&cid); let idx = self.arena.register_container(&cid);
let p = parent.as_ref().map(|p| self.arena.register_container(p)); let p = parent.as_ref().map(|p| self.arena.register_container(p));
self.arena.set_parent(idx, p); self.arena.set_parent(idx, p);
if self.store.remove(&idx).is_some() { if self.store.insert(idx, c).is_some() {
count -= 1; count -= 1;
} }
} }
@ -186,7 +188,7 @@ impl InnerStore {
self.len = count; self.len = count;
}); });
self.all_loaded = false; self.all_loaded = true;
Ok(()) Ok(())
} }