diff --git a/Cargo.lock b/Cargo.lock index 4ba9938c..6de8cb28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -302,6 +302,7 @@ version = "0.1.0" dependencies = [ "bincode", "columnar_derive", + "flate2", "itertools", "lazy_static", "postcard", diff --git a/crates/loro-core/examples/encoding.rs b/crates/loro-core/examples/encoding.rs index 8279e4a8..ffee3c1f 100644 --- a/crates/loro-core/examples/encoding.rs +++ b/crates/loro-core/examples/encoding.rs @@ -1,10 +1,6 @@ -use std::{ - io::{Read, Write}, - time::Instant, -}; +use std::{io::Read, time::Instant}; -use flate2::write::GzEncoder; -use flate2::{read::GzDecoder, Compression}; +use flate2::read::GzDecoder; use loro_core::{configure::Configure, container::registry::ContainerWrapper, LoroCore}; use serde_json::Value; const RAW_DATA: &[u8; 901823] = include_bytes!("../benches/automerge-paper.json.gz"); @@ -64,8 +60,12 @@ fn main() { } println!("Longest continuous bytes length {}", max_count); - let mut e = GzEncoder::new(Vec::new(), Compression::new(6)); - e.write_all(&buf).unwrap(); - let result = e.finish().unwrap(); - println!("GZipped Size {}", result.len()); + use columnar::{compress, CompressConfig}; + let s = Instant::now(); + let result = compress(&buf, &CompressConfig::default()).unwrap(); + println!( + "GZipped Size {} time: {}ms", + result.len(), + s.elapsed().as_millis() + ); } diff --git a/crates/loro-core/fuzz/Cargo.lock b/crates/loro-core/fuzz/Cargo.lock index 0e3a0c6a..0b8194ee 100644 --- a/crates/loro-core/fuzz/Cargo.lock +++ b/crates/loro-core/fuzz/Cargo.lock @@ -8,6 +8,12 @@ version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "aho-corasick" version = "0.7.19" @@ -177,6 +183,7 @@ version = "0.1.0" dependencies = [ "bincode", "columnar_derive", + "flate2", "itertools", "lazy_static", "postcard", @@ -206,6 +213,15 @@ dependencies = [ "volatile-register", ] +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + [[package]] name = "crdt-list" version = "0.3.0" @@ -303,6 +319,16 @@ dependencies = [ "syn", ] +[[package]] +name = "flate2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -504,6 +530,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "miniz_oxide" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34" +dependencies = [ + "adler", +] + [[package]] name = "nb" version = "0.1.3" diff --git a/crates/loro-core/fuzz/Cargo.toml b/crates/loro-core/fuzz/Cargo.toml index f35c9fc1..ec631035 100644 --- a/crates/loro-core/fuzz/Cargo.toml +++ b/crates/loro-core/fuzz/Cargo.toml @@ -53,3 +53,9 @@ name = "recursive" path = "fuzz_targets/recursive.rs" test = false doc = false + +[[bin]] +name = "encode" +path = "fuzz_targets/encode.rs" +test = false +doc = false diff --git a/crates/loro-core/fuzz/fuzz_targets/encode.rs b/crates/loro-core/fuzz/fuzz_targets/encode.rs new file mode 100644 index 00000000..c04747cf --- /dev/null +++ b/crates/loro-core/fuzz/fuzz_targets/encode.rs @@ -0,0 +1,8 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use loro_core::fuzz::{test_single_client_encode, Action}; + +fuzz_target!(|data: Vec| { + // fuzzed code goes here + test_single_client_encode(data) +}); diff --git a/crates/loro-core/src/container/list/list_container.rs b/crates/loro-core/src/container/list/list_container.rs index 81d01820..23c6b0bc 100644 --- a/crates/loro-core/src/container/list/list_container.rs +++ b/crates/loro-core/src/container/list/list_container.rs @@ -411,7 +411,7 @@ impl Container for ListContainer { values.into() } - fn to_export(&mut self, op: &mut RemoteOp, gc: bool) { + fn to_export(&mut self, op: &mut RemoteOp, _gc: bool) { for content in op.contents.iter_mut() { if let Some((slice, _pos)) = content .as_normal_mut() diff --git a/crates/loro-core/src/container/map/map_container.rs b/crates/loro-core/src/container/map/map_container.rs index 60f35b3e..f7d68c13 100644 --- a/crates/loro-core/src/container/map/map_container.rs +++ b/crates/loro-core/src/container/map/map_container.rs @@ -189,7 +189,7 @@ impl Container for MapContainer { todo!() } - fn to_export(&mut self, _op: &mut RemoteOp, gc: bool) {} + fn to_export(&mut self, _op: &mut RemoteOp, _gc: bool) {} fn to_import(&mut self, _op: &mut RemoteOp) {} } diff --git a/crates/loro-core/src/fuzz.rs b/crates/loro-core/src/fuzz.rs index 1001347e..3446ddf0 100644 --- a/crates/loro-core/src/fuzz.rs +++ b/crates/loro-core/src/fuzz.rs @@ -265,6 +265,40 @@ pub fn test_single_client(mut actions: Vec) { } } +pub fn test_single_client_encode(mut actions: Vec) { + let mut store = LoroCore::new(Default::default(), None); + let mut text_container = store.get_text("hello"); + let mut ground_truth = String::new(); + let mut applied = Vec::new(); + for action in actions + .iter_mut() + .filter(|x| x.as_del().is_some() || x.as_ins().is_some()) + { + ground_truth.preprocess(action); + applied.push(action.clone()); + // println!("{}", (&applied).table()); + ground_truth.apply_action(action); + match action { + Action::Ins { content, pos, .. } => { + text_container.insert(&store, *pos, &content.to_string()); + } + Action::Del { pos, len, .. } => { + if text_container.text_len() == 0 { + return; + } + + text_container.delete(&store, *pos, *len); + } + _ => {} + } + } + let encode_bytes = store.encode_snapshot(); + let store2 = + LoroCore::decode_snapshot(&encode_bytes, None, crate::configure::Configure::default()); + let encode_bytes2 = store2.encode_snapshot(); + assert_eq!(encode_bytes, encode_bytes2); +} + pub fn minify_error(site_num: u8, actions: Vec, f: F, normalize: N) where F: Fn(u8, &mut [T]), @@ -616,4 +650,13 @@ mod test { fn mini() { minify_error(8, vec![], test_multi_sites, normalize) } + + #[test] + fn case_encode() { + test_single_client_encode(vec![Ins { + content: 49087, + pos: 4631600097073807295, + site: 191, + }]) + } } diff --git a/crates/loro-core/src/fuzz/recursive.rs b/crates/loro-core/src/fuzz/recursive.rs index 2d1b7f23..3e2b262a 100644 --- a/crates/loro-core/src/fuzz/recursive.rs +++ b/crates/loro-core/src/fuzz/recursive.rs @@ -1,4 +1,4 @@ -use std::{collections::HashSet, fmt::Debug, panic::UnwindSafe, time::Instant}; +use std::{collections::HashSet, fmt::Debug}; use arbitrary::Arbitrary; use enum_as_inner::EnumAsInner; diff --git a/crates/loro-core/src/log_store/encoding.rs b/crates/loro-core/src/log_store/encoding.rs index 2eeb11ab..9509c7b3 100644 --- a/crates/loro-core/src/log_store/encoding.rs +++ b/crates/loro-core/src/log_store/encoding.rs @@ -51,9 +51,11 @@ struct OpEncoding { #[columnar(strategy = "Rle", original_type = "u32")] container: ContainerIdx, /// key index or insert/delete pos - prop: usize, + #[columnar(strategy = "DeltaRle")] + prop: usize, // 18225 bytes // TODO: can be compressed gc: usize, + // #[columnar(compress(level = 0))] value: LoroValue, } @@ -85,7 +87,6 @@ struct Encoded { #[columnar(type = "vec")] deps: Vec, clients: Clients, - // TODO: can be compressed containers: Containers, keys: Vec, } @@ -206,13 +207,25 @@ fn decode_changes( } = encoded; if change_encodings.is_empty() { - return LogStore::new(cfg, None); + let store = LogStore::new(cfg, None); + if !containers.is_empty() { + let mut s = store.write().unwrap(); + for container in containers.iter() { + s.get_or_create_container(container); + } + drop(s); + } + return store; } let mut container_reg = ContainerRegistry::new(); let mut op_iter = ops.into_iter(); let mut changes = FxHashMap::default(); let mut deps_iter = deps.into_iter(); + for container in containers.iter() { + container_reg.get_or_create(container); + } + for change_encoding in change_encodings { let ChangeEncoding { client_idx, @@ -242,8 +255,6 @@ fn decode_changes( } = op; let container_id = containers[container as usize].clone(); - container_reg.get_or_create(&container_id); - let container_type = container_id.container_type(); let content = match container_type { ContainerType::Map => {