diff --git a/crates/compact-bytes/Cargo.toml b/crates/compact-bytes/Cargo.toml index 4c7de5f4..c4e99bbb 100644 --- a/crates/compact-bytes/Cargo.toml +++ b/crates/compact-bytes/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +ahash = "0.8.3" append-only-bytes = "0.1.8" fxhash = "0.2.1" linked-hash-map = "0.5.6" diff --git a/crates/compact-bytes/fuzz/Cargo.lock b/crates/compact-bytes/fuzz/Cargo.lock index d89a23df..4049057b 100644 --- a/crates/compact-bytes/fuzz/Cargo.lock +++ b/crates/compact-bytes/fuzz/Cargo.lock @@ -2,6 +2,18 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "append-only-bytes" version = "0.1.8" @@ -32,10 +44,17 @@ dependencies = [ "jobserver", ] +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "compact-bytes" version = "0.1.0" dependencies = [ + "ahash", "append-only-bytes", "fxhash", "linked-hash-map", @@ -70,6 +89,17 @@ dependencies = [ "byteorder", ] +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "jobserver" version = "0.1.26" @@ -142,3 +172,15 @@ name = "unicode-ident" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" diff --git a/crates/compact-bytes/src/lib.rs b/crates/compact-bytes/src/lib.rs index 3caa1dad..5f9b5adc 100644 --- a/crates/compact-bytes/src/lib.rs +++ b/crates/compact-bytes/src/lib.rs @@ -2,7 +2,11 @@ use append_only_bytes::{AppendOnlyBytes, BytesSlice}; use fxhash::FxHasher32; -use std::{hash::Hasher, num::NonZeroU32, ops::Range}; +use std::{ + hash::Hasher, + num::{NonZeroU16, NonZeroU32}, + ops::Range, +}; /// it must be a power of 2 const DEFAULT_CAPACITY: usize = 1 << 16; @@ -10,34 +14,28 @@ const MAX_TRIED: usize = 4; /// # Memory Usage /// -/// The memory usage is capacity * 12 bytes. +/// The memory usage is capacity * 8 bytes. /// The default capacity is 65536 (2^16), so the default memory usage is 0.75MB /// /// You can set the capacity by calling `with_capacity`. The capacity must be a power of 2. pub struct CompactBytes { bytes: AppendOnlyBytes, - map: Box<[Option]>, - pos_and_next: Box<[PosLinkList]>, + map: Box<[Option]>, + pos: Box<[Option]>, + next: Box<[Option]>, /// next write index fr pos_and_next index: usize, capacity: usize, mask: usize, } -#[derive(Debug, Default, Clone, Copy)] -struct PosLinkList { - /// position in the doc + 1 - value: Option, - /// next pos in the list - next: Option, -} - impl CompactBytes { pub fn new() -> Self { CompactBytes { bytes: AppendOnlyBytes::new(), map: vec![None; DEFAULT_CAPACITY].into_boxed_slice(), - pos_and_next: vec![Default::default(); DEFAULT_CAPACITY].into_boxed_slice(), + pos: vec![None; DEFAULT_CAPACITY].into_boxed_slice(), + next: vec![None; DEFAULT_CAPACITY].into_boxed_slice(), index: 1, capacity: DEFAULT_CAPACITY, mask: DEFAULT_CAPACITY - 1, @@ -50,7 +48,8 @@ impl CompactBytes { CompactBytes { bytes: AppendOnlyBytes::with_capacity(cap), map: vec![None; cap].into_boxed_slice(), - pos_and_next: vec![Default::default(); cap].into_boxed_slice(), + pos: vec![None; cap.min(DEFAULT_CAPACITY)].into_boxed_slice(), + next: vec![None; cap.min(DEFAULT_CAPACITY)].into_boxed_slice(), index: 1, capacity: cap, mask: cap - 1, @@ -132,11 +131,9 @@ impl CompactBytes { let key = hash(self.bytes.as_bytes(), i, self.mask); // Override the min position in entry with the current position let old = self.map[key]; - self.pos_and_next[self.index] = PosLinkList { - value: Some(unsafe { NonZeroU32::new_unchecked(i as u32 + 1) }), - next: old, - }; - self.map[key] = Some(NonZeroU32::new(self.index as u32).unwrap()); + self.pos[self.index] = Some(unsafe { NonZeroU32::new_unchecked(i as u32 + 1) }); + self.next[self.index] = old; + self.map[key] = Some(NonZeroU16::new(self.index as u16).unwrap()); self.index = (self.index + 1) & self.mask; if self.index == 0 { self.index = 1; @@ -156,15 +153,16 @@ impl CompactBytes { let key = hash(bytes, 0, self.mask); match self.map[key] { Some(pointer) => { - let mut node = self.pos_and_next[pointer.get() as usize]; + let mut w_pos = self.pos[pointer.get() as usize]; + let mut next = self.next[pointer.get() as usize]; let mut max_len = 0; let mut ans_pos = 0; let mut tried = 0; - while let Some(pos) = node.value { + while let Some(pos) = w_pos { let pos = pos.get() as usize - 1; - node = node - .next - .map(|x| self.pos_and_next[x.get() as usize]) + w_pos = next.map(|x| self.pos[x.get() as usize]).unwrap_or_default(); + next = next + .map(|x| self.next[x.get() as usize]) .unwrap_or_default(); let mut len = 0;