diff --git a/crates/compact-bytes/Cargo.toml b/crates/compact-bytes/Cargo.toml index c4e99bbb..4c7de5f4 100644 --- a/crates/compact-bytes/Cargo.toml +++ b/crates/compact-bytes/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -ahash = "0.8.3" append-only-bytes = "0.1.8" fxhash = "0.2.1" linked-hash-map = "0.5.6" diff --git a/crates/compact-bytes/fuzz/Cargo.lock b/crates/compact-bytes/fuzz/Cargo.lock index 4049057b..d89a23df 100644 --- a/crates/compact-bytes/fuzz/Cargo.lock +++ b/crates/compact-bytes/fuzz/Cargo.lock @@ -2,18 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "ahash" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" -dependencies = [ - "cfg-if", - "getrandom", - "once_cell", - "version_check", -] - [[package]] name = "append-only-bytes" version = "0.1.8" @@ -44,17 +32,10 @@ dependencies = [ "jobserver", ] -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - [[package]] name = "compact-bytes" version = "0.1.0" dependencies = [ - "ahash", "append-only-bytes", "fxhash", "linked-hash-map", @@ -89,17 +70,6 @@ dependencies = [ "byteorder", ] -[[package]] -name = "getrandom" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - [[package]] name = "jobserver" version = "0.1.26" @@ -172,15 +142,3 @@ name = "unicode-ident" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" - -[[package]] -name = 
"version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" diff --git a/crates/compact-bytes/src/lib.rs b/crates/compact-bytes/src/lib.rs index 5f9b5adc..3caa1dad 100644 --- a/crates/compact-bytes/src/lib.rs +++ b/crates/compact-bytes/src/lib.rs @@ -2,11 +2,7 @@ use append_only_bytes::{AppendOnlyBytes, BytesSlice}; use fxhash::FxHasher32; -use std::{ - hash::Hasher, - num::{NonZeroU16, NonZeroU32}, - ops::Range, -}; +use std::{hash::Hasher, num::NonZeroU32, ops::Range}; /// it must be a power of 2 const DEFAULT_CAPACITY: usize = 1 << 16; @@ -14,28 +10,34 @@ const MAX_TRIED: usize = 4; /// # Memory Usage /// -/// The memory usage is capacity * 8 bytes. +/// The memory usage is capacity * 12 bytes. /// The default capacity is 65536 (2^16), so the default memory usage is 0.75MB /// /// You can set the capacity by calling `with_capacity`. The capacity must be a power of 2. 
pub struct CompactBytes { bytes: AppendOnlyBytes, - map: Box<[Option]>, - pos: Box<[Option]>, - next: Box<[Option]>, + map: Box<[Option]>, + pos_and_next: Box<[PosLinkList]>, /// next write index for pos_and_next index: usize, capacity: usize, mask: usize, } +#[derive(Debug, Default, Clone, Copy)] +struct PosLinkList { + /// position in the doc + 1 + value: Option, + /// next pos in the list + next: Option, +} + impl CompactBytes { pub fn new() -> Self { CompactBytes { bytes: AppendOnlyBytes::new(), map: vec![None; DEFAULT_CAPACITY].into_boxed_slice(), - pos: vec![None; DEFAULT_CAPACITY].into_boxed_slice(), - next: vec![None; DEFAULT_CAPACITY].into_boxed_slice(), + pos_and_next: vec![Default::default(); DEFAULT_CAPACITY].into_boxed_slice(), index: 1, capacity: DEFAULT_CAPACITY, mask: DEFAULT_CAPACITY - 1, @@ -48,8 +50,7 @@ impl CompactBytes { CompactBytes { bytes: AppendOnlyBytes::with_capacity(cap), map: vec![None; cap].into_boxed_slice(), - pos: vec![None; cap.min(DEFAULT_CAPACITY)].into_boxed_slice(), - next: vec![None; cap.min(DEFAULT_CAPACITY)].into_boxed_slice(), + pos_and_next: vec![Default::default(); cap].into_boxed_slice(), index: 1, capacity: cap, mask: cap - 1, @@ -131,9 +132,11 @@ impl CompactBytes { let key = hash(self.bytes.as_bytes(), i, self.mask); // Override the min position in entry with the current position let old = self.map[key]; - self.pos[self.index] = Some(unsafe { NonZeroU32::new_unchecked(i as u32 + 1) }); - self.next[self.index] = old; - self.map[key] = Some(NonZeroU16::new(self.index as u16).unwrap()); + self.pos_and_next[self.index] = PosLinkList { + value: Some(unsafe { NonZeroU32::new_unchecked(i as u32 + 1) }), + next: old, + }; + self.map[key] = Some(NonZeroU32::new(self.index as u32).unwrap()); self.index = (self.index + 1) & self.mask; if self.index == 0 { self.index = 1; @@ -153,16 +156,15 @@ impl CompactBytes { let key = hash(bytes, 0, self.mask); match self.map[key] { Some(pointer) => { - let mut w_pos = self.pos[pointer.get() 
as usize]; - let mut next = self.next[pointer.get() as usize]; + let mut node = self.pos_and_next[pointer.get() as usize]; let mut max_len = 0; let mut ans_pos = 0; let mut tried = 0; - while let Some(pos) = w_pos { + while let Some(pos) = node.value { let pos = pos.get() as usize - 1; - w_pos = next.map(|x| self.pos[x.get() as usize]).unwrap_or_default(); - next = next - .map(|x| self.next[x.get() as usize]) + node = node + .next + .map(|x| self.pos_and_next[x.get() as usize]) .unwrap_or_default(); let mut len = 0;