perf: make mem more compact

This commit is contained in:
Zixuan Chen 2023-07-14 02:16:01 +08:00
parent 92434ccdfc
commit 44646e6bf1
3 changed files with 65 additions and 24 deletions

View file

@ -6,6 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
ahash = "0.8.3"
append-only-bytes = "0.1.8"
fxhash = "0.2.1"
linked-hash-map = "0.5.6"

View file

@ -2,6 +2,18 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
dependencies = [
"cfg-if",
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "append-only-bytes"
version = "0.1.8"
@ -32,10 +44,17 @@ dependencies = [
"jobserver",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "compact-bytes"
version = "0.1.0"
dependencies = [
"ahash",
"append-only-bytes",
"fxhash",
"linked-hash-map",
@ -70,6 +89,17 @@ dependencies = [
"byteorder",
]
[[package]]
name = "getrandom"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "jobserver"
version = "0.1.26"
@ -142,3 +172,15 @@ name = "unicode-ident"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

View file

@ -2,7 +2,11 @@
use append_only_bytes::{AppendOnlyBytes, BytesSlice};
use fxhash::FxHasher32;
use std::{hash::Hasher, num::NonZeroU32, ops::Range};
use std::{
hash::Hasher,
num::{NonZeroU16, NonZeroU32},
ops::Range,
};
/// Default capacity of the lookup table. It must be a power of 2, because `capacity - 1` is used as a bit mask.
const DEFAULT_CAPACITY: usize = 1 << 16;
@ -10,34 +14,28 @@ const MAX_TRIED: usize = 4;
/// # Memory Usage
///
/// The memory usage is capacity * 12 bytes.
/// The memory usage is capacity * 8 bytes.
/// The default capacity is 65536 (2^16), so the default memory usage is 0.5MB
///
/// You can set the capacity by calling `with_capacity`. The capacity must be a power of 2.
pub struct CompactBytes {
/// Backing byte buffer; the positions recorded in the table refer to offsets in it.
bytes: AppendOnlyBytes,
map: Box<[Option<NonZeroU32>]>,
pos_and_next: Box<[PosLinkList]>,
/// Hash table: hash key -> index of the most recently written entry in `pos`/`next`.
/// `None` means nothing has been recorded for that key.
map: Box<[Option<NonZeroU16>]>,
/// Byte position + 1 for each entry (the +1 keeps the stored value non-zero).
pos: Box<[Option<NonZeroU32>]>,
/// Index of the entry that was previously stored under the same hash key, if any.
next: Box<[Option<NonZeroU16>]>,
/// Next write index into `pos` and `next`. Starts at 1 and wraps around, skipping 0.
index: usize,
/// Number of slots in `map`.
capacity: usize,
/// `capacity - 1`, used as a bit mask (hence the power-of-2 requirement).
mask: usize,
}
/// A node in a per-key chain: one stored position plus a link to the next node.
#[derive(Debug, Default, Clone, Copy)]
struct PosLinkList {
/// position in the doc + 1 (stored off by one so it fits in a `NonZeroU32`)
value: Option<NonZeroU32>,
/// index into `pos_and_next` of the next node in the list, if any
next: Option<NonZeroU32>,
}
impl CompactBytes {
pub fn new() -> Self {
CompactBytes {
bytes: AppendOnlyBytes::new(),
map: vec![None; DEFAULT_CAPACITY].into_boxed_slice(),
pos_and_next: vec![Default::default(); DEFAULT_CAPACITY].into_boxed_slice(),
pos: vec![None; DEFAULT_CAPACITY].into_boxed_slice(),
next: vec![None; DEFAULT_CAPACITY].into_boxed_slice(),
index: 1,
capacity: DEFAULT_CAPACITY,
mask: DEFAULT_CAPACITY - 1,
@ -50,7 +48,8 @@ impl CompactBytes {
CompactBytes {
bytes: AppendOnlyBytes::with_capacity(cap),
map: vec![None; cap].into_boxed_slice(),
pos_and_next: vec![Default::default(); cap].into_boxed_slice(),
pos: vec![None; cap.min(DEFAULT_CAPACITY)].into_boxed_slice(),
next: vec![None; cap.min(DEFAULT_CAPACITY)].into_boxed_slice(),
index: 1,
capacity: cap,
mask: cap - 1,
@ -132,11 +131,9 @@ impl CompactBytes {
let key = hash(self.bytes.as_bytes(), i, self.mask);
// Override the min position in entry with the current position
let old = self.map[key];
self.pos_and_next[self.index] = PosLinkList {
value: Some(unsafe { NonZeroU32::new_unchecked(i as u32 + 1) }),
next: old,
};
self.map[key] = Some(NonZeroU32::new(self.index as u32).unwrap());
self.pos[self.index] = Some(unsafe { NonZeroU32::new_unchecked(i as u32 + 1) });
self.next[self.index] = old;
self.map[key] = Some(NonZeroU16::new(self.index as u16).unwrap());
self.index = (self.index + 1) & self.mask;
if self.index == 0 {
self.index = 1;
@ -156,15 +153,16 @@ impl CompactBytes {
let key = hash(bytes, 0, self.mask);
match self.map[key] {
Some(pointer) => {
let mut node = self.pos_and_next[pointer.get() as usize];
let mut w_pos = self.pos[pointer.get() as usize];
let mut next = self.next[pointer.get() as usize];
let mut max_len = 0;
let mut ans_pos = 0;
let mut tried = 0;
while let Some(pos) = node.value {
while let Some(pos) = w_pos {
let pos = pos.get() as usize - 1;
node = node
.next
.map(|x| self.pos_and_next[x.get() as usize])
w_pos = next.map(|x| self.pos[x.get() as usize]).unwrap_or_default();
next = next
.map(|x| self.next[x.get() as usize])
.unwrap_or_default();
let mut len = 0;