index: use SmallVec to avoid lots of small allocations for adjacent lookup

I simply enabled all smallvec features that are covered by our MSRV.
https://docs.rs/smallvec/latest/smallvec/index.html#optional-features
This commit is contained in:
Yuya Nishihara 2023-04-22 15:07:21 +09:00
parent d948acd5bf
commit e9d2403696
3 changed files with 39 additions and 20 deletions

1
Cargo.lock generated
View file

@ -887,6 +887,7 @@ dependencies = [
"rand_chacha", "rand_chacha",
"regex", "regex",
"serde_json", "serde_json",
"smallvec",
"tempfile", "tempfile",
"test-case", "test-case",
"testutils", "testutils",

View file

@ -40,6 +40,7 @@ rand = "0.8.5"
rand_chacha = "0.3.1" rand_chacha = "0.3.1"
regex = "1.7.3" regex = "1.7.3"
serde_json = "1.0.96" serde_json = "1.0.96"
smallvec = { version = "1.10.0", features = ["const_generics", "const_new", "union"] }
tempfile = "3.5.0" tempfile = "3.5.0"
thiserror = "1.0.40" thiserror = "1.0.40"
tracing = "0.1.38" tracing = "0.1.38"

View file

@ -28,6 +28,7 @@ use blake2::Blake2b512;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use digest::Digest; use digest::Digest;
use itertools::Itertools; use itertools::Itertools;
use smallvec::SmallVec;
use tempfile::NamedTempFile; use tempfile::NamedTempFile;
use thiserror::Error; use thiserror::Error;
@ -305,6 +306,10 @@ impl IndexPosition {
pub const MAX: Self = IndexPosition(u32::MAX); pub const MAX: Self = IndexPosition(u32::MAX);
} }
// SmallVec reuses two pointer-size fields as inline area, which meas we can
// inline up to 16 bytes (on 64-bit platform) for free.
type SmallIndexPositionsVec = SmallVec<[IndexPosition; 4]>;
struct CommitGraphEntry<'a> { struct CommitGraphEntry<'a> {
data: &'a [u8], data: &'a [u8],
commit_id_length: usize, commit_id_length: usize,
@ -455,7 +460,7 @@ struct MutableGraphEntry {
commit_id: CommitId, commit_id: CommitId,
change_id: ChangeId, change_id: ChangeId,
generation_number: u32, generation_number: u32,
parent_positions: Vec<IndexPosition>, parent_positions: SmallIndexPositionsVec,
} }
pub struct MutableIndexImpl { pub struct MutableIndexImpl {
@ -510,7 +515,7 @@ impl MutableIndexImpl {
commit_id, commit_id,
change_id, change_id,
generation_number: 0, generation_number: 0,
parent_positions: vec![], parent_positions: SmallVec::new(),
}; };
for parent_id in parent_ids { for parent_id in parent_ids {
let parent_entry = CompositeIndex(self) let parent_entry = CompositeIndex(self)
@ -820,7 +825,7 @@ trait IndexSegment: Send + Sync {
fn segment_num_parents(&self, local_pos: u32) -> u32; fn segment_num_parents(&self, local_pos: u32) -> u32;
fn segment_parent_positions(&self, local_pos: u32) -> Vec<IndexPosition>; fn segment_parent_positions(&self, local_pos: u32) -> SmallIndexPositionsVec;
fn segment_entry_by_pos(&self, pos: IndexPosition, local_pos: u32) -> IndexEntry; fn segment_entry_by_pos(&self, pos: IndexPosition, local_pos: u32) -> IndexEntry;
} }
@ -1170,7 +1175,7 @@ trait RevWalkIndex<'a> {
type Entry: Clone + Ord + RevWalkIndexEntry<'a>; type Entry: Clone + Ord + RevWalkIndexEntry<'a>;
fn entry_by_pos(&self, pos: IndexPosition) -> Self::Entry; fn entry_by_pos(&self, pos: IndexPosition) -> Self::Entry;
fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> Vec<IndexPosition>; fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> SmallIndexPositionsVec;
} }
trait RevWalkIndexEntry<'a> { trait RevWalkIndexEntry<'a> {
@ -1185,7 +1190,7 @@ impl<'a> RevWalkIndex<'a> for CompositeIndex<'a> {
IndexEntryByPosition(CompositeIndex::entry_by_pos(self, pos)) IndexEntryByPosition(CompositeIndex::entry_by_pos(self, pos))
} }
fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> Vec<IndexPosition> { fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> SmallIndexPositionsVec {
entry.parent_positions() entry.parent_positions()
} }
} }
@ -1203,7 +1208,7 @@ impl<'a> RevWalkIndexEntry<'a> for IndexEntryByPosition<'a> {
#[derive(Clone)] #[derive(Clone)]
struct RevWalkDescendantsIndex<'a> { struct RevWalkDescendantsIndex<'a> {
index: CompositeIndex<'a>, index: CompositeIndex<'a>,
children_map: HashMap<IndexPosition, Vec<IndexPosition>>, children_map: HashMap<IndexPosition, SmallIndexPositionsVec>,
} }
impl<'a> RevWalkDescendantsIndex<'a> { impl<'a> RevWalkDescendantsIndex<'a> {
@ -1212,7 +1217,7 @@ impl<'a> RevWalkDescendantsIndex<'a> {
entries: impl IntoIterator<Item = IndexEntry<'b>>, entries: impl IntoIterator<Item = IndexEntry<'b>>,
) -> Self { ) -> Self {
// For dense set, it's probably cheaper to use `Vec` instead of `HashMap`. // For dense set, it's probably cheaper to use `Vec` instead of `HashMap`.
let mut children_map: HashMap<IndexPosition, Vec<IndexPosition>> = HashMap::new(); let mut children_map: HashMap<IndexPosition, SmallIndexPositionsVec> = HashMap::new();
for entry in entries { for entry in entries {
children_map.entry(entry.position()).or_default(); // mark head node children_map.entry(entry.position()).or_default(); // mark head node
for parent_pos in entry.parent_positions() { for parent_pos in entry.parent_positions() {
@ -1239,8 +1244,7 @@ impl<'a> RevWalkIndex<'a> for RevWalkDescendantsIndex<'a> {
Reverse(IndexEntryByPosition(self.index.entry_by_pos(pos))) Reverse(IndexEntryByPosition(self.index.entry_by_pos(pos)))
} }
// TODO: SmallVec, Cow, or GAT AdjacentIter to eliminate .clone() fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> SmallIndexPositionsVec {
fn adjacent_positions(&self, entry: &IndexEntry<'_>) -> Vec<IndexPosition> {
self.children_map[&entry.position()].clone() self.children_map[&entry.position()].clone()
} }
} }
@ -1689,9 +1693,9 @@ impl IndexSegment for ReadonlyIndexImpl {
self.graph_entry(local_pos).num_parents() self.graph_entry(local_pos).num_parents()
} }
fn segment_parent_positions(&self, local_pos: u32) -> Vec<IndexPosition> { fn segment_parent_positions(&self, local_pos: u32) -> SmallIndexPositionsVec {
let graph_entry = self.graph_entry(local_pos); let graph_entry = self.graph_entry(local_pos);
let mut parent_entries = vec![]; let mut parent_entries = SmallVec::with_capacity(graph_entry.num_parents() as usize);
if graph_entry.num_parents() >= 1 { if graph_entry.num_parents() >= 1 {
parent_entries.push(graph_entry.parent1_pos()); parent_entries.push(graph_entry.parent1_pos());
} }
@ -1783,7 +1787,7 @@ impl IndexSegment for MutableIndexImpl {
self.graph[local_pos as usize].parent_positions.len() as u32 self.graph[local_pos as usize].parent_positions.len() as u32
} }
fn segment_parent_positions(&self, local_pos: u32) -> Vec<IndexPosition> { fn segment_parent_positions(&self, local_pos: u32) -> SmallIndexPositionsVec {
self.graph[local_pos as usize].parent_positions.clone() self.graph[local_pos as usize].parent_positions.clone()
} }
@ -1849,7 +1853,7 @@ impl<'a> IndexEntry<'a> {
self.source.segment_num_parents(self.local_pos) self.source.segment_num_parents(self.local_pos)
} }
pub fn parent_positions(&self) -> Vec<IndexPosition> { pub fn parent_positions(&self) -> SmallIndexPositionsVec {
self.source.segment_parent_positions(self.local_pos) self.source.segment_parent_positions(self.local_pos)
} }
@ -2039,6 +2043,7 @@ impl Index for ReadonlyIndexImpl {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use smallvec::smallvec_inline;
use test_case::test_case; use test_case::test_case;
use super::*; use super::*;
@ -2114,7 +2119,7 @@ mod tests {
assert_eq!(entry.change_id(), change_id0); assert_eq!(entry.change_id(), change_id0);
assert_eq!(entry.generation_number(), 0); assert_eq!(entry.generation_number(), 0);
assert_eq!(entry.num_parents(), 0); assert_eq!(entry.num_parents(), 0);
assert_eq!(entry.parent_positions(), Vec::<IndexPosition>::new()); assert_eq!(entry.parent_positions(), SmallIndexPositionsVec::new());
assert_eq!(entry.parents(), Vec::<IndexEntry>::new()); assert_eq!(entry.parents(), Vec::<IndexEntry>::new());
} }
@ -2203,7 +2208,10 @@ mod tests {
assert_eq!(entry_1.change_id(), change_id1); assert_eq!(entry_1.change_id(), change_id1);
assert_eq!(entry_1.generation_number(), 1); assert_eq!(entry_1.generation_number(), 1);
assert_eq!(entry_1.num_parents(), 1); assert_eq!(entry_1.num_parents(), 1);
assert_eq!(entry_1.parent_positions(), vec![IndexPosition(0)]); assert_eq!(
entry_1.parent_positions(),
smallvec_inline![IndexPosition(0)]
);
assert_eq!(entry_1.parents().len(), 1); assert_eq!(entry_1.parents().len(), 1);
assert_eq!(entry_1.parents()[0].pos, IndexPosition(0)); assert_eq!(entry_1.parents()[0].pos, IndexPosition(0));
assert_eq!(entry_2.pos, IndexPosition(2)); assert_eq!(entry_2.pos, IndexPosition(2));
@ -2211,19 +2219,28 @@ mod tests {
assert_eq!(entry_2.change_id(), change_id2); assert_eq!(entry_2.change_id(), change_id2);
assert_eq!(entry_2.generation_number(), 1); assert_eq!(entry_2.generation_number(), 1);
assert_eq!(entry_2.num_parents(), 1); assert_eq!(entry_2.num_parents(), 1);
assert_eq!(entry_2.parent_positions(), vec![IndexPosition(0)]); assert_eq!(
entry_2.parent_positions(),
smallvec_inline![IndexPosition(0)]
);
assert_eq!(entry_3.change_id(), change_id3); assert_eq!(entry_3.change_id(), change_id3);
assert_eq!(entry_3.generation_number(), 2); assert_eq!(entry_3.generation_number(), 2);
assert_eq!(entry_3.parent_positions(), vec![IndexPosition(2)]); assert_eq!(
entry_3.parent_positions(),
smallvec_inline![IndexPosition(2)]
);
assert_eq!(entry_4.pos, IndexPosition(4)); assert_eq!(entry_4.pos, IndexPosition(4));
assert_eq!(entry_4.generation_number(), 2); assert_eq!(entry_4.generation_number(), 2);
assert_eq!(entry_4.num_parents(), 1); assert_eq!(entry_4.num_parents(), 1);
assert_eq!(entry_4.parent_positions(), vec![IndexPosition(1)]); assert_eq!(
entry_4.parent_positions(),
smallvec_inline![IndexPosition(1)]
);
assert_eq!(entry_5.generation_number(), 3); assert_eq!(entry_5.generation_number(), 3);
assert_eq!(entry_5.num_parents(), 2); assert_eq!(entry_5.num_parents(), 2);
assert_eq!( assert_eq!(
entry_5.parent_positions(), entry_5.parent_positions(),
vec![IndexPosition(4), IndexPosition(2)] smallvec_inline![IndexPosition(4), IndexPosition(2)]
); );
assert_eq!(entry_5.parents().len(), 2); assert_eq!(entry_5.parents().len(), 2);
assert_eq!(entry_5.parents()[0].pos, IndexPosition(4)); assert_eq!(entry_5.parents()[0].pos, IndexPosition(4));
@ -2284,7 +2301,7 @@ mod tests {
assert_eq!(entry_6.num_parents(), 5); assert_eq!(entry_6.num_parents(), 5);
assert_eq!( assert_eq!(
entry_6.parent_positions(), entry_6.parent_positions(),
vec![ smallvec_inline![
IndexPosition(1), IndexPosition(1),
IndexPosition(2), IndexPosition(2),
IndexPosition(3), IndexPosition(3),