mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-15 08:53:16 +00:00
index: compact parent encoding, inline up to two parents
This saves 4 more bytes per entry, and more importantly, most commit parents can be resolved with no indirection to the overflow table. IIRC, Git always inlines the first parent, but that wouldn't be useful in jj since jj diffs merge commit against the auto-merge parent. The first merge parent is nothing special. I'll use a similar encoding in change id sstable, where only one position will be inlined (to optimize for imported commits.) Benchmark number measuring the cost of change id index building: ``` % hyperfine --sort command --warmup 3 --runs 20 -L bin jj-0,jj-1 \ -s "target/release-with-debug/{bin} -R ~/mirrors/linux \ --ignore-working-copy debug reindex" \ "target/release-with-debug/{bin} -R ~/mirrors/linux \ --ignore-working-copy log -r@ --config-toml='revsets.short-prefixes=\"\"'" Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/linux --ignore-working-copy log -r@ --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 342.9 ms ± 14.5 ms [User: 202.4 ms, System: 140.6 ms] Range (min … max): 326.6 ms … 360.6 ms 20 runs Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r@ --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 325.0 ms ± 13.6 ms [User: 196.2 ms, System: 128.8 ms] Range (min … max): 311.6 ms … 343.2 ms 20 runs Relative speed comparison 1.06 ± 0.06 target/release-with-debug/jj-0 -R ~/mirrors/linux --ignore-working-copy log -r@ --config-toml='revsets.short-prefixes=""' 1.00 target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r@ --config-toml='revsets.short-prefixes=""' ```
This commit is contained in:
parent
89928ffdd8
commit
8b1dfa7157
2 changed files with 82 additions and 45 deletions
|
@ -32,7 +32,7 @@ use tempfile::NamedTempFile;
|
|||
use super::composite::{AsCompositeIndex, ChangeIdIndexImpl, CompositeIndex, IndexSegment};
|
||||
use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec};
|
||||
use super::readonly::{
|
||||
DefaultReadonlyIndex, ReadonlyIndexSegment, INDEX_SEGMENT_FILE_FORMAT_VERSION,
|
||||
DefaultReadonlyIndex, ReadonlyIndexSegment, INDEX_SEGMENT_FILE_FORMAT_VERSION, OVERFLOW_FLAG,
|
||||
};
|
||||
use crate::backend::{ChangeId, CommitId};
|
||||
use crate::commit::Commit;
|
||||
|
@ -198,22 +198,32 @@ impl MutableIndexSegment {
|
|||
for entry in &self.graph {
|
||||
buf.extend(entry.generation_number.to_le_bytes());
|
||||
|
||||
buf.extend(
|
||||
u32::try_from(entry.parent_positions.len())
|
||||
.unwrap()
|
||||
.to_le_bytes(),
|
||||
);
|
||||
let mut parent1_pos = IndexPosition(0);
|
||||
let parent_overflow_pos = u32::try_from(parent_overflow.len()).unwrap();
|
||||
for (i, parent_pos) in entry.parent_positions.iter().enumerate() {
|
||||
if i == 0 {
|
||||
parent1_pos = *parent_pos;
|
||||
} else {
|
||||
parent_overflow.push(*parent_pos);
|
||||
match entry.parent_positions.as_slice() {
|
||||
[] => {
|
||||
buf.extend((!0_u32).to_le_bytes());
|
||||
buf.extend((!0_u32).to_le_bytes());
|
||||
}
|
||||
[pos1] => {
|
||||
assert!(pos1.0 < OVERFLOW_FLAG);
|
||||
buf.extend(pos1.0.to_le_bytes());
|
||||
buf.extend((!0_u32).to_le_bytes());
|
||||
}
|
||||
[pos1, pos2] => {
|
||||
assert!(pos1.0 < OVERFLOW_FLAG);
|
||||
assert!(pos2.0 < OVERFLOW_FLAG);
|
||||
buf.extend(pos1.0.to_le_bytes());
|
||||
buf.extend(pos2.0.to_le_bytes());
|
||||
}
|
||||
positions => {
|
||||
let overflow_pos = u32::try_from(parent_overflow.len()).unwrap();
|
||||
let num_parents = u32::try_from(positions.len()).unwrap();
|
||||
assert!(overflow_pos < OVERFLOW_FLAG);
|
||||
assert!(num_parents < OVERFLOW_FLAG);
|
||||
buf.extend((!overflow_pos).to_le_bytes());
|
||||
buf.extend((!num_parents).to_le_bytes());
|
||||
parent_overflow.extend_from_slice(positions);
|
||||
}
|
||||
}
|
||||
buf.extend(parent1_pos.0.to_le_bytes());
|
||||
buf.extend(parent_overflow_pos.to_le_bytes());
|
||||
|
||||
assert_eq!(entry.change_id.as_bytes().len(), self.change_id_length);
|
||||
buf.extend_from_slice(entry.change_id.as_bytes());
|
||||
|
|
|
@ -23,7 +23,7 @@ use std::io::Read;
|
|||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use smallvec::SmallVec;
|
||||
use smallvec::smallvec;
|
||||
use thiserror::Error;
|
||||
|
||||
use super::composite::{AsCompositeIndex, ChangeIdIndexImpl, CompositeIndex, IndexSegment};
|
||||
|
@ -73,7 +73,24 @@ impl ReadonlyIndexLoadError {
|
|||
}
|
||||
|
||||
/// Current format version of the index segment file.
|
||||
pub(crate) const INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 3;
|
||||
pub(crate) const INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 4;
|
||||
|
||||
/// If set, the value is stored in the overflow table.
|
||||
pub(crate) const OVERFLOW_FLAG: u32 = 0x8000_0000;
|
||||
|
||||
/// Global index position of parent entry, or overflow pointer.
|
||||
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
|
||||
struct ParentIndexPosition(u32);
|
||||
|
||||
impl ParentIndexPosition {
|
||||
fn as_inlined(self) -> Option<IndexPosition> {
|
||||
(self.0 & OVERFLOW_FLAG == 0).then_some(IndexPosition(self.0))
|
||||
}
|
||||
|
||||
fn as_overflow(self) -> Option<u32> {
|
||||
(self.0 & OVERFLOW_FLAG != 0).then_some(!self.0)
|
||||
}
|
||||
}
|
||||
|
||||
struct CommitGraphEntry<'a> {
|
||||
data: &'a [u8],
|
||||
|
@ -85,23 +102,19 @@ struct CommitGraphEntry<'a> {
|
|||
// lowest set bit to determine which generation number the pointers point to.
|
||||
impl CommitGraphEntry<'_> {
|
||||
fn size(commit_id_length: usize, change_id_length: usize) -> usize {
|
||||
16 + commit_id_length + change_id_length
|
||||
12 + commit_id_length + change_id_length
|
||||
}
|
||||
|
||||
fn generation_number(&self) -> u32 {
|
||||
u32::from_le_bytes(self.data[0..4].try_into().unwrap())
|
||||
}
|
||||
|
||||
fn num_parents(&self) -> u32 {
|
||||
u32::from_le_bytes(self.data[4..8].try_into().unwrap())
|
||||
fn parent1_pos_or_overflow_pos(&self) -> ParentIndexPosition {
|
||||
ParentIndexPosition(u32::from_le_bytes(self.data[4..8].try_into().unwrap()))
|
||||
}
|
||||
|
||||
fn parent1_pos(&self) -> IndexPosition {
|
||||
IndexPosition(u32::from_le_bytes(self.data[8..12].try_into().unwrap()))
|
||||
}
|
||||
|
||||
fn parent2_overflow_pos(&self) -> u32 {
|
||||
u32::from_le_bytes(self.data[12..16].try_into().unwrap())
|
||||
fn parent2_pos_or_overflow_len(&self) -> ParentIndexPosition {
|
||||
ParentIndexPosition(u32::from_le_bytes(self.data[8..12].try_into().unwrap()))
|
||||
}
|
||||
|
||||
// TODO: Consider storing the change ids in a separate table. That table could
|
||||
|
@ -111,11 +124,11 @@ impl CommitGraphEntry<'_> {
|
|||
// to better cache locality when walking it; ability to quickly find all
|
||||
// commits associated with a change id.
|
||||
fn change_id(&self) -> ChangeId {
|
||||
ChangeId::new(self.data[16..][..self.change_id_length].to_vec())
|
||||
ChangeId::new(self.data[12..][..self.change_id_length].to_vec())
|
||||
}
|
||||
|
||||
fn commit_id(&self) -> CommitId {
|
||||
CommitId::from_bytes(&self.data[16 + self.change_id_length..][..self.commit_id_length])
|
||||
CommitId::from_bytes(&self.data[12 + self.change_id_length..][..self.commit_id_length])
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -156,9 +169,14 @@ impl CommitLookupEntry<'_> {
|
|||
/// u32: number of overflow parent entries
|
||||
/// for each entry, in some topological order with parents first:
|
||||
/// u32: generation number
|
||||
/// u32: number of parents
|
||||
/// u32: global index position for parent 1
|
||||
/// u32: position in the overflow table of parent 2
|
||||
/// if number of parents <= 2:
|
||||
/// u32: (< 0x8000_0000) global index position for parent 1
|
||||
/// (==0xffff_ffff) no parent 1
|
||||
/// u32: (< 0x8000_0000) global index position for parent 2
|
||||
/// (==0xffff_ffff) no parent 2
|
||||
/// else:
|
||||
/// u32: (>=0x8000_0000) position in the overflow table, bit-negated
|
||||
/// u32: (>=0x8000_0000) number of parents (in the overflow table), bit-negated
|
||||
/// <change id length number of bytes>: change id
|
||||
/// <commit id length number of bytes>: commit id
|
||||
/// for each entry, sorted by commit id:
|
||||
|
@ -339,12 +357,14 @@ impl ReadonlyIndexSegment {
|
|||
}
|
||||
}
|
||||
|
||||
fn overflow_parent(&self, overflow_pos: u32) -> IndexPosition {
|
||||
fn overflow_parents(&self, overflow_pos: u32, num_parents: u32) -> SmallIndexPositionsVec {
|
||||
let offset = (overflow_pos as usize) * 4
|
||||
+ (self.num_local_commits as usize) * self.commit_graph_entry_size
|
||||
+ (self.num_local_commits as usize) * self.commit_lookup_entry_size;
|
||||
let pos = u32::from_le_bytes(self.data[offset..][..4].try_into().unwrap());
|
||||
IndexPosition(pos)
|
||||
self.data[offset..][..(num_parents as usize) * 4]
|
||||
.chunks_exact(4)
|
||||
.map(|chunk| IndexPosition(u32::from_le_bytes(chunk.try_into().unwrap())))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &CommitId) -> Option<u32> {
|
||||
|
@ -448,23 +468,30 @@ impl IndexSegment for ReadonlyIndexSegment {
|
|||
}
|
||||
|
||||
fn num_parents(&self, local_pos: LocalPosition) -> u32 {
|
||||
self.graph_entry(local_pos).num_parents()
|
||||
let graph_entry = self.graph_entry(local_pos);
|
||||
let pos1_or_overflow_pos = graph_entry.parent1_pos_or_overflow_pos();
|
||||
let pos2_or_overflow_len = graph_entry.parent2_pos_or_overflow_len();
|
||||
let inlined_len1 = pos1_or_overflow_pos.as_inlined().is_some() as u32;
|
||||
let inlined_len2 = pos2_or_overflow_len.as_inlined().is_some() as u32;
|
||||
let overflow_len = pos2_or_overflow_len.as_overflow().unwrap_or(0);
|
||||
inlined_len1 + inlined_len2 + overflow_len
|
||||
}
|
||||
|
||||
fn parent_positions(&self, local_pos: LocalPosition) -> SmallIndexPositionsVec {
|
||||
let graph_entry = self.graph_entry(local_pos);
|
||||
let mut parent_entries = SmallVec::with_capacity(graph_entry.num_parents() as usize);
|
||||
if graph_entry.num_parents() >= 1 {
|
||||
parent_entries.push(graph_entry.parent1_pos());
|
||||
}
|
||||
if graph_entry.num_parents() >= 2 {
|
||||
let mut parent_overflow_pos = graph_entry.parent2_overflow_pos();
|
||||
for _ in 1..graph_entry.num_parents() {
|
||||
parent_entries.push(self.overflow_parent(parent_overflow_pos));
|
||||
parent_overflow_pos += 1;
|
||||
let pos1_or_overflow_pos = graph_entry.parent1_pos_or_overflow_pos();
|
||||
let pos2_or_overflow_len = graph_entry.parent2_pos_or_overflow_len();
|
||||
if let Some(pos1) = pos1_or_overflow_pos.as_inlined() {
|
||||
if let Some(pos2) = pos2_or_overflow_len.as_inlined() {
|
||||
smallvec![pos1, pos2]
|
||||
} else {
|
||||
smallvec![pos1]
|
||||
}
|
||||
} else {
|
||||
let overflow_pos = pos1_or_overflow_pos.as_overflow().unwrap();
|
||||
let num_parents = pos2_or_overflow_len.as_overflow().unwrap();
|
||||
self.overflow_parents(overflow_pos, num_parents)
|
||||
}
|
||||
parent_entries
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue