mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-28 15:26:25 +00:00
index: don't store commit ids in sorted lookup table to save disk space
This reduces the index file size. In my Linux mirror repo containing 1591524 commits, the initial index file shrank from 122MB to 92MB. In theory, this makes commit id lookup slower because of the additional indirection and cache misses, but I don't see a significant difference. In a mid-size repo, this is actually a bit faster thanks to smaller index reads. Alternatively, the commit id field could be removed from CommitGraphEntry, but doing that would introduce an indirect lookup there, and the resulting index file would not be as small as with this change. - jj-0 baseline 122MB - jj-1 shrink CommitLookupEntry (this) 92MB - jj-3 shrink CommitGraphEntry 98MB Mid-size repo, "log" with default template ``` % hyperfine --sort command --warmup 3 --runs 20 -L bin jj-0,jj-1,jj-2,jj-3 \ -s "target/release-with-debug/{bin} -R ~/mirrors/linux debug reindex" \ "target/release-with-debug/{bin} -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=\"\"'" Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 177.7 ms ± 12.9 ms [User: 96.3 ms, System: 81.5 ms] Range (min … max): 156.8 ms … 191.2 ms 20 runs Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 169.8 ms ± 13.8 ms [User: 93.3 ms, System: 76.6 ms] Range (min … max): 151.1 ms … 191.5 ms 20 runs Benchmark 4: target/release-with-debug/jj-3 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 170.3 ms ± 13.4 ms [User: 90.1 ms, System: 79.7 ms] Range (min … max): 154.8 ms … 186.2 ms 20 runs Relative speed comparison 1.05 ± 0.11 target/release-with-debug/jj-0 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' 1.00 target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r.. 
-l100 --config-toml='revsets.short-prefixes=""' 1.00 ± 0.11 target/release-with-debug/jj-3 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' ``` Small repo, "log" thousands of commits with -T"commit_id.shortest()" ``` % hyperfine --sort command --warmup 3 --runs 100 -L bin jj-0,jj-1,jj-2,jj-3 \ -s "target/release-with-debug/{bin} -R ~/mirrors/git debug reindex" \ "target/release-with-debug/{bin} -R ~/mirrors/git --ignore-working-copy log -r.. -l5000 -T'commit_id.shortest()' --config-toml='revsets.short-prefixes=\"\"'" Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/git --ignore-working-copy log -r.. -l5000 -T'commit_id.shortest()' --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 179.3 ms ± 12.8 ms [User: 149.7 ms, System: 29.6 ms] Range (min … max): 155.2 ms … 191.0 ms 100 runs Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/git --ignore-working-copy log -r.. -l5000 -T'commit_id.shortest()' --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 179.1 ms ± 13.7 ms [User: 148.5 ms, System: 30.5 ms] Range (min … max): 157.2 ms … 196.7 ms 100 runs Benchmark 4: target/release-with-debug/jj-3 -R ~/mirrors/git --ignore-working-copy log -r.. -l5000 -T'commit_id.shortest()' --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 178.2 ms ± 13.6 ms [User: 148.7 ms, System: 29.6 ms] Range (min … max): 156.5 ms … 191.7 ms 100 runs Relative speed comparison 1.01 ± 0.11 target/release-with-debug/jj-0 -R ~/mirrors/git --ignore-working-copy log -r.. -l5000 -T'commit_id.shortest()' --config-toml='revsets.short-prefixes=""' 1.01 ± 0.11 target/release-with-debug/jj-1 -R ~/mirrors/git --ignore-working-copy log -r.. -l5000 -T'commit_id.shortest()' --config-toml='revsets.short-prefixes=""' 1.01 ± 0.11 target/release-with-debug/jj-3 -R ~/mirrors/git --ignore-working-copy log -r.. -l5000 -T'commit_id.shortest()' --config-toml='revsets.short-prefixes=""' ```
This commit is contained in:
parent
c75230747a
commit
0c0eb37f2e
2 changed files with 22 additions and 42 deletions
|
@ -254,8 +254,7 @@ impl MutableIndexSegment {
|
|||
buf.extend_from_slice(entry.commit_id.as_bytes());
|
||||
}
|
||||
|
||||
for (commit_id, LocalPosition(pos)) in &self.commit_lookup {
|
||||
buf.extend_from_slice(commit_id.as_bytes());
|
||||
for LocalPosition(pos) in self.commit_lookup.values() {
|
||||
buf.extend(pos.to_le_bytes());
|
||||
}
|
||||
|
||||
|
|
|
@ -73,7 +73,7 @@ impl ReadonlyIndexLoadError {
|
|||
}
|
||||
|
||||
/// Current format version of the index segment file.
|
||||
pub(crate) const INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 5;
|
||||
pub(crate) const INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 6;
|
||||
|
||||
/// If set, the value is stored in the overflow table.
|
||||
pub(crate) const OVERFLOW_FLAG: u32 = 0x8000_0000;
|
||||
|
@ -133,33 +133,13 @@ impl CommitGraphEntry<'_> {
|
|||
u32::from_le_bytes(self.data[12..16].try_into().unwrap())
|
||||
}
|
||||
|
||||
fn commit_id(&self) -> CommitId {
|
||||
CommitId::from_bytes(&self.data[16..])
|
||||
}
|
||||
}
|
||||
|
||||
struct CommitLookupEntry<'a> {
|
||||
data: &'a [u8],
|
||||
commit_id_length: usize,
|
||||
}
|
||||
|
||||
impl CommitLookupEntry<'_> {
|
||||
fn size(commit_id_length: usize) -> usize {
|
||||
commit_id_length + 4
|
||||
}
|
||||
|
||||
fn commit_id(&self) -> CommitId {
|
||||
CommitId::from_bytes(self.commit_id_bytes())
|
||||
}
|
||||
|
||||
// might be better to add borrowed version of CommitId
|
||||
fn commit_id_bytes(&self) -> &[u8] {
|
||||
&self.data[0..self.commit_id_length]
|
||||
}
|
||||
|
||||
fn local_pos(&self) -> LocalPosition {
|
||||
let pos = u32::from_le_bytes(self.data[self.commit_id_length..][..4].try_into().unwrap());
|
||||
LocalPosition(pos)
|
||||
&self.data[16..]
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -188,7 +168,6 @@ impl CommitLookupEntry<'_> {
|
|||
/// u32: change id position in the sorted change ids table
|
||||
/// <commit id length number of bytes>: commit id
|
||||
/// for each entry, sorted by commit id:
|
||||
/// <commit id length number of bytes>: commit id
|
||||
/// u32: local position in the graph entries table
|
||||
/// for each entry, sorted by change id:
|
||||
/// <change id length number of bytes>: change id
|
||||
|
@ -324,8 +303,7 @@ impl ReadonlyIndexSegment {
|
|||
|
||||
let commit_graph_entry_size = CommitGraphEntry::size(commit_id_length);
|
||||
let graph_size = (num_local_commits as usize) * commit_graph_entry_size;
|
||||
let commit_lookup_entry_size = CommitLookupEntry::size(commit_id_length);
|
||||
let commit_lookup_size = (num_local_commits as usize) * commit_lookup_entry_size;
|
||||
let commit_lookup_size = (num_local_commits as usize) * 4;
|
||||
let change_id_table_size = (num_local_change_ids as usize) * change_id_length;
|
||||
let change_pos_table_size = (num_local_change_ids as usize) * 4;
|
||||
let parent_overflow_size = (num_parent_overflow_entries as usize) * 4;
|
||||
|
@ -389,14 +367,10 @@ impl ReadonlyIndexSegment {
|
|||
}
|
||||
}
|
||||
|
||||
fn commit_lookup_entry(&self, lookup_pos: u32) -> CommitLookupEntry {
|
||||
fn commit_lookup_pos(&self, lookup_pos: u32) -> LocalPosition {
|
||||
let table = &self.data[self.commit_lookup_base..self.change_id_table_base];
|
||||
let entry_size = CommitLookupEntry::size(self.commit_id_length);
|
||||
let offset = (lookup_pos as usize) * entry_size;
|
||||
CommitLookupEntry {
|
||||
data: &table[offset..][..entry_size],
|
||||
commit_id_length: self.commit_id_length,
|
||||
}
|
||||
let offset = (lookup_pos as usize) * 4;
|
||||
LocalPosition(u32::from_le_bytes(table[offset..][..4].try_into().unwrap()))
|
||||
}
|
||||
|
||||
fn change_lookup_id(&self, lookup_pos: u32) -> ChangeId {
|
||||
|
@ -438,7 +412,8 @@ impl ReadonlyIndexSegment {
|
|||
/// Binary searches commit id by `prefix`. Returns the lookup position.
|
||||
fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult {
|
||||
binary_search_pos_by(self.num_local_commits, |pos| {
|
||||
let entry = self.commit_lookup_entry(pos);
|
||||
let local_pos = self.commit_lookup_pos(pos);
|
||||
let entry = self.graph_entry(local_pos);
|
||||
entry.commit_id_bytes().cmp(prefix)
|
||||
})
|
||||
}
|
||||
|
@ -470,11 +445,9 @@ impl IndexSegment for ReadonlyIndexSegment {
|
|||
}
|
||||
|
||||
fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalPosition> {
|
||||
let lookup_pos = self
|
||||
.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
|
||||
.ok()?;
|
||||
let entry = self.commit_lookup_entry(lookup_pos);
|
||||
Some(entry.local_pos())
|
||||
self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
|
||||
.ok()
|
||||
.map(|pos| self.commit_lookup_pos(pos))
|
||||
}
|
||||
|
||||
fn resolve_neighbor_commit_ids(
|
||||
|
@ -482,12 +455,20 @@ impl IndexSegment for ReadonlyIndexSegment {
|
|||
commit_id: &CommitId,
|
||||
) -> (Option<CommitId>, Option<CommitId>) {
|
||||
self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
|
||||
.map_neighbors(|pos| self.commit_lookup_entry(pos).commit_id())
|
||||
.map_neighbors(|pos| {
|
||||
let local_pos = self.commit_lookup_pos(pos);
|
||||
let entry = self.graph_entry(local_pos);
|
||||
entry.commit_id()
|
||||
})
|
||||
}
|
||||
|
||||
fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
|
||||
self.commit_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes())
|
||||
.prefix_matches(prefix, |pos| self.commit_lookup_entry(pos).commit_id())
|
||||
.prefix_matches(prefix, |pos| {
|
||||
let local_pos = self.commit_lookup_pos(pos);
|
||||
let entry = self.graph_entry(local_pos);
|
||||
entry.commit_id()
|
||||
})
|
||||
.map(|(id, _)| id)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue