mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-18 18:27:38 +00:00
index: when writing incremental index, squash into parent file if smaller
We currently write a new incremental index file every time. That means the stack of index files quickly gets deep, which makes it slow to read the index. This commit squashes the new index segment into its parent file whenever the parent has no more commits than the new segment, and repeats that check up the stack. That limits the number of files to O(log n). Write time will also be O(log n) on average.
This commit is contained in:
parent
a51543b752
commit
62ce5782b5
2 changed files with 119 additions and 8 deletions
|
@ -466,6 +466,46 @@ impl MutableIndex {
|
|||
buf
|
||||
}
|
||||
|
||||
/// If the MutableIndex has more commits than its parent ReadonlyIndex,
|
||||
/// return MutableIndex with the commits from both. This is done
|
||||
/// recursively, so the stack of index files has O(log n) files.
|
||||
fn maybe_squash_with_ancestors(self) -> MutableIndex {
|
||||
if self.parent_file.is_none() {
|
||||
return self;
|
||||
}
|
||||
|
||||
let mut num_new_commits = self.segment_num_commits();
|
||||
let mut parent_file = self.parent_file.as_ref().unwrap().clone();
|
||||
let mut squashed;
|
||||
loop {
|
||||
// TODO: We should probably also squash if the parent file has less than N
|
||||
// commits, regardless of how many (few) are in `self`.
|
||||
if num_new_commits < parent_file.segment_num_commits() {
|
||||
squashed = MutableIndex::incremental(parent_file);
|
||||
break;
|
||||
}
|
||||
if parent_file.parent_file.is_none() {
|
||||
squashed = MutableIndex::full(self.dir.clone(), self.hash_length);
|
||||
break;
|
||||
}
|
||||
num_new_commits += parent_file.segment_num_commits();
|
||||
parent_file = parent_file.parent_file.as_ref().unwrap().clone();
|
||||
}
|
||||
|
||||
// TODO: This can be made more efficient by walking the parent files in order
|
||||
// and not looking up via `self`.
|
||||
for pos in squashed.num_parent_commits..self.num_commits() {
|
||||
let entry = self.entry_by_pos(pos);
|
||||
let parent_ids: Vec<_> = entry
|
||||
.parents_positions()
|
||||
.iter()
|
||||
.map(|pos| self.entry_by_pos(*pos).commit_id())
|
||||
.collect();
|
||||
squashed.add_commit_data(entry.commit_id(), parent_ids);
|
||||
}
|
||||
squashed
|
||||
}
|
||||
|
||||
pub fn save(self) -> io::Result<Arc<ReadonlyIndex>> {
|
||||
if self.segment_num_commits() == 0 && self.parent_file.is_some() {
|
||||
return Ok(self.parent_file.unwrap());
|
||||
|
@ -473,8 +513,8 @@ impl MutableIndex {
|
|||
|
||||
let hash_length = self.hash_length;
|
||||
let dir = self.dir.clone();
|
||||
let buf = self.serialize();
|
||||
|
||||
let buf = self.maybe_squash_with_ancestors().serialize();
|
||||
let mut hasher = Blake2b::new();
|
||||
hasher.update(&buf);
|
||||
let index_file_id_hex = hex::encode(&hasher.finalize());
|
||||
|
@ -509,6 +549,10 @@ impl MutableIndex {
|
|||
CompositeIndex(self).entry_by_id(commit_id)
|
||||
}
|
||||
|
||||
/// Returns the index entry at position `pos`, delegating the lookup to
/// `CompositeIndex(self)`.
// NOTE(review): `pos` is presumably counted across this segment and its
// parent files; confirm against `CompositeIndex::entry_by_pos`.
pub fn entry_by_pos(&self, pos: u32) -> IndexEntry {
|
||||
CompositeIndex(self).entry_by_pos(pos)
|
||||
}
|
||||
|
||||
/// Reports whether `commit_id` is present, as answered by
/// `CompositeIndex(self).has_id`.
pub fn has_id(&self, commit_id: &CommitId) -> bool {
|
||||
CompositeIndex(self).has_id(commit_id)
|
||||
}
|
||||
|
@ -1235,6 +1279,10 @@ impl ReadonlyIndex {
|
|||
CompositeIndex(self).entry_by_id(commit_id)
|
||||
}
|
||||
|
||||
/// Returns the index entry at position `pos`, delegating the lookup to
/// `CompositeIndex(self)`.
// NOTE(review): `pos` is presumably counted across this file and its parent
// files; confirm against `CompositeIndex::entry_by_pos`.
pub fn entry_by_pos(&self, pos: u32) -> IndexEntry {
|
||||
CompositeIndex(self).entry_by_pos(pos)
|
||||
}
|
||||
|
||||
/// Reports whether `commit_id` is present, as answered by
/// `CompositeIndex(self).has_id`.
pub fn has_id(&self, commit_id: &CommitId) -> bool {
|
||||
CompositeIndex(self).has_id(commit_id)
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ use jujube_lib::repo::ReadonlyRepo;
|
|||
use jujube_lib::settings::UserSettings;
|
||||
use jujube_lib::store::CommitId;
|
||||
use jujube_lib::testutils;
|
||||
use jujube_lib::testutils::create_random_commit;
|
||||
use std::sync::Arc;
|
||||
use test_case::test_case;
|
||||
|
||||
|
@ -337,13 +338,8 @@ fn test_index_commits_incremental(use_git: bool) {
|
|||
assert_eq!(stats.num_commits, 2 + 3);
|
||||
assert_eq!(stats.num_merges, 0);
|
||||
assert_eq!(stats.max_generation_number, 3);
|
||||
assert_eq!(stats.levels.len(), 3);
|
||||
assert_eq!(stats.levels[0].num_commits, 2);
|
||||
assert_eq!(stats.levels[1].num_commits, 1);
|
||||
assert_ne!(stats.levels[1].name, stats.levels[0].name);
|
||||
assert_eq!(stats.levels[2].num_commits, 2);
|
||||
assert_ne!(stats.levels[2].name, stats.levels[0].name);
|
||||
assert_ne!(stats.levels[2].name, stats.levels[1].name);
|
||||
assert_eq!(stats.levels.len(), 1);
|
||||
assert_eq!(stats.levels[0].num_commits, 5);
|
||||
|
||||
assert_eq!(generation_number(index.clone(), root_commit.id()), 0);
|
||||
assert_eq!(generation_number(index.clone(), commit_a.id()), 1);
|
||||
|
@ -389,6 +385,7 @@ fn test_index_commits_incremental_empty_transaction(use_git: bool) {
|
|||
assert_eq!(stats.levels.len(), 2);
|
||||
assert_eq!(stats.levels[0].num_commits, 2);
|
||||
assert_eq!(stats.levels[1].num_commits, 1);
|
||||
assert_ne!(stats.levels[1].name, stats.levels[0].name);
|
||||
|
||||
assert_eq!(generation_number(index.clone(), root_commit.id()), 0);
|
||||
assert_eq!(generation_number(index.clone(), commit_a.id()), 1);
|
||||
|
@ -419,3 +416,69 @@ fn test_index_commits_incremental_already_indexed(use_git: bool) {
|
|||
assert_eq!(tx.index().num_commits(), 2 + 1);
|
||||
tx.discard();
|
||||
}
|
||||
|
||||
fn create_n_commits(settings: &UserSettings, repo: &mut Arc<ReadonlyRepo>, num_commits: i32) {
|
||||
let mut tx = repo.start_transaction("test");
|
||||
for _ in 0..num_commits {
|
||||
create_random_commit(settings, repo).write_to_transaction(&mut tx);
|
||||
}
|
||||
tx.commit();
|
||||
Arc::get_mut(repo).unwrap().reload();
|
||||
}
|
||||
|
||||
fn commits_by_level(repo: &ReadonlyRepo) -> Vec<u32> {
|
||||
repo.index()
|
||||
.stats()
|
||||
.levels
|
||||
.iter()
|
||||
.map(|level| level.num_commits)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Checks how many commits end up in each index level after various
/// sequences of transactions, i.e. when a newly written incremental segment
/// is squashed into its ancestors. Runs once per `test_case` attribute, with
/// `use_git` set to `false` ("local store") and `true` ("git store").
#[test_case(false ; "local store")]
|
||||
#[test_case(true ; "git store")]
|
||||
fn test_index_commits_incremental_squashed(use_git: bool) {
|
||||
let settings = testutils::user_settings();
|
||||
|
||||
// The expected counts below imply that a freshly initialized repo already
// has 2 indexed commits before any are created here.
// One commit per transaction: the first stays a separate 1-commit level.
// The second is not smaller than that level, and the combined pair is not
// smaller than the 2-commit base, so everything collapses into one level.
let (_temp_dir, mut repo) = testutils::init_repo(&settings, use_git);
|
||||
create_n_commits(&settings, &mut repo, 1);
|
||||
assert_eq!(commits_by_level(&repo), vec![2, 1]);
|
||||
create_n_commits(&settings, &mut repo, 1);
|
||||
assert_eq!(commits_by_level(&repo), vec![4]);
|
||||
|
||||
// Two commits in one transaction are not smaller than the 2-commit base,
// so they are squashed into a single level.
let (_temp_dir, mut repo) = testutils::init_repo(&settings, use_git);
|
||||
create_n_commits(&settings, &mut repo, 2);
|
||||
assert_eq!(commits_by_level(&repo), vec![4]);
|
||||
|
||||
// A single large transaction also collapses into one level.
let (_temp_dir, mut repo) = testutils::init_repo(&settings, use_git);
|
||||
create_n_commits(&settings, &mut repo, 100);
|
||||
assert_eq!(commits_by_level(&repo), vec![102]);
|
||||
|
||||
// Growing batches: each batch is at least as large as the level below it,
// so every write collapses back to a single level.
let (_temp_dir, mut repo) = testutils::init_repo(&settings, use_git);
|
||||
create_n_commits(&settings, &mut repo, 2);
|
||||
create_n_commits(&settings, &mut repo, 4);
|
||||
create_n_commits(&settings, &mut repo, 8);
|
||||
create_n_commits(&settings, &mut repo, 16);
|
||||
create_n_commits(&settings, &mut repo, 32);
|
||||
assert_eq!(commits_by_level(&repo), vec![64]);
|
||||
|
||||
// Shrinking batches: after the first write, every batch is strictly
// smaller than the level below it, so nothing further is squashed.
let (_temp_dir, mut repo) = testutils::init_repo(&settings, use_git);
|
||||
create_n_commits(&settings, &mut repo, 32);
|
||||
create_n_commits(&settings, &mut repo, 16);
|
||||
create_n_commits(&settings, &mut repo, 8);
|
||||
create_n_commits(&settings, &mut repo, 4);
|
||||
create_n_commits(&settings, &mut repo, 2);
|
||||
assert_eq!(commits_by_level(&repo), vec![34, 16, 8, 4, 2]);
|
||||
|
||||
// Equal batches: squashing cascades whenever the accumulated commits catch
// up with an ancestor level; nine writes of 10 leave levels of 72 and 20.
let (_temp_dir, mut repo) = testutils::init_repo(&settings, use_git);
|
||||
create_n_commits(&settings, &mut repo, 10);
|
||||
create_n_commits(&settings, &mut repo, 10);
|
||||
create_n_commits(&settings, &mut repo, 10);
|
||||
create_n_commits(&settings, &mut repo, 10);
|
||||
create_n_commits(&settings, &mut repo, 10);
|
||||
create_n_commits(&settings, &mut repo, 10);
|
||||
create_n_commits(&settings, &mut repo, 10);
|
||||
create_n_commits(&settings, &mut repo, 10);
|
||||
create_n_commits(&settings, &mut repo, 10);
|
||||
assert_eq!(commits_by_level(&repo), vec![72, 20]);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue