From 4619942a570c407f14de60e59b7a11dc4409c6de Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Mon, 21 Dec 2020 13:27:16 -0800 Subject: [PATCH] evolution: add support for updating state incrementally We currently recalculate the entire evolution state whenever a new commit is added within a transaction. That's clearly wasteful. This commit makes the state-update incremental. --- lib/src/commit.rs | 4 + lib/src/evolution.rs | 476 ++++++++++++++++++++++++++++++++++++++++- lib/src/store.rs | 4 + lib/src/transaction.rs | 7 +- 4 files changed, 485 insertions(+), 6 deletions(-) diff --git a/lib/src/commit.rs b/lib/src/commit.rs index 7ef8223fc..d1d6c97ad 100644 --- a/lib/src/commit.rs +++ b/lib/src/commit.rs @@ -90,6 +90,10 @@ impl Commit { parents } + pub fn predecessor_ids(&self) -> Vec { + self.data.predecessors.clone() + } + pub fn predecessors(&self) -> Vec { let mut predecessors = Vec::new(); for predecessor in &self.data.predecessors { diff --git a/lib/src/evolution.rs b/lib/src/evolution.rs index 6fd314ab5..3377c2a34 100644 --- a/lib/src/evolution.rs +++ b/lib/src/evolution.rs @@ -28,8 +28,11 @@ use crate::transaction::{MutableRepo, Transaction}; use crate::trees::merge_trees; use crate::view::View; +// TODO: Combine some maps/sets and use a struct as value instead. +// TODO: Move some of this into the index? #[derive(Debug, Clone, Default)] struct State { + children: HashMap>, /// Contains all successors whether they have the same change id or not. successors: HashMap>, /// Contains the subset of the keys in `successors` for which there is a @@ -50,17 +53,16 @@ impl State { heads.push(store.get_commit(commit_id).unwrap()); } let mut commits = HashSet::new(); - let mut children = HashMap::new(); let mut change_to_commits = HashMap::new(); for commit in walk_ancestors(heads) { - children.insert(commit.id().clone(), HashSet::new()); + state.children.insert(commit.id().clone(), HashSet::new()); change_to_commits .entry(commit.change_id().clone()) .or_insert_with(HashSet::new) .insert(commit.id().clone()); commits.insert(commit); } - // Scan all commits to find obsolete commits and to build a lookup of for + // Scan all commits to find obsolete commits and to build a lookup for // children of a commit for commit in &commits { if commit.is_pruned() { @@ -80,7 +82,7 @@ impl State { } } for parent in commit.parents() { - if let Some(children) = children.get_mut(parent.id()) { + if let Some(children) = state.children.get_mut(parent.id()) { children.insert(commit.id().clone()); } } @@ -100,7 +102,7 @@ impl State { work.extend(state.pruned_commits.iter().cloned()); while !work.is_empty() { let commit_id = work.pop().unwrap(); - for child in children.get(&commit_id).unwrap() { + for child in state.children.get(&commit_id).unwrap() { if state.orphan_commits.insert(child.clone()) { work.push(child.clone()); } @@ -140,6 +142,94 @@ impl State { .map_or(false, |non_obsoletes| non_obsoletes.len() > 1) } + fn add_commit(&mut self, commit: &Commit) { + self.add_commit_data( + commit.id(), + commit.change_id(), + &commit.parent_ids(), + &commit.predecessor_ids(), + commit.is_pruned(), + ); + } + + fn add_commit_data( + &mut self, + commit_id: &CommitId, + change_id: &ChangeId, + parents: &[CommitId], + predecessors: &[CommitId], + is_pruned: bool, + ) { + // TODO: Error out (or ignore?) if the root id is a predecessor or divergent + // (adding the root once should be fine). Perhaps this is not the right + // place to do that (we don't know what the root id is here). + for parent in parents { + self.children + .entry(parent.clone()) + .or_default() + .insert(commit_id.clone()); + } + if is_pruned { + self.pruned_commits.insert(commit_id.clone()); + } + // Update the non_obsoletes_by_changeid by adding the new commit and removing + // the predecessors. + self.non_obsoletes_by_changeid + .entry(change_id.clone()) + .or_default() + .insert(commit_id.clone()); + for predecessor in predecessors { + self.successors + .entry(predecessor.clone()) + .or_default() + .insert(commit_id.clone()); + let became_obsolete = self + .non_obsoletes_by_changeid + .get_mut(change_id) + .unwrap() + .remove(predecessor); + // Mark descendants as orphans if the predecessor just became obsolete. + if became_obsolete { + assert!(self.obsolete_commits.insert(predecessor.clone())); + + let mut descendants = HashSet::new(); + for descendant in bfs( + vec![predecessor.clone()], + Box::new(|commit_id| commit_id.clone()), + Box::new(|commit_id| { + self.children + .get(commit_id) + .cloned() + .unwrap_or_else(HashSet::new) + }), + ) { + descendants.insert(descendant); + } + descendants.remove(predecessor); + descendants = descendants + .iter() + .filter(|commit_id| { + !(self.obsolete_commits.contains(commit_id) + || self.pruned_commits.contains(commit_id)) + }) + .cloned() + .collect(); + self.orphan_commits.extend(descendants); + } + } + // Mark the new commit an orphan if any of its parents are obsolete, pruned, or + // orphans. Note that this has to be done late, in case a parent just got marked + // as obsolete or orphan above. + let is_orphan = parents.iter().any(|parent| { + self.obsolete_commits.contains(parent) + || self.pruned_commits.contains(parent) + || self.orphan_commits.contains(commit_id) + }); + if is_orphan { + self.orphan_commits.insert(commit_id.clone()); + } + } + pub fn new_parent(&self, store: &StoreWrapper, old_parent_id: &CommitId) -> HashSet { let mut new_parents = HashSet::new(); if let Some(successor_ids) = self.successors.get(old_parent_id) { @@ -343,6 +433,10 @@ impl Evolution for MutableEvolution<'_, '_> { } impl MutableEvolution<'_, '_> { + pub fn add_commit(&mut self, commit: &Commit) { + self.state.add_commit(commit); + } + pub fn invalidate(&mut self) { self.state = State::calculate(self.repo.store(), self.repo.view()); } @@ -515,3 +609,375 @@ fn evolve_two_divergent_commits( .set_predecessors(vec![commit1.id().clone(), commit2.id().clone()]) .write_to_transaction(tx) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn add_commit_data_initial() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], false); + assert!(!state.is_obsolete(&initial_commit)); + assert!(!state.is_orphan(&initial_commit)); + assert!(!state.is_divergent(&initial_change)); + } + + #[test] + fn add_commit_data_pruned() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], true); + assert!(!state.is_obsolete(&initial_commit)); + assert!(!state.is_orphan(&initial_commit)); + assert!(!state.is_divergent(&initial_change)); + } + + #[test] + fn add_commit_data_creating_orphan() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + let orphan_commit1 = CommitId::from_hex("bbb111"); + let orphan_change1 = ChangeId::from_hex("bbb111"); + let orphan_commit2 = CommitId::from_hex("ccc111"); + let orphan_change2 = ChangeId::from_hex("ccc111"); + let obsolete_orphan_commit = CommitId::from_hex("ddd111"); + let obsolete_orphan_change = ChangeId::from_hex("ddd111"); + let pruned_orphan_commit = CommitId::from_hex("eee111"); + let rewritten_commit = CommitId::from_hex("aaa222"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], false); + state.add_commit_data( + &orphan_commit1, + &orphan_change1, + &[initial_commit.clone()], + &[], + false, + ); + state.add_commit_data( + &orphan_commit2, + &orphan_change2, + &[orphan_commit1.clone()], + &[], + false, + ); + state.add_commit_data( + &obsolete_orphan_commit, + &obsolete_orphan_change, + &[initial_commit.clone()], + &[], + false, + ); + state.add_commit_data( + &pruned_orphan_commit, + &obsolete_orphan_change, + &[initial_commit.clone()], + &[obsolete_orphan_commit.clone()], + true, + ); + state.add_commit_data( + &rewritten_commit, + &initial_change, + &[], + &[initial_commit.clone()], + false, + ); + assert!(state.is_orphan(&orphan_commit1)); + assert!(state.is_orphan(&orphan_commit2)); + assert!(!state.is_orphan(&obsolete_orphan_commit)); + assert!(!state.is_orphan(&pruned_orphan_commit)); + assert!(!state.is_obsolete(&orphan_commit1)); + assert!(!state.is_obsolete(&orphan_commit2)); + assert!(state.is_obsolete(&obsolete_orphan_commit)); + assert!(!state.is_obsolete(&pruned_orphan_commit)); + } + + #[test] + fn add_commit_data_new_commit_on_obsolete() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + let rewritten_commit = CommitId::from_hex("aaa222"); + let new_commit = CommitId::from_hex("bbb111"); + let new_change = ChangeId::from_hex("bbb111"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], false); + state.add_commit_data( + &rewritten_commit, + &initial_change, + &[], + &[initial_commit.clone()], + false, + ); + state.add_commit_data( + &new_commit, + &new_change, + &[initial_commit.clone()], + &[], + false, + ); + assert!(state.is_orphan(&new_commit)); + } + + #[test] + fn add_commit_data_new_commit_on_orphan() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + let rewritten_commit = CommitId::from_hex("aaa222"); + let orphan_commit = CommitId::from_hex("bbb111"); + let orphan_change = ChangeId::from_hex("bbb111"); + let new_commit = CommitId::from_hex("bbb111"); + let new_change = ChangeId::from_hex("bbb111"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], false); + state.add_commit_data( + &rewritten_commit, + &initial_change, + &[], + &[initial_commit.clone()], + false, + ); + state.add_commit_data( + &orphan_commit, + &orphan_change, + &[initial_commit.clone()], + &[], + false, + ); + state.add_commit_data( + &new_commit, + &new_change, + &[orphan_commit.clone()], + &[], + false, + ); + assert!(state.is_orphan(&new_commit)); + } + + #[test] + fn add_commit_data_new_commit_on_pruned() { + let mut state = State::default(); + + let pruned_commit = CommitId::from_hex("aaa111"); + let pruned_change = ChangeId::from_hex("aaa111"); + let new_commit = CommitId::from_hex("bbb111"); + let new_change = ChangeId::from_hex("bbb111"); + + state.add_commit_data(&pruned_commit, &pruned_change, &[], &[], true); + state.add_commit_data( + &new_commit, + &new_change, + &[pruned_commit.clone()], + &[], + false, + ); + assert!(state.is_orphan(&new_commit)); + } + + #[test] + fn add_commit_data_rewrite_as_child() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + let rewritten_commit = CommitId::from_hex("aaa222"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], false); + // The new commit is both a child and a successor of the initial commit + state.add_commit_data( + &rewritten_commit, + &initial_change, + &[initial_commit.clone()], + &[initial_commit.clone()], + false, + ); + assert!(state.is_obsolete(&initial_commit)); + assert!(!state.is_obsolete(&rewritten_commit)); + assert!(!state.is_orphan(&initial_commit)); + assert!(state.is_orphan(&rewritten_commit)); + assert!(!state.is_divergent(&initial_change)); + } + + #[test] + fn add_commit_data_duplicates() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + let duplicate_commit1 = CommitId::from_hex("bbb111"); + let duplicate_change1 = ChangeId::from_hex("bbb111"); + let duplicate_commit2 = CommitId::from_hex("ccc111"); + let duplicate_change2 = ChangeId::from_hex("ccc111"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], false); + state.add_commit_data( + &duplicate_commit1, + &duplicate_change1, + &[], + &[initial_commit.clone()], + false, + ); + state.add_commit_data( + &duplicate_commit2, + &duplicate_change2, + &[], + &[initial_commit.clone()], + false, + ); + assert!(!state.is_obsolete(&initial_commit)); + assert!(!state.is_obsolete(&duplicate_commit1)); + assert!(!state.is_obsolete(&duplicate_commit2)); + assert!(!state.is_divergent(&initial_change)); + assert!(!state.is_divergent(&duplicate_change1)); + assert!(!state.is_divergent(&duplicate_change2)); + assert_eq!( + state.successors(&initial_commit), + hashset!(duplicate_commit1.clone(), duplicate_commit2.clone()) + ); + } + + #[test] + fn add_commit_data_divergent() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + let rewritten_commit1 = CommitId::from_hex("aaa222"); + let rewritten_commit2 = CommitId::from_hex("aaa333"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], false); + state.add_commit_data( + &rewritten_commit1, + &initial_change, + &[], + &[initial_commit.clone()], + false, + ); + state.add_commit_data( + &rewritten_commit2, + &initial_change, + &[], + &[initial_commit.clone()], + false, + ); + assert!(state.is_obsolete(&initial_commit)); + assert!(!state.is_obsolete(&rewritten_commit1)); + assert!(!state.is_obsolete(&rewritten_commit2)); + assert!(state.is_divergent(&initial_change)); + assert_eq!( + state.successors(&initial_commit), + hashset!(rewritten_commit1.clone(), rewritten_commit2.clone()) + ); + } + + #[test] + fn add_commit_data_divergent_pruned() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + let rewritten_pruned = CommitId::from_hex("aaa222"); + let rewritten_non_pruned = CommitId::from_hex("aaa333"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], false); + state.add_commit_data( + &rewritten_pruned, + &initial_change, + &[], + &[initial_commit.clone()], + true, + ); + state.add_commit_data( + &rewritten_non_pruned, + &initial_change, + &[], + &[initial_commit.clone()], + false, + ); + assert!(state.is_obsolete(&initial_commit)); + assert!(!state.is_obsolete(&rewritten_pruned)); + assert!(!state.is_obsolete(&rewritten_non_pruned)); + // It's still divergent even if one side is pruned + assert!(state.is_divergent(&initial_change)); + assert_eq!( + state.successors(&initial_commit), + hashset!(rewritten_pruned.clone(), rewritten_non_pruned.clone()) + ); + } + + #[test] + fn add_commit_data_divergent_unrelated() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + let rewritten_commit = CommitId::from_hex("aaa222"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], false); + // Same change id as the initial commit but no predecessor relationship to it + state.add_commit_data(&rewritten_commit, &initial_change, &[], &[], false); + assert!(!state.is_obsolete(&initial_commit)); + assert!(!state.is_obsolete(&rewritten_commit)); + assert!(state.is_divergent(&initial_change)); + assert_eq!(state.successors(&initial_commit), hashset!()); + } + + #[test] + fn add_commit_data_divergent_convergent() { + let mut state = State::default(); + + let initial_commit = CommitId::from_hex("aaa111"); + let initial_change = ChangeId::from_hex("aaa111"); + let rewritten_commit1 = CommitId::from_hex("aaa222"); + let rewritten_commit2 = CommitId::from_hex("aaa333"); + let convergent_commit = CommitId::from_hex("aaa444"); + + state.add_commit_data(&initial_commit, &initial_change, &[], &[], false); + state.add_commit_data( + &rewritten_commit1, + &initial_change, + &[], + &[initial_commit.clone()], + false, + ); + state.add_commit_data( + &rewritten_commit2, + &initial_change, + &[], + &[initial_commit.clone()], + false, + ); + state.add_commit_data( + &convergent_commit, + &initial_change, + &[], + &[rewritten_commit1.clone(), rewritten_commit2.clone()], + false, + ); + assert!(state.is_obsolete(&initial_commit)); + assert!(state.is_obsolete(&rewritten_commit1)); + assert!(state.is_obsolete(&rewritten_commit2)); + assert!(!state.is_obsolete(&convergent_commit)); + assert!(!state.is_divergent(&initial_change)); + assert_eq!( + state.successors(&rewritten_commit1), + hashset!(convergent_commit.clone()) + ); + assert_eq!( + state.successors(&rewritten_commit2), + hashset!(convergent_commit.clone()) + ); + } +} diff --git a/lib/src/store.rs b/lib/src/store.rs index 292628c55..7005735bc 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -52,6 +52,10 @@ impl Debug for ChangeId { } impl ChangeId { + pub fn from_hex(hex: &str) -> Self { + ChangeId(hex::decode(hex).unwrap()) + } + pub fn hex(&self) -> String { hex::encode(&self.0) } diff --git a/lib/src/transaction.rs b/lib/src/transaction.rs index 1e8fd6f5d..d9e9a0d16 100644 --- a/lib/src/transaction.rs +++ b/lib/src/transaction.rs @@ -166,8 +166,13 @@ impl<'r> Transaction<'r> { pub fn add_head(&mut self, head: &Commit) { let mut_repo = Arc::get_mut(self.repo.as_mut().unwrap()).unwrap(); + // TODO: Also add other ancestors that were not already known. For example, when + // the user creates a new repo backed by a git repo and they then check + // out some commit, we'll need add all ancestors of that commit. This + // code probably needs to be restructured a bit. Perhaps it should be + // higher-level code that makes sure unknown ancestors are also added. mut_repo.view.as_mut().unwrap().add_head(head); - mut_repo.evolution.as_mut().unwrap().invalidate(); + mut_repo.evolution.as_mut().unwrap().add_commit(head); } pub fn remove_head(&mut self, head: &Commit) {