From a49da4ad01845f992d77b727fa8efadae348c58b Mon Sep 17 00:00:00 2001 From: dploch Date: Tue, 14 May 2024 17:01:28 -0400 Subject: [PATCH] revset: implement a 'reachable(src, domain)' expression This revset correctly implements "reachability" from a set of source commits following both parent and child edges as far as they can go within a domain set. This type of 'bfs' query is currently impossible to express with existing revset functions. --- CHANGELOG.md | 3 + cli/tests/test_revset_output.rs | 4 +- docs/revsets.md | 3 + lib/src/default_index/revset_engine.rs | 33 +++++- lib/src/revset.rs | 37 ++++++ lib/tests/test_revset.rs | 158 +++++++++++++++++++++++++ 6 files changed, 235 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0d2cbca4..59133d9a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -111,6 +111,9 @@ to avoid letting the user edit the immutable one. * `jj rebase -r` now accepts `--insert-after` and `--insert-before` options to customize the location of the rebased revisions. +* A new revset `reahable(srcs, domain)` will return all commits that are + reachable from `srcs` within `domain`. + ### Fixed bugs * Revsets now support `\`-escapes in string literal. diff --git a/cli/tests/test_revset_output.rs b/cli/tests/test_revset_output.rs index fde0fdc24..ee49eacbf 100644 --- a/cli/tests/test_revset_output.rs +++ b/cli/tests/test_revset_output.rs @@ -278,7 +278,7 @@ fn test_function_name_hint() { | ^----^ | = Function "branch" doesn't exist - Hint: Did you mean "branches"? + Hint: Did you mean "branches", "reachable"? "###); // Both builtin function and function alias should be suggested @@ -308,7 +308,7 @@ fn test_function_name_hint() { | ^----^ | = Function "branch" doesn't exist - Hint: Did you mean "branches"? + Hint: Did you mean "branches", "reachable"? "###); } diff --git a/docs/revsets.md b/docs/revsets.md index 59f061411..d566b368d 100644 --- a/docs/revsets.md +++ b/docs/revsets.md @@ -87,6 +87,9 @@ revsets (expressions) as arguments. * `descendants(x)`: Same as `x::`. +* `reachable(srcs, domain)`: All commits reachable from `srcs` within + `domain`, traversing all parent and child edges. + * `connected(x)`: Same as `x::x`. Useful when `x` includes several commits. * `all()`: All visible commits in the repo. diff --git a/lib/src/default_index/revset_engine.rs b/lib/src/default_index/revset_engine.rs index 41140ad8c..b9e18e653 100644 --- a/lib/src/default_index/revset_engine.rs +++ b/lib/src/default_index/revset_engine.rs @@ -35,8 +35,8 @@ use crate::revset::{ RevsetFilterExtensionWrapper, RevsetFilterPredicate, GENERATION_RANGE_FULL, }; use crate::revset_graph::RevsetGraphEdge; -use crate::rewrite; use crate::store::Store; +use crate::{rewrite, union_find}; type BoxedPredicateFn<'a> = Box bool + 'a>; pub(super) type BoxedRevWalk<'a> = Box + 'a>; @@ -829,6 +829,37 @@ impl<'index> EvaluationContext<'index> { Ok(Box::new(EagerRevset { positions })) } } + ResolvedExpression::Reachable { sources, domain } => { + let mut sets = union_find::UnionFind::::new(); + + // Compute all reachable subgraphs. + let domain_revset = self.evaluate(domain)?; + let domain_vec = domain_revset.positions().attach(index).collect_vec(); + let domain_set: HashSet<_> = domain_vec.iter().copied().collect(); + for pos in &domain_set { + for parent_pos in index.entry_by_pos(*pos).parent_positions() { + if domain_set.contains(&parent_pos) { + sets.union(*pos, parent_pos); + } + } + } + + // Identify disjoint sets reachable from sources. + let set_reps: HashSet<_> = intersection_by( + self.evaluate(sources)?.positions(), + EagerRevWalk::new(domain_vec.iter().copied()), + |pos1, pos2| pos1.cmp(pos2).reverse(), + ) + .attach(index) + .map(|pos| sets.find(pos)) + .collect(); + + let positions = domain_vec + .into_iter() + .filter(|pos| set_reps.contains(&sets.find(*pos))) + .collect_vec(); + Ok(Box::new(EagerRevset { positions })) + } ResolvedExpression::Heads(candidates) => { let candidate_set = self.evaluate(candidates)?; let head_positions: BTreeSet<_> = diff --git a/lib/src/revset.rs b/lib/src/revset.rs index 80611356c..cb06cc575 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -190,6 +190,11 @@ pub enum RevsetExpression { heads: Rc, // TODO: maybe add generation_from_roots/heads? }, + // Commits reachable from "sources" within "domain" + Reachable { + sources: Rc, + domain: Rc, + }, Heads(Rc), Roots(Rc), Latest { @@ -379,6 +384,18 @@ impl RevsetExpression { self.dag_range_to(self) } + /// All commits within `domain` reachable from this set of commits, by + /// traversing either parent or child edges. + pub fn reachable( + self: &Rc, + domain: &Rc, + ) -> Rc { + Rc::new(RevsetExpression::Reachable { + sources: self.clone(), + domain: domain.clone(), + }) + } + /// Commits reachable from `heads` but not from `self`. pub fn range( self: &Rc, @@ -507,6 +524,11 @@ pub enum ResolvedExpression { heads: Box, generation_from_roots: Range, }, + /// Commits reachable from `sources` within `domain`. + Reachable { + sources: Box, + domain: Box, + }, Heads(Box), Roots(Box), Latest { @@ -635,6 +657,12 @@ static BUILTIN_FUNCTION_MAP: Lazy> = Lazy: let candidates = parse_expression_rule(arg.into_inner(), state)?; Ok(candidates.connected()) }); + map.insert("reachable", |name, arguments_pair, state| { + let ([source_arg, domain_arg], []) = expect_arguments(name, arguments_pair)?; + let sources = parse_expression_rule(source_arg.into_inner(), state)?; + let domain = parse_expression_rule(domain_arg.into_inner(), state)?; + Ok(sources.reachable(&domain)) + }); map.insert("none", |name, arguments_pair, _state| { expect_no_arguments(name, arguments_pair)?; Ok(RevsetExpression::none()) @@ -960,6 +988,10 @@ fn try_transform_expression( transform_rec_pair((roots, heads), pre, post)? .map(|(roots, heads)| RevsetExpression::DagRange { roots, heads }) } + RevsetExpression::Reachable { sources, domain } => { + transform_rec_pair((sources, domain), pre, post)? + .map(|(sources, domain)| RevsetExpression::Reachable { sources, domain }) + } RevsetExpression::Heads(candidates) => { transform_rec(candidates, pre, post)?.map(RevsetExpression::Heads) } @@ -1748,6 +1780,10 @@ impl VisibilityResolutionContext<'_> { heads: self.resolve(heads).into(), generation_from_roots: GENERATION_RANGE_FULL, }, + RevsetExpression::Reachable { sources, domain } => ResolvedExpression::Reachable { + sources: self.resolve(sources).into(), + domain: self.resolve(domain).into(), + }, RevsetExpression::Heads(candidates) => { ResolvedExpression::Heads(self.resolve(candidates).into()) } @@ -1833,6 +1869,7 @@ impl VisibilityResolutionContext<'_> { | RevsetExpression::Descendants { .. } | RevsetExpression::Range { .. } | RevsetExpression::DagRange { .. } + | RevsetExpression::Reachable { .. } | RevsetExpression::Heads(_) | RevsetExpression::Roots(_) | RevsetExpression::Latest { .. } => { diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index bb372f3c7..dcefc44cb 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -1556,6 +1556,164 @@ fn test_evaluate_expression_connected() { ); } +#[test] +fn test_evaluate_expression_reachable() { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(); + let repo = &test_repo.repo; + + let mut tx = repo.start_transaction(&settings); + let mut_repo = tx.mut_repo(); + + // Construct 3 separate subgraphs off the root commit. + // 1 is a chain, 2 is a merge, 3 is a pyramidal monstrosity + let graph1commit1 = write_random_commit(mut_repo, &settings); + let graph1commit2 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph1commit1.id().clone()]) + .write() + .unwrap(); + let graph1commit3 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph1commit2.id().clone()]) + .write() + .unwrap(); + let graph2commit1 = write_random_commit(mut_repo, &settings); + let graph2commit2 = write_random_commit(mut_repo, &settings); + let graph2commit3 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph2commit1.id().clone(), graph2commit2.id().clone()]) + .write() + .unwrap(); + let graph3commit1 = write_random_commit(mut_repo, &settings); + let graph3commit2 = write_random_commit(mut_repo, &settings); + let graph3commit3 = write_random_commit(mut_repo, &settings); + let graph3commit4 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph3commit1.id().clone(), graph3commit2.id().clone()]) + .write() + .unwrap(); + let graph3commit5 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph3commit2.id().clone(), graph3commit3.id().clone()]) + .write() + .unwrap(); + let graph3commit6 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph3commit3.id().clone()]) + .write() + .unwrap(); + let graph3commit7 = create_random_commit(mut_repo, &settings) + .set_parents(vec![graph3commit4.id().clone(), graph3commit5.id().clone()]) + .write() + .unwrap(); + + // Domain is respected. + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!( + "reachable({}, all() ~ ::{})", + graph1commit2.id().hex(), + graph1commit1.id().hex() + ) + ), + vec![graph1commit3.id().clone(), graph1commit2.id().clone(),] + ); + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!( + "reachable({}, all() ~ ::{})", + graph1commit2.id().hex(), + graph1commit3.id().hex() + ) + ), + vec![] + ); + + // Each graph is identifiable from any node in it. + for (i, commit) in [&graph1commit1, &graph1commit2, &graph1commit3] + .iter() + .enumerate() + { + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!("reachable({}, all() ~ root())", commit.id().hex()) + ), + vec![ + graph1commit3.id().clone(), + graph1commit2.id().clone(), + graph1commit1.id().clone(), + ], + "commit {}", + i + 1 + ); + } + + for (i, commit) in [&graph2commit1, &graph2commit2, &graph2commit3] + .iter() + .enumerate() + { + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!("reachable({}, all() ~ root())", commit.id().hex()) + ), + vec![ + graph2commit3.id().clone(), + graph2commit2.id().clone(), + graph2commit1.id().clone(), + ], + "commit {}", + i + 1 + ); + } + + for (i, commit) in [ + &graph3commit1, + &graph3commit2, + &graph3commit3, + &graph3commit4, + &graph3commit5, + &graph3commit6, + &graph3commit7, + ] + .iter() + .enumerate() + { + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!("reachable({}, all() ~ root())", commit.id().hex()) + ), + vec![ + graph3commit7.id().clone(), + graph3commit6.id().clone(), + graph3commit5.id().clone(), + graph3commit4.id().clone(), + graph3commit3.id().clone(), + graph3commit2.id().clone(), + graph3commit1.id().clone(), + ], + "commit {}", + i + 1 + ); + } + + // Test a split of the pyramidal monstrosity. + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!( + "reachable({}, all() ~ ::{})", + graph3commit4.id().hex(), + graph3commit5.id().hex() + ) + ), + vec![ + graph3commit7.id().clone(), + graph3commit4.id().clone(), + graph3commit1.id().clone(), + ] + ); +} + #[test] fn test_evaluate_expression_descendants() { let settings = testutils::user_settings();