From 982062bd75c55f0ead93c8289613a0be0712af40 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sat, 1 Apr 2023 16:41:47 +0900 Subject: [PATCH] revset: do not always evaluate filter node to InternalRevset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This basically removes hidden 'all() &' from union/negation of filters. To achieve that, I have two options: 1. add separate evaluation path (like the one this commit introduced), or 2. wrap "all()" revset to override predicate as Box::new(|_| true) function. I took the former since it's less ad-hoc. We can add an explicit RevsetExpression node to branch between evaluate() and evaluate_predicate(), but I don't think it would simplify the implementation at this point. We might need such node if we want to resolve "all()" at resolve_symbols(). It might be even better to extract a subset of RevsetExpression enum, which only contains evaluatable nodes. The cost of 'all() &' isn't significant for most filters. '~merges()' is the exception. For jj repo, revsets/:v0.3.0 & (author(martinvonz) | committer(martinvonz)) -------------------------------------------------------------- base 1.06 11.2±0.04m new 1.00 10.5±0.05m revsets/~merges() ----------------- base 1.69 750.0±8.47µ new 1.00 444.1±3.50µ --- lib/src/default_revset_engine.rs | 109 ++++++++++++++++++++++++++----- lib/tests/test_revset.rs | 64 ++++++++++++++++++ testing/bench-revsets-git.txt | 6 ++ 3 files changed, 164 insertions(+), 15 deletions(-) diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index 8c291231d..485836740 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -342,13 +342,22 @@ impl<'index, P: ToPredicateFn> InternalRevset<'index> for FilterRevset<'index, P impl ToPredicateFn for FilterRevset<'_, P> { fn to_predicate_fn(&self) -> Box) -> bool + '_> { - // TODO: optimize 'p1' out if candidates = All let mut p1 = self.candidates.to_predicate_fn(); let mut p2 = self.predicate.to_predicate_fn(); Box::new(move |entry| p1(entry) && p2(entry)) } } +#[derive(Debug)] +struct NotInPredicate(S); + +impl ToPredicateFn for NotInPredicate { + fn to_predicate_fn(&self) -> Box) -> bool + '_> { + let mut p = self.0.to_predicate_fn(); + Box::new(move |entry| !p(entry)) + } +} + #[derive(Debug)] struct UnionRevset<'index> { set1: Box + 'index>, @@ -379,6 +388,24 @@ impl ToPredicateFn for UnionRevset<'_> { } } +#[derive(Debug)] +struct UnionPredicate { + set1: S1, + set2: S2, +} + +impl ToPredicateFn for UnionPredicate +where + S1: ToPredicateFn, + S2: ToPredicateFn, +{ + fn to_predicate_fn(&self) -> Box) -> bool + '_> { + let mut p1 = self.set1.to_predicate_fn(); + let mut p2 = self.set2.to_predicate_fn(); + Box::new(move |entry| p1(entry) || p2(entry)) + } +} + struct UnionRevsetIterator< 'index, I1: Iterator>, @@ -505,7 +532,6 @@ impl<'index> InternalRevset<'index> for DifferenceRevset<'index> { impl ToPredicateFn for DifferenceRevset<'_> { fn to_predicate_fn(&self) -> Box) -> bool + '_> { - // TODO: optimize 'p1' out for unary negate? let mut p1 = self.set1.to_predicate_fn(); let mut p2 = self.set2.to_predicate_fn(); Box::new(move |entry| p1(entry) && !p2(entry)) @@ -694,11 +720,14 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> { let candidate_set = self.evaluate(candidates)?; Ok(self.take_latest_revset(candidate_set.as_ref(), *count)) } - RevsetExpression::Filter(predicate) => Ok(Box::new(FilterRevset { - candidates: self.evaluate(&RevsetExpression::All)?, - predicate: build_predicate_fn(self.store.clone(), self.index, predicate), - })), - RevsetExpression::AsFilter(candidates) => self.evaluate(candidates), + RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => { + // Top-level filter without intersection: e.g. "~author(_)" is represented as + // `AsFilter(NotIn(Filter(Author(_))))`. + Ok(Box::new(FilterRevset { + candidates: self.evaluate(&RevsetExpression::All)?, + predicate: self.evaluate_predicate(expression)?, + })) + } RevsetExpression::NotIn(complement) => { let set1 = self.evaluate(&RevsetExpression::All)?; let set2 = self.evaluate(complement)?; @@ -711,14 +740,12 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> { } RevsetExpression::Intersection(expression1, expression2) => { match expression2.as_ref() { - RevsetExpression::Filter(predicate) => Ok(Box::new(FilterRevset { - candidates: self.evaluate(expression1)?, - predicate: build_predicate_fn(self.store.clone(), self.index, predicate), - })), - RevsetExpression::AsFilter(expression2) => Ok(Box::new(FilterRevset { - candidates: self.evaluate(expression1)?, - predicate: self.evaluate(expression2)?, - })), + RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => { + Ok(Box::new(FilterRevset { + candidates: self.evaluate(expression1)?, + predicate: self.evaluate_predicate(expression2)?, + })) + } _ => { // TODO: 'set2' can be turned into a predicate, and use FilterRevset // if a predicate function can terminate the 'set1' iterator early. @@ -736,6 +763,58 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> { } } + /// Evaluates expression tree as filter predicate. + /// + /// For filter expression, this never inserts a hidden `all()` since a + /// filter predicate doesn't need to produce revisions to walk. + fn evaluate_predicate( + &self, + expression: &RevsetExpression, + ) -> Result, RevsetEvaluationError> { + match expression { + RevsetExpression::None + | RevsetExpression::All + | RevsetExpression::Commits(_) + | RevsetExpression::Symbol(_) + | RevsetExpression::Children(_) + | RevsetExpression::Ancestors { .. } + | RevsetExpression::Range { .. } + | RevsetExpression::DagRange { .. } + | RevsetExpression::Heads(_) + | RevsetExpression::Roots(_) + | RevsetExpression::VisibleHeads + | RevsetExpression::Branches(_) + | RevsetExpression::RemoteBranches { .. } + | RevsetExpression::Tags + | RevsetExpression::GitRefs + | RevsetExpression::GitHead + | RevsetExpression::Latest { .. } => Ok(self.evaluate(expression)?.into_predicate()), + RevsetExpression::Filter(predicate) => Ok(build_predicate_fn( + self.store.clone(), + self.index, + predicate, + )), + RevsetExpression::AsFilter(candidates) => self.evaluate_predicate(candidates), + RevsetExpression::Present(_) => { + panic!("Expression '{expression:?}' should have been resolved by caller") + } + RevsetExpression::NotIn(complement) => { + let set = self.evaluate_predicate(complement)?; + Ok(Box::new(NotInPredicate(set))) + } + RevsetExpression::Union(expression1, expression2) => { + let set1 = self.evaluate_predicate(expression1)?; + let set2 = self.evaluate_predicate(expression2)?; + Ok(Box::new(UnionPredicate { set1, set2 })) + } + // Intersection of filters should have been substituted by revset::optimize(). + // If it weren't, just fall back to the set evaluation path. + RevsetExpression::Intersection(..) | RevsetExpression::Difference(..) => { + Ok(self.evaluate(expression)?.into_predicate()) + } + } + } + fn revset_for_commit_ids( &self, commit_ids: &[CommitId], diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index eeb786f3a..9f7d2ca4d 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -1971,6 +1971,70 @@ fn test_evaluate_expression_difference(use_git: bool) { ); } +#[test_case(false ; "local backend")] +#[test_case(true ; "git backend")] +fn test_evaluate_expression_filter_combinator(use_git: bool) { + let settings = testutils::user_settings(); + let test_repo = TestRepo::init(use_git); + let repo = &test_repo.repo; + + let mut tx = repo.start_transaction(&settings, "test"); + let mut_repo = tx.mut_repo(); + + let root_commit_id = repo.store().root_commit_id(); + let commit1 = create_random_commit(mut_repo, &settings) + .set_description("commit 1") + .write() + .unwrap(); + let commit2 = create_random_commit(mut_repo, &settings) + .set_parents(vec![commit1.id().clone()]) + .set_description("commit 2") + .write() + .unwrap(); + let commit3 = create_random_commit(mut_repo, &settings) + .set_parents(vec![commit2.id().clone()]) + .set_description("commit 3") + .write() + .unwrap(); + + // Not intersected with a set node + assert_eq!( + resolve_commit_ids(mut_repo, "~description(1)"), + vec![ + commit3.id().clone(), + commit2.id().clone(), + root_commit_id.clone(), + ], + ); + assert_eq!( + resolve_commit_ids(mut_repo, "description(1) | description(2)"), + vec![commit2.id().clone(), commit1.id().clone()], + ); + assert_eq!( + resolve_commit_ids( + mut_repo, + "description(commit) ~ (description(2) | description(3))", + ), + vec![commit1.id().clone()], + ); + + // Intersected with a set node + assert_eq!( + resolve_commit_ids(mut_repo, "root.. & ~description(1)"), + vec![commit3.id().clone(), commit2.id().clone()], + ); + assert_eq!( + resolve_commit_ids( + mut_repo, + &format!( + "{}.. & (description(1) | description(2))", + commit1.id().hex(), + ) + ), + vec![commit2.id().clone()], + ); +} + #[test_case(false ; "local backend")] #[test_case(true ; "git backend")] fn test_evaluate_expression_file(use_git: bool) { diff --git a/testing/bench-revsets-git.txt b/testing/bench-revsets-git.txt index 2a66e304a..67881bdac 100644 --- a/testing/bench-revsets-git.txt +++ b/testing/bench-revsets-git.txt @@ -25,6 +25,9 @@ committer(gitster) # Intersection and union of large subsets author(peff) & committer(gitster) author(peff) | committer(gitster) +# Intersection of filter with a small subset +:v1.0.0 & (author(peff) & committer(gitster)) +:v1.0.0 & (author(peff) | committer(gitster)) # Roots and heads of small subsets roots(tags()) heads(tags()) @@ -37,5 +40,8 @@ heads(:v2.40.0) # Parents and children of small subset tags()- tags()+ +# Filter that doesn't read commit object +merges() +~merges() # Files are unbearably slow, so only filter within small set file(Makefile) & v1.0.0..v1.2.0