From 982062bd75c55f0ead93c8289613a0be0712af40 Mon Sep 17 00:00:00 2001
From: Yuya Nishihara <yuya@tcha.org>
Date: Sat, 1 Apr 2023 16:41:47 +0900
Subject: [PATCH] revset: do not always evaluate filter node to InternalRevset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This basically removes hidden 'all() &' from union/negation of filters. To
achieve that, I have two options: 1. add separate evaluation path (like the
one this commit introduced), or 2. wrap "all()" revset to override predicate
as Box::new(|_| true) function. I took the former since it's less ad-hoc.

We can add an explicit RevsetExpression node to branch between evaluate()
and evaluate_predicate(), but I don't think it would simplify the
implementation at this point. We might need such node if we want to resolve
"all()" at resolve_symbols(). It might be even better to extract a subset of
RevsetExpression enum, which only contains evaluatable nodes.

The cost of 'all() &' isn't significant for most filters. '~merges()' is
the exception. For jj repo,

    revsets/:v0.3.0 & (author(martinvonz) | committer(martinvonz))
    --------------------------------------------------------------
    base     1.06      11.2±0.04m
    new      1.00      10.5±0.05m

    revsets/~merges()
    -----------------
    base     1.69     750.0±8.47µ
    new      1.00     444.1±3.50µ
---
 lib/src/default_revset_engine.rs | 109 ++++++++++++++++++++++++++-----
 lib/tests/test_revset.rs         |  64 ++++++++++++++++++
 testing/bench-revsets-git.txt    |   6 ++
 3 files changed, 164 insertions(+), 15 deletions(-)
diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs
index 8c291231d..485836740 100644
--- a/lib/src/default_revset_engine.rs
+++ b/lib/src/default_revset_engine.rs
@@ -342,13 +342,22 @@ impl<'index, P: ToPredicateFn> InternalRevset<'index> for FilterRevset<'index, P
 
 impl<P: ToPredicateFn> ToPredicateFn for FilterRevset<'_, P> {
     fn to_predicate_fn(&self) -> Box<dyn FnMut(&IndexEntry<'_>) -> bool + '_> {
-        // TODO: optimize 'p1' out if candidates = All
         let mut p1 = self.candidates.to_predicate_fn();
         let mut p2 = self.predicate.to_predicate_fn();
         Box::new(move |entry| p1(entry) && p2(entry))
     }
 }
 
+#[derive(Debug)]
+struct NotInPredicate<S>(S);
+
+impl<S: ToPredicateFn> ToPredicateFn for NotInPredicate<S> {
+    fn to_predicate_fn(&self) -> Box<dyn FnMut(&IndexEntry<'_>) -> bool + '_> {
+        let mut p = self.0.to_predicate_fn();
+        Box::new(move |entry| !p(entry))
+    }
+}
+
 #[derive(Debug)]
 struct UnionRevset<'index> {
     set1: Box<dyn InternalRevset<'index> + 'index>,
@@ -379,6 +388,24 @@ impl ToPredicateFn for UnionRevset<'_> {
     }
 }
 
+#[derive(Debug)]
+struct UnionPredicate<S1, S2> {
+    set1: S1,
+    set2: S2,
+}
+
+impl<S1, S2> ToPredicateFn for UnionPredicate<S1, S2>
+where
+    S1: ToPredicateFn,
+    S2: ToPredicateFn,
+{
+    fn to_predicate_fn(&self) -> Box<dyn FnMut(&IndexEntry<'_>) -> bool + '_> {
+        let mut p1 = self.set1.to_predicate_fn();
+        let mut p2 = self.set2.to_predicate_fn();
+        Box::new(move |entry| p1(entry) || p2(entry))
+    }
+}
+
 struct UnionRevsetIterator<
     'index,
     I1: Iterator<Item = IndexEntry<'index>>,
@@ -505,7 +532,6 @@ impl<'index> InternalRevset<'index> for DifferenceRevset<'index> {
 
 impl ToPredicateFn for DifferenceRevset<'_> {
     fn to_predicate_fn(&self) -> Box<dyn FnMut(&IndexEntry<'_>) -> bool + '_> {
-        // TODO: optimize 'p1' out for unary negate?
         let mut p1 = self.set1.to_predicate_fn();
         let mut p2 = self.set2.to_predicate_fn();
         Box::new(move |entry| p1(entry) && !p2(entry))
@@ -694,11 +720,14 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
                 let candidate_set = self.evaluate(candidates)?;
                 Ok(self.take_latest_revset(candidate_set.as_ref(), *count))
             }
-            RevsetExpression::Filter(predicate) => Ok(Box::new(FilterRevset {
-                candidates: self.evaluate(&RevsetExpression::All)?,
-                predicate: build_predicate_fn(self.store.clone(), self.index, predicate),
-            })),
-            RevsetExpression::AsFilter(candidates) => self.evaluate(candidates),
+            RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => {
+                // Top-level filter without intersection: e.g. "~author(_)" is represented as
+                // `AsFilter(NotIn(Filter(Author(_))))`.
+                Ok(Box::new(FilterRevset {
+                    candidates: self.evaluate(&RevsetExpression::All)?,
+                    predicate: self.evaluate_predicate(expression)?,
+                }))
+            }
             RevsetExpression::NotIn(complement) => {
                 let set1 = self.evaluate(&RevsetExpression::All)?;
                 let set2 = self.evaluate(complement)?;
@@ -711,14 +740,12 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
             }
             RevsetExpression::Intersection(expression1, expression2) => {
                 match expression2.as_ref() {
-                    RevsetExpression::Filter(predicate) => Ok(Box::new(FilterRevset {
-                        candidates: self.evaluate(expression1)?,
-                        predicate: build_predicate_fn(self.store.clone(), self.index, predicate),
-                    })),
-                    RevsetExpression::AsFilter(expression2) => Ok(Box::new(FilterRevset {
-                        candidates: self.evaluate(expression1)?,
-                        predicate: self.evaluate(expression2)?,
-                    })),
+                    RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => {
+                        Ok(Box::new(FilterRevset {
+                            candidates: self.evaluate(expression1)?,
+                            predicate: self.evaluate_predicate(expression2)?,
+                        }))
+                    }
                     _ => {
                         // TODO: 'set2' can be turned into a predicate, and use FilterRevset
                         // if a predicate function can terminate the 'set1' iterator early.
@@ -736,6 +763,58 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
         }
     }
 
+    /// Evaluates expression tree as filter predicate.
+    ///
+    /// For filter expression, this never inserts a hidden `all()` since a
+    /// filter predicate doesn't need to produce revisions to walk.
+    fn evaluate_predicate(
+        &self,
+        expression: &RevsetExpression,
+    ) -> Result<Box<dyn ToPredicateFn + 'index>, RevsetEvaluationError> {
+        match expression {
+            RevsetExpression::None
+            | RevsetExpression::All
+            | RevsetExpression::Commits(_)
+            | RevsetExpression::Symbol(_)
+            | RevsetExpression::Children(_)
+            | RevsetExpression::Ancestors { .. }
+            | RevsetExpression::Range { .. }
+            | RevsetExpression::DagRange { .. }
+            | RevsetExpression::Heads(_)
+            | RevsetExpression::Roots(_)
+            | RevsetExpression::VisibleHeads
+            | RevsetExpression::Branches(_)
+            | RevsetExpression::RemoteBranches { .. }
+            | RevsetExpression::Tags
+            | RevsetExpression::GitRefs
+            | RevsetExpression::GitHead
+            | RevsetExpression::Latest { .. } => Ok(self.evaluate(expression)?.into_predicate()),
+            RevsetExpression::Filter(predicate) => Ok(build_predicate_fn(
+                self.store.clone(),
+                self.index,
+                predicate,
+            )),
+            RevsetExpression::AsFilter(candidates) => self.evaluate_predicate(candidates),
+            RevsetExpression::Present(_) => {
+                panic!("Expression '{expression:?}' should have been resolved by caller")
+            }
+            RevsetExpression::NotIn(complement) => {
+                let set = self.evaluate_predicate(complement)?;
+                Ok(Box::new(NotInPredicate(set)))
+            }
+            RevsetExpression::Union(expression1, expression2) => {
+                let set1 = self.evaluate_predicate(expression1)?;
+                let set2 = self.evaluate_predicate(expression2)?;
+                Ok(Box::new(UnionPredicate { set1, set2 }))
+            }
+            // Intersection of filters should have been substituted by revset::optimize().
+            // If it weren't, just fall back to the set evaluation path.
+            RevsetExpression::Intersection(..) | RevsetExpression::Difference(..) => {
+                Ok(self.evaluate(expression)?.into_predicate())
+            }
+        }
+    }
+
     fn revset_for_commit_ids(
         &self,
         commit_ids: &[CommitId],
diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs
index eeb786f3a..9f7d2ca4d 100644
--- a/lib/tests/test_revset.rs
+++ b/lib/tests/test_revset.rs
@@ -1971,6 +1971,70 @@ fn test_evaluate_expression_difference(use_git: bool) {
     );
 }
 
+#[test_case(false ; "local backend")]
+#[test_case(true ; "git backend")]
+fn test_evaluate_expression_filter_combinator(use_git: bool) {
+    let settings = testutils::user_settings();
+    let test_repo = TestRepo::init(use_git);
+    let repo = &test_repo.repo;
+
+    let mut tx = repo.start_transaction(&settings, "test");
+    let mut_repo = tx.mut_repo();
+
+    let root_commit_id = repo.store().root_commit_id();
+    let commit1 = create_random_commit(mut_repo, &settings)
+        .set_description("commit 1")
+        .write()
+        .unwrap();
+    let commit2 = create_random_commit(mut_repo, &settings)
+        .set_parents(vec![commit1.id().clone()])
+        .set_description("commit 2")
+        .write()
+        .unwrap();
+    let commit3 = create_random_commit(mut_repo, &settings)
+        .set_parents(vec![commit2.id().clone()])
+        .set_description("commit 3")
+        .write()
+        .unwrap();
+
+    // Not intersected with a set node
+    assert_eq!(
+        resolve_commit_ids(mut_repo, "~description(1)"),
+        vec![
+            commit3.id().clone(),
+            commit2.id().clone(),
+            root_commit_id.clone(),
+        ],
+    );
+    assert_eq!(
+        resolve_commit_ids(mut_repo, "description(1) | description(2)"),
+        vec![commit2.id().clone(), commit1.id().clone()],
+    );
+    assert_eq!(
+        resolve_commit_ids(
+            mut_repo,
+            "description(commit) ~ (description(2) | description(3))",
+        ),
+        vec![commit1.id().clone()],
+    );
+
+    // Intersected with a set node
+    assert_eq!(
+        resolve_commit_ids(mut_repo, "root.. & ~description(1)"),
+        vec![commit3.id().clone(), commit2.id().clone()],
+    );
+    assert_eq!(
+        resolve_commit_ids(
+            mut_repo,
+            &format!(
+                "{}.. & (description(1) | description(2))",
+                commit1.id().hex(),
+            )
+        ),
+        vec![commit2.id().clone()],
+    );
+}
+
 #[test_case(false ; "local backend")]
 #[test_case(true ; "git backend")]
 fn test_evaluate_expression_file(use_git: bool) {
diff --git a/testing/bench-revsets-git.txt b/testing/bench-revsets-git.txt
index 2a66e304a..67881bdac 100644
--- a/testing/bench-revsets-git.txt
+++ b/testing/bench-revsets-git.txt
@@ -25,6 +25,9 @@ committer(gitster)
 # Intersection and union of large subsets
 author(peff) & committer(gitster)
 author(peff) | committer(gitster)
+# Intersection of filter with a small subset
+:v1.0.0 & (author(peff) & committer(gitster))
+:v1.0.0 & (author(peff) | committer(gitster))
 # Roots and heads of small subsets
 roots(tags())
 heads(tags())
@@ -37,5 +40,8 @@ heads(:v2.40.0)
 # Parents and children of small subset
 tags()-
 tags()+
+# Filter that doesn't read commit object
+merges()
+~merges()
 # Files are unbearably slow, so only filter within small set
 file(Makefile) & v1.0.0..v1.2.0