revset: do not always evaluate filter node to InternalRevset

This basically removes hidden 'all() &' from union/negation of filters. To
achieve that, I have two options: 1. add separate evaluation path (like the
one this commit introduced), or 2. wrap "all()" revset to override predicate
as Box::new(|_| true) function. I took the former since it's less ad-hoc.

We can add an explicit RevsetExpression node to branch between evaluate()
and evaluate_predicate(), but I don't think it would simplify the
implementation at this point. We might need such node if we want to resolve
"all()" at resolve_symbols(). It might be even better to extract a subset of
RevsetExpression enum, which only contains evaluatable nodes.

The cost of 'all() &' isn't significant for most filters. '~merges()' is
the exception. For jj repo,

    revsets/:v0.3.0 & (author(martinvonz) | committer(martinvonz))
    --------------------------------------------------------------
    base     1.06      11.2±0.04m
    new      1.00      10.5±0.05m

    revsets/~merges()
    -----------------
    base     1.69     750.0±8.47µ
    new      1.00     444.1±3.50µ
This commit is contained in:
Yuya Nishihara 2023-04-01 16:41:47 +09:00
parent 69794f2585
commit 982062bd75
3 changed files with 164 additions and 15 deletions

View file

@ -342,13 +342,22 @@ impl<'index, P: ToPredicateFn> InternalRevset<'index> for FilterRevset<'index, P
impl<P: ToPredicateFn> ToPredicateFn for FilterRevset<'_, P> {
fn to_predicate_fn(&self) -> Box<dyn FnMut(&IndexEntry<'_>) -> bool + '_> {
// TODO: optimize 'p1' out if candidates = All
let mut p1 = self.candidates.to_predicate_fn();
let mut p2 = self.predicate.to_predicate_fn();
Box::new(move |entry| p1(entry) && p2(entry))
}
}
#[derive(Debug)]
struct NotInPredicate<S>(S);
impl<S: ToPredicateFn> ToPredicateFn for NotInPredicate<S> {
fn to_predicate_fn(&self) -> Box<dyn FnMut(&IndexEntry<'_>) -> bool + '_> {
let mut p = self.0.to_predicate_fn();
Box::new(move |entry| !p(entry))
}
}
#[derive(Debug)]
struct UnionRevset<'index> {
set1: Box<dyn InternalRevset<'index> + 'index>,
@ -379,6 +388,24 @@ impl ToPredicateFn for UnionRevset<'_> {
}
}
#[derive(Debug)]
struct UnionPredicate<S1, S2> {
set1: S1,
set2: S2,
}
impl<S1, S2> ToPredicateFn for UnionPredicate<S1, S2>
where
S1: ToPredicateFn,
S2: ToPredicateFn,
{
fn to_predicate_fn(&self) -> Box<dyn FnMut(&IndexEntry<'_>) -> bool + '_> {
let mut p1 = self.set1.to_predicate_fn();
let mut p2 = self.set2.to_predicate_fn();
Box::new(move |entry| p1(entry) || p2(entry))
}
}
struct UnionRevsetIterator<
'index,
I1: Iterator<Item = IndexEntry<'index>>,
@ -505,7 +532,6 @@ impl<'index> InternalRevset<'index> for DifferenceRevset<'index> {
impl ToPredicateFn for DifferenceRevset<'_> {
fn to_predicate_fn(&self) -> Box<dyn FnMut(&IndexEntry<'_>) -> bool + '_> {
// TODO: optimize 'p1' out for unary negate?
let mut p1 = self.set1.to_predicate_fn();
let mut p2 = self.set2.to_predicate_fn();
Box::new(move |entry| p1(entry) && !p2(entry))
@ -694,11 +720,14 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
let candidate_set = self.evaluate(candidates)?;
Ok(self.take_latest_revset(candidate_set.as_ref(), *count))
}
RevsetExpression::Filter(predicate) => Ok(Box::new(FilterRevset {
candidates: self.evaluate(&RevsetExpression::All)?,
predicate: build_predicate_fn(self.store.clone(), self.index, predicate),
})),
RevsetExpression::AsFilter(candidates) => self.evaluate(candidates),
RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => {
// Top-level filter without intersection: e.g. "~author(_)" is represented as
// `AsFilter(NotIn(Filter(Author(_))))`.
Ok(Box::new(FilterRevset {
candidates: self.evaluate(&RevsetExpression::All)?,
predicate: self.evaluate_predicate(expression)?,
}))
}
RevsetExpression::NotIn(complement) => {
let set1 = self.evaluate(&RevsetExpression::All)?;
let set2 = self.evaluate(complement)?;
@ -711,14 +740,12 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
}
RevsetExpression::Intersection(expression1, expression2) => {
match expression2.as_ref() {
RevsetExpression::Filter(predicate) => Ok(Box::new(FilterRevset {
candidates: self.evaluate(expression1)?,
predicate: build_predicate_fn(self.store.clone(), self.index, predicate),
})),
RevsetExpression::AsFilter(expression2) => Ok(Box::new(FilterRevset {
candidates: self.evaluate(expression1)?,
predicate: self.evaluate(expression2)?,
})),
RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => {
Ok(Box::new(FilterRevset {
candidates: self.evaluate(expression1)?,
predicate: self.evaluate_predicate(expression2)?,
}))
}
_ => {
// TODO: 'set2' can be turned into a predicate, and use FilterRevset
// if a predicate function can terminate the 'set1' iterator early.
@ -736,6 +763,58 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
}
}
/// Evaluates expression tree as filter predicate.
///
/// For filter expression, this never inserts a hidden `all()` since a
/// filter predicate doesn't need to produce revisions to walk.
fn evaluate_predicate(
&self,
expression: &RevsetExpression,
) -> Result<Box<dyn ToPredicateFn + 'index>, RevsetEvaluationError> {
match expression {
RevsetExpression::None
| RevsetExpression::All
| RevsetExpression::Commits(_)
| RevsetExpression::Symbol(_)
| RevsetExpression::Children(_)
| RevsetExpression::Ancestors { .. }
| RevsetExpression::Range { .. }
| RevsetExpression::DagRange { .. }
| RevsetExpression::Heads(_)
| RevsetExpression::Roots(_)
| RevsetExpression::VisibleHeads
| RevsetExpression::Branches(_)
| RevsetExpression::RemoteBranches { .. }
| RevsetExpression::Tags
| RevsetExpression::GitRefs
| RevsetExpression::GitHead
| RevsetExpression::Latest { .. } => Ok(self.evaluate(expression)?.into_predicate()),
RevsetExpression::Filter(predicate) => Ok(build_predicate_fn(
self.store.clone(),
self.index,
predicate,
)),
RevsetExpression::AsFilter(candidates) => self.evaluate_predicate(candidates),
RevsetExpression::Present(_) => {
panic!("Expression '{expression:?}' should have been resolved by caller")
}
RevsetExpression::NotIn(complement) => {
let set = self.evaluate_predicate(complement)?;
Ok(Box::new(NotInPredicate(set)))
}
RevsetExpression::Union(expression1, expression2) => {
let set1 = self.evaluate_predicate(expression1)?;
let set2 = self.evaluate_predicate(expression2)?;
Ok(Box::new(UnionPredicate { set1, set2 }))
}
// Intersection of filters should have been substituted by revset::optimize().
// If it weren't, just fall back to the set evaluation path.
RevsetExpression::Intersection(..) | RevsetExpression::Difference(..) => {
Ok(self.evaluate(expression)?.into_predicate())
}
}
}
fn revset_for_commit_ids(
&self,
commit_ids: &[CommitId],

View file

@ -1971,6 +1971,70 @@ fn test_evaluate_expression_difference(use_git: bool) {
);
}
#[test_case(false ; "local backend")]
#[test_case(true ; "git backend")]
fn test_evaluate_expression_filter_combinator(use_git: bool) {
let settings = testutils::user_settings();
let test_repo = TestRepo::init(use_git);
let repo = &test_repo.repo;
let mut tx = repo.start_transaction(&settings, "test");
let mut_repo = tx.mut_repo();
let root_commit_id = repo.store().root_commit_id();
let commit1 = create_random_commit(mut_repo, &settings)
.set_description("commit 1")
.write()
.unwrap();
let commit2 = create_random_commit(mut_repo, &settings)
.set_parents(vec![commit1.id().clone()])
.set_description("commit 2")
.write()
.unwrap();
let commit3 = create_random_commit(mut_repo, &settings)
.set_parents(vec![commit2.id().clone()])
.set_description("commit 3")
.write()
.unwrap();
// Not intersected with a set node
assert_eq!(
resolve_commit_ids(mut_repo, "~description(1)"),
vec![
commit3.id().clone(),
commit2.id().clone(),
root_commit_id.clone(),
],
);
assert_eq!(
resolve_commit_ids(mut_repo, "description(1) | description(2)"),
vec![commit2.id().clone(), commit1.id().clone()],
);
assert_eq!(
resolve_commit_ids(
mut_repo,
"description(commit) ~ (description(2) | description(3))",
),
vec![commit1.id().clone()],
);
// Intersected with a set node
assert_eq!(
resolve_commit_ids(mut_repo, "root.. & ~description(1)"),
vec![commit3.id().clone(), commit2.id().clone()],
);
assert_eq!(
resolve_commit_ids(
mut_repo,
&format!(
"{}.. & (description(1) | description(2))",
commit1.id().hex(),
)
),
vec![commit2.id().clone()],
);
}
#[test_case(false ; "local backend")]
#[test_case(true ; "git backend")]
fn test_evaluate_expression_file(use_git: bool) {

View file

@ -25,6 +25,9 @@ committer(gitster)
# Intersection and union of large subsets
author(peff) & committer(gitster)
author(peff) | committer(gitster)
# Intersection of filter with a small subset
:v1.0.0 & (author(peff) & committer(gitster))
:v1.0.0 & (author(peff) | committer(gitster))
# Roots and heads of small subsets
roots(tags())
heads(tags())
@ -37,5 +40,8 @@ heads(:v2.40.0)
# Parents and children of small subset
tags()-
tags()+
# Filter that doesn't read commit object
merges()
~merges()
# Files are unbearably slow, so only filter within small set
file(Makefile) & v1.0.0..v1.2.0