revset: make resolve() return different type describing evaluation plan

New ResolvedExpression enum ensures that the evaluation engine doesn't have
to know the symbol resolution details. In this commit, I've moved Filter
and NotIn resolution to resolve_visibility(). Implicit All/VisibleHeads
resolution will be migrated later.

It's tempting to combine resolve_symbols() and resolve_visibility() to get
rid of panic!()s, but the resolution might have to be two passes to first
resolve&collect explicit commit ids, and then substitute "all()" with
"(:visible_heads())|commit_id|..". It's also possible to apply some tree
transformation after symbol resolution.
This commit is contained in:
Yuya Nishihara 2023-04-04 19:22:53 +09:00
parent 6c2525cb93
commit 0fcc13a6f4
5 changed files with 262 additions and 90 deletions

View file

@ -44,7 +44,7 @@ use crate::index::{
use crate::nightly_shims::BTreeSetExt;
use crate::op_store::OperationId;
use crate::operation::Operation;
use crate::revset::{Revset, RevsetEvaluationError, RevsetExpression};
use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError};
use crate::store::Store;
use crate::{backend, dag_walk, default_revset_engine};
@ -729,7 +729,7 @@ impl Index for MutableIndexImpl {
fn evaluate_revset<'index>(
&'index self,
expression: &RevsetExpression,
expression: &ResolvedExpression,
store: &Arc<Store>,
visible_heads: &[CommitId],
) -> Result<Box<dyn Revset<'index> + 'index>, RevsetEvaluationError> {
@ -1817,7 +1817,7 @@ impl Index for ReadonlyIndexImpl {
fn evaluate_revset<'index>(
&'index self,
expression: &RevsetExpression,
expression: &ResolvedExpression,
store: &Arc<Store>,
visible_heads: &[CommitId],
) -> Result<Box<dyn Revset<'index> + 'index>, RevsetEvaluationError> {

View file

@ -29,8 +29,8 @@ use crate::index::{HexPrefix, Index, PrefixResolution};
use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher, Visit};
use crate::repo_path::RepoPath;
use crate::revset::{
ChangeIdIndex, Revset, RevsetEvaluationError, RevsetExpression, RevsetFilterPredicate,
RevsetGraphEdge, GENERATION_RANGE_FULL,
ChangeIdIndex, ResolvedExpression, ResolvedPredicateExpression, Revset, RevsetEvaluationError,
RevsetFilterPredicate, RevsetGraphEdge, GENERATION_RANGE_FULL,
};
use crate::store::Store;
use crate::{backend, rewrite};
@ -545,7 +545,7 @@ impl<'index, I1: Iterator<Item = IndexEntry<'index>>, I2: Iterator<Item = IndexE
// TODO: Having to pass both `&dyn Index` and `CompositeIndex` is a bit ugly.
// Maybe we should make `CompositeIndex` implement `Index`?
pub fn evaluate<'index>(
expression: &RevsetExpression,
expression: &ResolvedExpression,
store: &Arc<Store>,
index: &'index dyn Index,
composite_index: CompositeIndex<'index>,
@ -571,14 +571,11 @@ struct EvaluationContext<'index, 'heads> {
impl<'index, 'heads> EvaluationContext<'index, 'heads> {
fn evaluate(
&self,
expression: &RevsetExpression,
expression: &ResolvedExpression,
) -> Result<Box<dyn InternalRevset<'index> + 'index>, RevsetEvaluationError> {
match expression {
RevsetExpression::CommitRef(_) | RevsetExpression::Present(_) => {
panic!("Expression '{expression:?}' should have been resolved by caller");
}
RevsetExpression::None => Ok(Box::new(EagerRevset::empty())),
RevsetExpression::All => {
ResolvedExpression::None => Ok(Box::new(EagerRevset::empty())),
ResolvedExpression::All => {
// Since `all()` does not include hidden commits, some of the logical
// transformation rules may subtly change the evaluated set. For example,
// `all() & x` is not `x` if `x` is hidden. This wouldn't matter in practice,
@ -589,10 +586,10 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
let walk = self.composite_index.walk_revs(self.visible_heads, &[]);
Ok(Box::new(RevWalkRevset { walk }))
}
RevsetExpression::Commits(commit_ids) => {
ResolvedExpression::Commits(commit_ids) => {
Ok(Box::new(self.revset_for_commit_ids(commit_ids)))
}
RevsetExpression::Children(roots) => {
ResolvedExpression::Children(roots) => {
let root_set = self.evaluate(roots)?;
let head_set = self.revset_for_commit_ids(self.visible_heads);
let (walk, root_positions) = self.walk_ancestors_until_roots(&*root_set, &head_set);
@ -610,7 +607,7 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
predicate,
}))
}
RevsetExpression::Ancestors { heads, generation } => {
ResolvedExpression::Ancestors { heads, generation } => {
let head_set = self.evaluate(heads)?;
let walk = self.walk_ancestors(&*head_set);
if generation == &GENERATION_RANGE_FULL {
@ -620,7 +617,7 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
Ok(Box::new(RevWalkRevset { walk }))
}
}
RevsetExpression::Range {
ResolvedExpression::Range {
roots,
heads,
generation,
@ -637,16 +634,16 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
Ok(Box::new(RevWalkRevset { walk }))
}
}
RevsetExpression::DagRange { roots, heads } => {
ResolvedExpression::DagRange { roots, heads } => {
let root_set = self.evaluate(roots)?;
let head_set = self.evaluate(heads)?;
let (dag_range_set, _) = self.collect_dag_range(&*root_set, &*head_set);
Ok(Box::new(dag_range_set))
}
RevsetExpression::VisibleHeads => {
ResolvedExpression::VisibleHeads => {
Ok(Box::new(self.revset_for_commit_ids(self.visible_heads)))
}
RevsetExpression::Heads(candidates) => {
ResolvedExpression::Heads(candidates) => {
let candidate_set = self.evaluate(candidates)?;
let candidate_ids = candidate_set
.iter()
@ -656,7 +653,7 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
&self.composite_index.heads(&mut candidate_ids.iter()),
)))
}
RevsetExpression::Roots(candidates) => {
ResolvedExpression::Roots(candidates) => {
let candidate_set = EagerRevset {
index_entries: self.evaluate(candidates)?.iter().collect(),
};
@ -673,48 +670,30 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
}
Ok(Box::new(EagerRevset { index_entries }))
}
RevsetExpression::Latest { candidates, count } => {
ResolvedExpression::Latest { candidates, count } => {
let candidate_set = self.evaluate(candidates)?;
Ok(Box::new(
self.take_latest_revset(candidate_set.as_ref(), *count),
))
}
RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => {
// Top-level filter without intersection: e.g. "~author(_)" is represented as
// `AsFilter(NotIn(Filter(Author(_))))`.
Ok(Box::new(FilterRevset {
candidates: self.evaluate(&RevsetExpression::All)?,
predicate: self.evaluate_predicate(expression)?,
}))
}
RevsetExpression::NotIn(complement) => {
let set1 = self.evaluate(&RevsetExpression::All)?;
let set2 = self.evaluate(complement)?;
Ok(Box::new(DifferenceRevset { set1, set2 }))
}
RevsetExpression::Union(expression1, expression2) => {
ResolvedExpression::Union(expression1, expression2) => {
let set1 = self.evaluate(expression1)?;
let set2 = self.evaluate(expression2)?;
Ok(Box::new(UnionRevset { set1, set2 }))
}
RevsetExpression::Intersection(expression1, expression2) => {
match expression2.as_ref() {
RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => {
Ok(Box::new(FilterRevset {
candidates: self.evaluate(expression1)?,
predicate: self.evaluate_predicate(expression2)?,
}))
}
_ => {
// TODO: 'set2' can be turned into a predicate, and use FilterRevset
// if a predicate function can terminate the 'set1' iterator early.
ResolvedExpression::FilterWithin {
candidates,
predicate,
} => Ok(Box::new(FilterRevset {
candidates: self.evaluate(candidates)?,
predicate: self.evaluate_predicate(predicate)?,
})),
ResolvedExpression::Intersection(expression1, expression2) => {
let set1 = self.evaluate(expression1)?;
let set2 = self.evaluate(expression2)?;
Ok(Box::new(IntersectionRevset { set1, set2 }))
}
}
}
RevsetExpression::Difference(expression1, expression2) => {
ResolvedExpression::Difference(expression1, expression2) => {
let set1 = self.evaluate(expression1)?;
let set2 = self.evaluate(expression2)?;
Ok(Box::new(DifferenceRevset { set1, set2 }))
@ -722,50 +701,28 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> {
}
}
/// Evaluates expression tree as filter predicate.
///
/// For filter expression, this never inserts a hidden `all()` since a
/// filter predicate doesn't need to produce revisions to walk.
fn evaluate_predicate(
&self,
expression: &RevsetExpression,
expression: &ResolvedPredicateExpression,
) -> Result<Box<dyn ToPredicateFn + 'index>, RevsetEvaluationError> {
match expression {
RevsetExpression::None
| RevsetExpression::All
| RevsetExpression::Commits(_)
| RevsetExpression::CommitRef(_)
| RevsetExpression::Children(_)
| RevsetExpression::Ancestors { .. }
| RevsetExpression::Range { .. }
| RevsetExpression::DagRange { .. }
| RevsetExpression::Heads(_)
| RevsetExpression::Roots(_)
| RevsetExpression::VisibleHeads
| RevsetExpression::Latest { .. } => Ok(self.evaluate(expression)?.into_predicate()),
RevsetExpression::Filter(predicate) => Ok(build_predicate_fn(
ResolvedPredicateExpression::Filter(predicate) => Ok(build_predicate_fn(
self.store.clone(),
self.index,
predicate,
)),
RevsetExpression::AsFilter(candidates) => self.evaluate_predicate(candidates),
RevsetExpression::Present(_) => {
panic!("Expression '{expression:?}' should have been resolved by caller")
ResolvedPredicateExpression::Set(expression) => {
Ok(self.evaluate(expression)?.into_predicate())
}
RevsetExpression::NotIn(complement) => {
ResolvedPredicateExpression::NotIn(complement) => {
let set = self.evaluate_predicate(complement)?;
Ok(Box::new(NotInPredicate(set)))
}
RevsetExpression::Union(expression1, expression2) => {
ResolvedPredicateExpression::Union(expression1, expression2) => {
let set1 = self.evaluate_predicate(expression1)?;
let set2 = self.evaluate_predicate(expression2)?;
Ok(Box::new(UnionPredicate { set1, set2 }))
}
// Intersection of filters should have been substituted by revset::optimize().
// If it weren't, just fall back to the set evaluation path.
RevsetExpression::Intersection(..) | RevsetExpression::Difference(..) => {
Ok(self.evaluate(expression)?.into_predicate())
}
}
}

View file

@ -23,7 +23,7 @@ use crate::commit::Commit;
use crate::default_index_store::{IndexEntry, RevWalk};
use crate::op_store::OperationId;
use crate::operation::Operation;
use crate::revset::{Revset, RevsetEvaluationError, RevsetExpression};
use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError};
use crate::store::Store;
#[derive(Debug, Error)]
@ -70,7 +70,7 @@ pub trait Index: Send + Sync {
fn evaluate_revset<'index>(
&'index self,
expression: &RevsetExpression,
expression: &ResolvedExpression,
store: &Arc<Store>,
visible_heads: &[CommitId],
) -> Result<Box<dyn Revset<'index> + 'index>, RevsetEvaluationError>;

View file

@ -424,18 +424,80 @@ impl RevsetExpression {
pub fn resolve(
self: Rc<Self>,
repo: &dyn Repo,
) -> Result<Rc<RevsetExpression>, RevsetResolutionError> {
resolve_symbols(repo, self, None)
) -> Result<ResolvedExpression, RevsetResolutionError> {
resolve_symbols(repo, self, None).map(|expression| resolve_visibility(repo, &expression))
}
pub fn resolve_in_workspace(
self: Rc<Self>,
repo: &dyn Repo,
workspace_ctx: &RevsetWorkspaceContext,
) -> Result<Rc<RevsetExpression>, RevsetResolutionError> {
) -> Result<ResolvedExpression, RevsetResolutionError> {
resolve_symbols(repo, self, Some(workspace_ctx))
.map(|expression| resolve_visibility(repo, &expression))
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ResolvedPredicateExpression {
/// Pure filter predicate.
Filter(RevsetFilterPredicate),
/// Set expression to be evaluated as filter. This is typically a subtree
/// node of `Union` with a pure filter predicate.
Set(Box<ResolvedExpression>),
NotIn(Box<ResolvedPredicateExpression>),
Union(
Box<ResolvedPredicateExpression>,
Box<ResolvedPredicateExpression>,
),
}
/// Describes evaluation plan of revset expression.
///
/// Unlike `RevsetExpression`, this doesn't contain unresolved symbols or `View`
/// properties.
///
/// Use `RevsetExpression` API to build a query programmatically.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ResolvedExpression {
None,
All, // TODO: should be substituted at resolve_visibility()
Commits(Vec<CommitId>),
Children(Box<ResolvedExpression>), // TODO: add heads: VisibleHeads
Ancestors {
heads: Box<ResolvedExpression>,
generation: Range<u32>,
},
/// Commits that are ancestors of `heads` but not ancestors of `roots`.
Range {
roots: Box<ResolvedExpression>,
heads: Box<ResolvedExpression>,
generation: Range<u32>,
},
/// Commits that are descendants of `roots` and ancestors of `heads`.
DagRange {
roots: Box<ResolvedExpression>,
heads: Box<ResolvedExpression>,
},
Heads(Box<ResolvedExpression>),
Roots(Box<ResolvedExpression>),
VisibleHeads, // TODO: should be substituted at resolve_visibility()
Latest {
candidates: Box<ResolvedExpression>,
count: usize,
},
Union(Box<ResolvedExpression>, Box<ResolvedExpression>),
/// Intersects `candidates` with `predicate` by filtering.
FilterWithin {
candidates: Box<ResolvedExpression>,
predicate: ResolvedPredicateExpression,
},
/// Intersects expressions by merging.
Intersection(Box<ResolvedExpression>, Box<ResolvedExpression>),
Difference(Box<ResolvedExpression>, Box<ResolvedExpression>),
}
impl ResolvedExpression {
pub fn evaluate<'index>(
&self,
repo: &'index dyn Repo,
@ -1699,9 +1761,6 @@ fn resolve_commit_ref(
}
}
// TODO: Maybe return a new type (RevsetParameters?) instead of
// RevsetExpression. Then pass that to evaluate(), so it's clear which variants
// are allowed.
fn resolve_symbols(
repo: &dyn Repo,
expression: Rc<RevsetExpression>,
@ -1734,6 +1793,162 @@ fn resolve_symbols(
.unwrap_or(expression))
}
/// Inserts implicit `all()` and `visible_heads()` nodes to the `expression`.
///
/// Symbols and commit refs in the `expression` should have been resolved.
///
/// This is a separate step because a symbol-resolved `expression` could be
/// transformed further to e.g. combine OR-ed `Commits(_)`, or to collect
/// commit ids to make `all()` include hidden-but-specified commits. The
/// return type `ResolvedExpression` is stricter than `RevsetExpression`,
/// and isn't designed for such transformation.
fn resolve_visibility(_repo: &dyn Repo, expression: &RevsetExpression) -> ResolvedExpression {
let context = VisibilityResolutionContext {};
context.resolve(expression)
}
#[derive(Clone, Debug)]
struct VisibilityResolutionContext {
// TODO: visible_heads
}
impl VisibilityResolutionContext {
/// Resolves expression tree as set.
fn resolve(&self, expression: &RevsetExpression) -> ResolvedExpression {
match expression {
RevsetExpression::None => ResolvedExpression::None,
RevsetExpression::All => self.resolve_all(),
RevsetExpression::Commits(commit_ids) => {
ResolvedExpression::Commits(commit_ids.clone())
}
RevsetExpression::CommitRef(_) => {
panic!("Expression '{expression:?}' should have been resolved by caller");
}
RevsetExpression::Children(roots) => {
ResolvedExpression::Children(self.resolve(roots).into())
}
RevsetExpression::Ancestors { heads, generation } => ResolvedExpression::Ancestors {
heads: self.resolve(heads).into(),
generation: generation.clone(),
},
RevsetExpression::Range {
roots,
heads,
generation,
} => ResolvedExpression::Range {
roots: self.resolve(roots).into(),
heads: self.resolve(heads).into(),
generation: generation.clone(),
},
RevsetExpression::DagRange { roots, heads } => ResolvedExpression::DagRange {
roots: self.resolve(roots).into(),
heads: self.resolve(heads).into(),
},
RevsetExpression::Heads(candidates) => {
ResolvedExpression::Heads(self.resolve(candidates).into())
}
RevsetExpression::Roots(candidates) => {
ResolvedExpression::Roots(self.resolve(candidates).into())
}
RevsetExpression::VisibleHeads => self.resolve_visible_heads(),
RevsetExpression::Latest { candidates, count } => ResolvedExpression::Latest {
candidates: self.resolve(candidates).into(),
count: *count,
},
RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => {
// Top-level filter without intersection: e.g. "~author(_)" is represented as
// `AsFilter(NotIn(Filter(Author(_))))`.
ResolvedExpression::FilterWithin {
candidates: self.resolve_all().into(),
predicate: self.resolve_predicate(expression),
}
}
RevsetExpression::Present(_) => {
panic!("Expression '{expression:?}' should have been resolved by caller");
}
RevsetExpression::NotIn(complement) => ResolvedExpression::Difference(
self.resolve_all().into(),
self.resolve(complement).into(),
),
RevsetExpression::Union(expression1, expression2) => ResolvedExpression::Union(
self.resolve(expression1).into(),
self.resolve(expression2).into(),
),
RevsetExpression::Intersection(expression1, expression2) => {
match expression2.as_ref() {
RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => {
ResolvedExpression::FilterWithin {
candidates: self.resolve(expression1).into(),
predicate: self.resolve_predicate(expression2),
}
}
_ => ResolvedExpression::Intersection(
self.resolve(expression1).into(),
self.resolve(expression2).into(),
),
}
}
RevsetExpression::Difference(expression1, expression2) => {
ResolvedExpression::Difference(
self.resolve(expression1).into(),
self.resolve(expression2).into(),
)
}
}
}
fn resolve_all(&self) -> ResolvedExpression {
ResolvedExpression::All
}
fn resolve_visible_heads(&self) -> ResolvedExpression {
ResolvedExpression::VisibleHeads
}
/// Resolves expression tree as filter predicate.
///
/// For filter expression, this never inserts a hidden `all()` since a
/// filter predicate doesn't need to produce revisions to walk.
fn resolve_predicate(&self, expression: &RevsetExpression) -> ResolvedPredicateExpression {
match expression {
RevsetExpression::None
| RevsetExpression::All
| RevsetExpression::Commits(_)
| RevsetExpression::CommitRef(_)
| RevsetExpression::Children(_)
| RevsetExpression::Ancestors { .. }
| RevsetExpression::Range { .. }
| RevsetExpression::DagRange { .. }
| RevsetExpression::Heads(_)
| RevsetExpression::Roots(_)
| RevsetExpression::VisibleHeads
| RevsetExpression::Latest { .. } => {
ResolvedPredicateExpression::Set(self.resolve(expression).into())
}
RevsetExpression::Filter(predicate) => {
ResolvedPredicateExpression::Filter(predicate.clone())
}
RevsetExpression::AsFilter(candidates) => self.resolve_predicate(candidates),
RevsetExpression::Present(_) => {
panic!("Expression '{expression:?}' should have been resolved by caller")
}
RevsetExpression::NotIn(complement) => {
ResolvedPredicateExpression::NotIn(self.resolve_predicate(complement).into())
}
RevsetExpression::Union(expression1, expression2) => {
let predicate1 = self.resolve_predicate(expression1);
let predicate2 = self.resolve_predicate(expression2);
ResolvedPredicateExpression::Union(predicate1.into(), predicate2.into())
}
// Intersection of filters should have been substituted by optimize().
// If it weren't, just fall back to the set evaluation path.
RevsetExpression::Intersection(..) | RevsetExpression::Difference(..) => {
ResolvedPredicateExpression::Set(self.resolve(expression).into())
}
}
}
}
pub trait Revset<'index>: fmt::Debug {
/// Iterate in topological order with children before parents.
fn iter(&self) -> Box<dyn Iterator<Item = CommitId> + '_>;

View file

@ -17,7 +17,7 @@ use jujutsu_lib::commit::Commit;
use jujutsu_lib::default_index_store::ReadonlyIndexImpl;
use jujutsu_lib::default_revset_engine::{evaluate, RevsetImpl};
use jujutsu_lib::repo::Repo;
use jujutsu_lib::revset::{RevsetExpression, RevsetGraphEdge};
use jujutsu_lib::revset::{ResolvedExpression, RevsetGraphEdge};
use test_case::test_case;
use testutils::{CommitGraphBuilder, TestRepo};
@ -28,7 +28,7 @@ fn revset_for_commits<'index>(repo: &'index dyn Repo, commits: &[&Commit]) -> Re
.downcast_ref::<ReadonlyIndexImpl>()
.unwrap();
let expression =
RevsetExpression::commits(commits.iter().map(|commit| commit.id().clone()).collect());
ResolvedExpression::Commits(commits.iter().map(|commit| commit.id().clone()).collect());
evaluate(
&expression,
repo.store(),