From 0fcc13a6f4e83d47eb1f2a3fba9c8b3d380b3324 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Tue, 4 Apr 2023 19:22:53 +0900 Subject: [PATCH] revset: make resolve() return different type describing evaluation plan New ResolvedExpression enum ensures that the evaluation engine doesn't have to know the symbol resolution details. In this commit, I've moved Filter and NotIn resolution to resolve_visibility(). Implicit All/VisibleHeads resolution will be migrated later. It's tempting to combine resolve_symbols() and resolve_visibility() to get rid of panic!()s, but the resolution might have to be two passes to first resolve&collect explicit commit ids, and then substitute "all()" with "(:visible_heads())|commit_id|..". It's also possible to apply some tree transformation after symbol resolution. --- lib/src/default_index_store.rs | 6 +- lib/src/default_revset_engine.rs | 111 +++------ lib/src/index.rs | 4 +- lib/src/revset.rs | 227 +++++++++++++++++- .../test_default_revset_graph_iterator.rs | 4 +- 5 files changed, 262 insertions(+), 90 deletions(-) diff --git a/lib/src/default_index_store.rs b/lib/src/default_index_store.rs index 883843f8a..4fb59200a 100644 --- a/lib/src/default_index_store.rs +++ b/lib/src/default_index_store.rs @@ -44,7 +44,7 @@ use crate::index::{ use crate::nightly_shims::BTreeSetExt; use crate::op_store::OperationId; use crate::operation::Operation; -use crate::revset::{Revset, RevsetEvaluationError, RevsetExpression}; +use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError}; use crate::store::Store; use crate::{backend, dag_walk, default_revset_engine}; @@ -729,7 +729,7 @@ impl Index for MutableIndexImpl { fn evaluate_revset<'index>( &'index self, - expression: &RevsetExpression, + expression: &ResolvedExpression, store: &Arc, visible_heads: &[CommitId], ) -> Result + 'index>, RevsetEvaluationError> { @@ -1817,7 +1817,7 @@ impl Index for ReadonlyIndexImpl { fn evaluate_revset<'index>( &'index self, - expression: &RevsetExpression, + expression: &ResolvedExpression, store: &Arc, visible_heads: &[CommitId], ) -> Result + 'index>, RevsetEvaluationError> { diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index 030fe979a..797b1c34a 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -29,8 +29,8 @@ use crate::index::{HexPrefix, Index, PrefixResolution}; use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher, Visit}; use crate::repo_path::RepoPath; use crate::revset::{ - ChangeIdIndex, Revset, RevsetEvaluationError, RevsetExpression, RevsetFilterPredicate, - RevsetGraphEdge, GENERATION_RANGE_FULL, + ChangeIdIndex, ResolvedExpression, ResolvedPredicateExpression, Revset, RevsetEvaluationError, + RevsetFilterPredicate, RevsetGraphEdge, GENERATION_RANGE_FULL, }; use crate::store::Store; use crate::{backend, rewrite}; @@ -545,7 +545,7 @@ impl<'index, I1: Iterator>, I2: Iterator( - expression: &RevsetExpression, + expression: &ResolvedExpression, store: &Arc, index: &'index dyn Index, composite_index: CompositeIndex<'index>, @@ -571,14 +571,11 @@ struct EvaluationContext<'index, 'heads> { impl<'index, 'heads> EvaluationContext<'index, 'heads> { fn evaluate( &self, - expression: &RevsetExpression, + expression: &ResolvedExpression, ) -> Result + 'index>, RevsetEvaluationError> { match expression { - RevsetExpression::CommitRef(_) | RevsetExpression::Present(_) => { - panic!("Expression '{expression:?}' should have been resolved by caller"); - } - RevsetExpression::None => Ok(Box::new(EagerRevset::empty())), - RevsetExpression::All => { + ResolvedExpression::None => Ok(Box::new(EagerRevset::empty())), + ResolvedExpression::All => { // Since `all()` does not include hidden commits, some of the logical // transformation rules may subtly change the evaluated set. For example, // `all() & x` is not `x` if `x` is hidden. This wouldn't matter in practice, @@ -589,10 +586,10 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> { let walk = self.composite_index.walk_revs(self.visible_heads, &[]); Ok(Box::new(RevWalkRevset { walk })) } - RevsetExpression::Commits(commit_ids) => { + ResolvedExpression::Commits(commit_ids) => { Ok(Box::new(self.revset_for_commit_ids(commit_ids))) } - RevsetExpression::Children(roots) => { + ResolvedExpression::Children(roots) => { let root_set = self.evaluate(roots)?; let head_set = self.revset_for_commit_ids(self.visible_heads); let (walk, root_positions) = self.walk_ancestors_until_roots(&*root_set, &head_set); @@ -610,7 +607,7 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> { predicate, })) } - RevsetExpression::Ancestors { heads, generation } => { + ResolvedExpression::Ancestors { heads, generation } => { let head_set = self.evaluate(heads)?; let walk = self.walk_ancestors(&*head_set); if generation == &GENERATION_RANGE_FULL { @@ -620,7 +617,7 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> { Ok(Box::new(RevWalkRevset { walk })) } } - RevsetExpression::Range { + ResolvedExpression::Range { roots, heads, generation, @@ -637,16 +634,16 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> { Ok(Box::new(RevWalkRevset { walk })) } } - RevsetExpression::DagRange { roots, heads } => { + ResolvedExpression::DagRange { roots, heads } => { let root_set = self.evaluate(roots)?; let head_set = self.evaluate(heads)?; let (dag_range_set, _) = self.collect_dag_range(&*root_set, &*head_set); Ok(Box::new(dag_range_set)) } - RevsetExpression::VisibleHeads => { + ResolvedExpression::VisibleHeads => { Ok(Box::new(self.revset_for_commit_ids(self.visible_heads))) } - RevsetExpression::Heads(candidates) => { + ResolvedExpression::Heads(candidates) => { let candidate_set = self.evaluate(candidates)?; let candidate_ids = candidate_set .iter() @@ -656,7 +653,7 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> { &self.composite_index.heads(&mut candidate_ids.iter()), ))) } - RevsetExpression::Roots(candidates) => { + ResolvedExpression::Roots(candidates) => { let candidate_set = EagerRevset { index_entries: self.evaluate(candidates)?.iter().collect(), }; @@ -673,48 +670,30 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> { } Ok(Box::new(EagerRevset { index_entries })) } - RevsetExpression::Latest { candidates, count } => { + ResolvedExpression::Latest { candidates, count } => { let candidate_set = self.evaluate(candidates)?; Ok(Box::new( self.take_latest_revset(candidate_set.as_ref(), *count), )) } - RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => { - // Top-level filter without intersection: e.g. "~author(_)" is represented as - // `AsFilter(NotIn(Filter(Author(_))))`. - Ok(Box::new(FilterRevset { - candidates: self.evaluate(&RevsetExpression::All)?, - predicate: self.evaluate_predicate(expression)?, - })) - } - RevsetExpression::NotIn(complement) => { - let set1 = self.evaluate(&RevsetExpression::All)?; - let set2 = self.evaluate(complement)?; - Ok(Box::new(DifferenceRevset { set1, set2 })) - } - RevsetExpression::Union(expression1, expression2) => { + ResolvedExpression::Union(expression1, expression2) => { let set1 = self.evaluate(expression1)?; let set2 = self.evaluate(expression2)?; Ok(Box::new(UnionRevset { set1, set2 })) } - RevsetExpression::Intersection(expression1, expression2) => { - match expression2.as_ref() { - RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => { - Ok(Box::new(FilterRevset { - candidates: self.evaluate(expression1)?, - predicate: self.evaluate_predicate(expression2)?, - })) - } - _ => { - // TODO: 'set2' can be turned into a predicate, and use FilterRevset - // if a predicate function can terminate the 'set1' iterator early. - let set1 = self.evaluate(expression1)?; - let set2 = self.evaluate(expression2)?; - Ok(Box::new(IntersectionRevset { set1, set2 })) - } - } + ResolvedExpression::FilterWithin { + candidates, + predicate, + } => Ok(Box::new(FilterRevset { + candidates: self.evaluate(candidates)?, + predicate: self.evaluate_predicate(predicate)?, + })), + ResolvedExpression::Intersection(expression1, expression2) => { + let set1 = self.evaluate(expression1)?; + let set2 = self.evaluate(expression2)?; + Ok(Box::new(IntersectionRevset { set1, set2 })) } - RevsetExpression::Difference(expression1, expression2) => { + ResolvedExpression::Difference(expression1, expression2) => { let set1 = self.evaluate(expression1)?; let set2 = self.evaluate(expression2)?; Ok(Box::new(DifferenceRevset { set1, set2 })) @@ -722,50 +701,28 @@ impl<'index, 'heads> EvaluationContext<'index, 'heads> { } } - /// Evaluates expression tree as filter predicate. - /// - /// For filter expression, this never inserts a hidden `all()` since a - /// filter predicate doesn't need to produce revisions to walk. fn evaluate_predicate( &self, - expression: &RevsetExpression, + expression: &ResolvedPredicateExpression, ) -> Result, RevsetEvaluationError> { match expression { - RevsetExpression::None - | RevsetExpression::All - | RevsetExpression::Commits(_) - | RevsetExpression::CommitRef(_) - | RevsetExpression::Children(_) - | RevsetExpression::Ancestors { .. } - | RevsetExpression::Range { .. } - | RevsetExpression::DagRange { .. } - | RevsetExpression::Heads(_) - | RevsetExpression::Roots(_) - | RevsetExpression::VisibleHeads - | RevsetExpression::Latest { .. } => Ok(self.evaluate(expression)?.into_predicate()), - RevsetExpression::Filter(predicate) => Ok(build_predicate_fn( + ResolvedPredicateExpression::Filter(predicate) => Ok(build_predicate_fn( self.store.clone(), self.index, predicate, )), - RevsetExpression::AsFilter(candidates) => self.evaluate_predicate(candidates), - RevsetExpression::Present(_) => { - panic!("Expression '{expression:?}' should have been resolved by caller") + ResolvedPredicateExpression::Set(expression) => { + Ok(self.evaluate(expression)?.into_predicate()) } - RevsetExpression::NotIn(complement) => { + ResolvedPredicateExpression::NotIn(complement) => { let set = self.evaluate_predicate(complement)?; Ok(Box::new(NotInPredicate(set))) } - RevsetExpression::Union(expression1, expression2) => { + ResolvedPredicateExpression::Union(expression1, expression2) => { let set1 = self.evaluate_predicate(expression1)?; let set2 = self.evaluate_predicate(expression2)?; Ok(Box::new(UnionPredicate { set1, set2 })) } - // Intersection of filters should have been substituted by revset::optimize(). - // If it weren't, just fall back to the set evaluation path. - RevsetExpression::Intersection(..) | RevsetExpression::Difference(..) => { - Ok(self.evaluate(expression)?.into_predicate()) - } } } diff --git a/lib/src/index.rs b/lib/src/index.rs index 6b1cfac50..8f83df309 100644 --- a/lib/src/index.rs +++ b/lib/src/index.rs @@ -23,7 +23,7 @@ use crate::commit::Commit; use crate::default_index_store::{IndexEntry, RevWalk}; use crate::op_store::OperationId; use crate::operation::Operation; -use crate::revset::{Revset, RevsetEvaluationError, RevsetExpression}; +use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError}; use crate::store::Store; #[derive(Debug, Error)] @@ -70,7 +70,7 @@ pub trait Index: Send + Sync { fn evaluate_revset<'index>( &'index self, - expression: &RevsetExpression, + expression: &ResolvedExpression, store: &Arc, visible_heads: &[CommitId], ) -> Result + 'index>, RevsetEvaluationError>; diff --git a/lib/src/revset.rs b/lib/src/revset.rs index 53032c0ca..e62089302 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -424,18 +424,80 @@ impl RevsetExpression { pub fn resolve( self: Rc, repo: &dyn Repo, - ) -> Result, RevsetResolutionError> { - resolve_symbols(repo, self, None) + ) -> Result { + resolve_symbols(repo, self, None).map(|expression| resolve_visibility(repo, &expression)) } pub fn resolve_in_workspace( self: Rc, repo: &dyn Repo, workspace_ctx: &RevsetWorkspaceContext, - ) -> Result, RevsetResolutionError> { + ) -> Result { resolve_symbols(repo, self, Some(workspace_ctx)) + .map(|expression| resolve_visibility(repo, &expression)) } +} +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ResolvedPredicateExpression { + /// Pure filter predicate. + Filter(RevsetFilterPredicate), + /// Set expression to be evaluated as filter. This is typically a subtree + /// node of `Union` with a pure filter predicate. + Set(Box), + NotIn(Box), + Union( + Box, + Box, + ), +} + +/// Describes evaluation plan of revset expression. +/// +/// Unlike `RevsetExpression`, this doesn't contain unresolved symbols or `View` +/// properties. +/// +/// Use `RevsetExpression` API to build a query programmatically. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum ResolvedExpression { + None, + All, // TODO: should be substituted at resolve_visibility() + Commits(Vec), + Children(Box), // TODO: add heads: VisibleHeads + Ancestors { + heads: Box, + generation: Range, + }, + /// Commits that are ancestors of `heads` but not ancestors of `roots`. + Range { + roots: Box, + heads: Box, + generation: Range, + }, + /// Commits that are descendants of `roots` and ancestors of `heads`. + DagRange { + roots: Box, + heads: Box, + }, + Heads(Box), + Roots(Box), + VisibleHeads, // TODO: should be substituted at resolve_visibility() + Latest { + candidates: Box, + count: usize, + }, + Union(Box, Box), + /// Intersects `candidates` with `predicate` by filtering. + FilterWithin { + candidates: Box, + predicate: ResolvedPredicateExpression, + }, + /// Intersects expressions by merging. + Intersection(Box, Box), + Difference(Box, Box), +} + +impl ResolvedExpression { pub fn evaluate<'index>( &self, repo: &'index dyn Repo, @@ -1699,9 +1761,6 @@ fn resolve_commit_ref( } } -// TODO: Maybe return a new type (RevsetParameters?) instead of -// RevsetExpression. Then pass that to evaluate(), so it's clear which variants -// are allowed. fn resolve_symbols( repo: &dyn Repo, expression: Rc, @@ -1734,6 +1793,162 @@ fn resolve_symbols( .unwrap_or(expression)) } +/// Inserts implicit `all()` and `visible_heads()` nodes to the `expression`. +/// +/// Symbols and commit refs in the `expression` should have been resolved. +/// +/// This is a separate step because a symbol-resolved `expression` could be +/// transformed further to e.g. combine OR-ed `Commits(_)`, or to collect +/// commit ids to make `all()` include hidden-but-specified commits. The +/// return type `ResolvedExpression` is stricter than `RevsetExpression`, +/// and isn't designed for such transformation. +fn resolve_visibility(_repo: &dyn Repo, expression: &RevsetExpression) -> ResolvedExpression { + let context = VisibilityResolutionContext {}; + context.resolve(expression) +} + +#[derive(Clone, Debug)] +struct VisibilityResolutionContext { + // TODO: visible_heads +} + +impl VisibilityResolutionContext { + /// Resolves expression tree as set. + fn resolve(&self, expression: &RevsetExpression) -> ResolvedExpression { + match expression { + RevsetExpression::None => ResolvedExpression::None, + RevsetExpression::All => self.resolve_all(), + RevsetExpression::Commits(commit_ids) => { + ResolvedExpression::Commits(commit_ids.clone()) + } + RevsetExpression::CommitRef(_) => { + panic!("Expression '{expression:?}' should have been resolved by caller"); + } + RevsetExpression::Children(roots) => { + ResolvedExpression::Children(self.resolve(roots).into()) + } + RevsetExpression::Ancestors { heads, generation } => ResolvedExpression::Ancestors { + heads: self.resolve(heads).into(), + generation: generation.clone(), + }, + RevsetExpression::Range { + roots, + heads, + generation, + } => ResolvedExpression::Range { + roots: self.resolve(roots).into(), + heads: self.resolve(heads).into(), + generation: generation.clone(), + }, + RevsetExpression::DagRange { roots, heads } => ResolvedExpression::DagRange { + roots: self.resolve(roots).into(), + heads: self.resolve(heads).into(), + }, + RevsetExpression::Heads(candidates) => { + ResolvedExpression::Heads(self.resolve(candidates).into()) + } + RevsetExpression::Roots(candidates) => { + ResolvedExpression::Roots(self.resolve(candidates).into()) + } + RevsetExpression::VisibleHeads => self.resolve_visible_heads(), + RevsetExpression::Latest { candidates, count } => ResolvedExpression::Latest { + candidates: self.resolve(candidates).into(), + count: *count, + }, + RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => { + // Top-level filter without intersection: e.g. "~author(_)" is represented as + // `AsFilter(NotIn(Filter(Author(_))))`. + ResolvedExpression::FilterWithin { + candidates: self.resolve_all().into(), + predicate: self.resolve_predicate(expression), + } + } + RevsetExpression::Present(_) => { + panic!("Expression '{expression:?}' should have been resolved by caller"); + } + RevsetExpression::NotIn(complement) => ResolvedExpression::Difference( + self.resolve_all().into(), + self.resolve(complement).into(), + ), + RevsetExpression::Union(expression1, expression2) => ResolvedExpression::Union( + self.resolve(expression1).into(), + self.resolve(expression2).into(), + ), + RevsetExpression::Intersection(expression1, expression2) => { + match expression2.as_ref() { + RevsetExpression::Filter(_) | RevsetExpression::AsFilter(_) => { + ResolvedExpression::FilterWithin { + candidates: self.resolve(expression1).into(), + predicate: self.resolve_predicate(expression2), + } + } + _ => ResolvedExpression::Intersection( + self.resolve(expression1).into(), + self.resolve(expression2).into(), + ), + } + } + RevsetExpression::Difference(expression1, expression2) => { + ResolvedExpression::Difference( + self.resolve(expression1).into(), + self.resolve(expression2).into(), + ) + } + } + } + + fn resolve_all(&self) -> ResolvedExpression { + ResolvedExpression::All + } + + fn resolve_visible_heads(&self) -> ResolvedExpression { + ResolvedExpression::VisibleHeads + } + + /// Resolves expression tree as filter predicate. + /// + /// For filter expression, this never inserts a hidden `all()` since a + /// filter predicate doesn't need to produce revisions to walk. + fn resolve_predicate(&self, expression: &RevsetExpression) -> ResolvedPredicateExpression { + match expression { + RevsetExpression::None + | RevsetExpression::All + | RevsetExpression::Commits(_) + | RevsetExpression::CommitRef(_) + | RevsetExpression::Children(_) + | RevsetExpression::Ancestors { .. } + | RevsetExpression::Range { .. } + | RevsetExpression::DagRange { .. } + | RevsetExpression::Heads(_) + | RevsetExpression::Roots(_) + | RevsetExpression::VisibleHeads + | RevsetExpression::Latest { .. } => { + ResolvedPredicateExpression::Set(self.resolve(expression).into()) + } + RevsetExpression::Filter(predicate) => { + ResolvedPredicateExpression::Filter(predicate.clone()) + } + RevsetExpression::AsFilter(candidates) => self.resolve_predicate(candidates), + RevsetExpression::Present(_) => { + panic!("Expression '{expression:?}' should have been resolved by caller") + } + RevsetExpression::NotIn(complement) => { + ResolvedPredicateExpression::NotIn(self.resolve_predicate(complement).into()) + } + RevsetExpression::Union(expression1, expression2) => { + let predicate1 = self.resolve_predicate(expression1); + let predicate2 = self.resolve_predicate(expression2); + ResolvedPredicateExpression::Union(predicate1.into(), predicate2.into()) + } + // Intersection of filters should have been substituted by optimize(). + // If it weren't, just fall back to the set evaluation path. + RevsetExpression::Intersection(..) | RevsetExpression::Difference(..) => { + ResolvedPredicateExpression::Set(self.resolve(expression).into()) + } + } + } +} + pub trait Revset<'index>: fmt::Debug { /// Iterate in topological order with children before parents. fn iter(&self) -> Box + '_>; diff --git a/lib/tests/test_default_revset_graph_iterator.rs b/lib/tests/test_default_revset_graph_iterator.rs index d3a365966..72f2c5194 100644 --- a/lib/tests/test_default_revset_graph_iterator.rs +++ b/lib/tests/test_default_revset_graph_iterator.rs @@ -17,7 +17,7 @@ use jujutsu_lib::commit::Commit; use jujutsu_lib::default_index_store::ReadonlyIndexImpl; use jujutsu_lib::default_revset_engine::{evaluate, RevsetImpl}; use jujutsu_lib::repo::Repo; -use jujutsu_lib::revset::{RevsetExpression, RevsetGraphEdge}; +use jujutsu_lib::revset::{ResolvedExpression, RevsetGraphEdge}; use test_case::test_case; use testutils::{CommitGraphBuilder, TestRepo}; @@ -28,7 +28,7 @@ fn revset_for_commits<'index>(repo: &'index dyn Repo, commits: &[&Commit]) -> Re .downcast_ref::() .unwrap(); let expression = - RevsetExpression::commits(commits.iter().map(|commit| commit.id().clone()).collect()); + ResolvedExpression::Commits(commits.iter().map(|commit| commit.id().clone()).collect()); evaluate( &expression, repo.store(),