revset: resolve symbols earlier, before passing to revset engine

For large repos, it's useful to be able to use shorter change id and
commit id prefixes by resolving the prefix in a limited subset of the
repo (typically the same subset that you'd want to see in your default
log output). For very large repos, like Google's internal one, the
shortest unique prefix evaluated within the whole repo is practically
useless because it's long enough that the user would want to copy and
paste it anyway.

Mercurial supports this with its `revisions.disambiguatewithin` config
(added in https://www.mercurial-scm.org/repo/hg/rev/503f936489dd). I'd
like to add the same feature to jj. Mercurial's implementation works
by attempting to resolve the prefix in the whole repo and then, if the
prefix was ambiguous, it resolves it in the configured subset
instead. The advantage of doing it that way is that there's no extra
cost of resolving the revset defining the subset if the prefix was not
ambiguous within the whole repo. However, there are two important
reasons to do it differently in jj:

* We support very large repos using custom backends, and it's probably
  cheaper to resolve a prefix within the subset because it can all be
  cached on the client. Resolving the prefix within the whole repo
  requires a roundtrip to the server.

* We want to be able to resolve change id prefixes, which is always
  done in *some* revset. That revset is currently `all()`, i.e. all
  visible commits. Even on local disk, it's probably cheaper to
  resolve a small revset first and then resolve the prefix within that
  than it is to build up the index of all visible change ids.

We could achieve the goal by letting each revset engine respect the
configured subset, but since the solution proposed above makes sense
also for local-disk repos, I think it's better to do it outside of the
revset engine, so all revset engines can share the code.

This commit prepares for the new functionality by moving the symbol
resolution out of `Index::evaluate_revset()`.
This commit is contained in:
Martin von Zweigbergk 2023-03-16 10:03:47 -07:00 committed by Martin von Zweigbergk
parent ac23395ea1
commit 94aec90bee
4 changed files with 45 additions and 12 deletions

View file

@ -536,6 +536,8 @@ pub fn evaluate<'index>(
Ok(Box::new(revset_impl))
}
// TODO: delete unused workspace_ctx argument
#[allow(clippy::only_used_in_recursion)]
fn evaluate_impl<'index>(
repo: &'index dyn Repo,
expression: &RevsetExpression,
@ -559,8 +561,7 @@ fn evaluate_impl<'index>(
}
RevsetExpression::Commits(commit_ids) => Ok(revset_for_commit_ids(repo, commit_ids)),
RevsetExpression::Symbol(symbol) => {
let commit_ids = resolve_symbol(repo, symbol, workspace_ctx.map(|c| c.workspace_id))?;
evaluate_impl(repo, &RevsetExpression::Commits(commit_ids), workspace_ctx)
panic!("Symbol '{}' should have been resolved by caller", symbol);
}
RevsetExpression::Children(roots) => {
let root_set = evaluate_impl(repo, roots, workspace_ctx)?;

View file

@ -30,6 +30,7 @@ use thiserror::Error;
use crate::backend::{BackendError, BackendResult, CommitId};
use crate::commit::Commit;
use crate::default_index_store::{IndexEntry, IndexPosition};
use crate::default_revset_engine::resolve_symbol;
use crate::op_store::WorkspaceId;
use crate::repo::Repo;
use crate::repo_path::{FsPathParseError, RepoPath};
@ -1112,9 +1113,14 @@ fn try_transform_expression_bottom_up(
RevsetExpression::AsFilter(candidates) => {
transform_rec(candidates, f)?.map(RevsetExpression::AsFilter)
}
RevsetExpression::Present(candidates) => {
transform_rec(candidates, f)?.map(RevsetExpression::Present)
}
RevsetExpression::Present(candidates) => match transform_rec(candidates, f) {
Ok(None) => None,
Ok(Some(expression)) => Some(RevsetExpression::Present(expression)),
Err(RevsetError::NoSuchRevision(_)) => Some(RevsetExpression::None),
r @ Err(RevsetError::AmbiguousIdPrefix(_) | RevsetError::StoreError(_)) => {
return r;
}
},
RevsetExpression::NotIn(complement) => {
transform_rec(complement, f)?.map(RevsetExpression::NotIn)
}
@ -1385,6 +1391,29 @@ pub fn optimize(expression: Rc<RevsetExpression>) -> Rc<RevsetExpression> {
fold_difference(&expression).unwrap_or(expression)
}
// TODO: Maybe return a new type (RevsetParameters?) instead of
// RevsetExpression. Then pass that to evaluate(), so it's clear which variants
// are allowed.
pub fn resolve_symbols(
repo: &dyn Repo,
expression: Rc<RevsetExpression>,
workspace_ctx: Option<&RevsetWorkspaceContext>,
) -> Result<Rc<RevsetExpression>, RevsetError> {
Ok(
try_transform_expression_bottom_up(&expression, |expression| {
Ok(match expression.as_ref() {
RevsetExpression::Symbol(symbol) => {
let commit_ids =
resolve_symbol(repo, symbol, workspace_ctx.map(|ctx| ctx.workspace_id))?;
Some(RevsetExpression::commits(commit_ids))
}
_ => None,
})
})?
.unwrap_or(expression),
)
}
pub trait Revset<'index> {
// All revsets currently iterate in order of descending index position
fn iter(&self) -> Box<dyn Iterator<Item = IndexEntry<'index>> + '_>;

View file

@ -23,8 +23,9 @@ use jujutsu_lib::op_store::{RefTarget, WorkspaceId};
use jujutsu_lib::repo::Repo;
use jujutsu_lib::repo_path::RepoPath;
use jujutsu_lib::revset::{
optimize, parse, ReverseRevsetGraphIterator, RevsetAliasesMap, RevsetError, RevsetExpression,
RevsetFilterPredicate, RevsetGraphEdge, RevsetIteratorExt, RevsetWorkspaceContext,
optimize, parse, resolve_symbols, ReverseRevsetGraphIterator, RevsetAliasesMap, RevsetError,
RevsetExpression, RevsetFilterPredicate, RevsetGraphEdge, RevsetIteratorExt,
RevsetWorkspaceContext,
};
use jujutsu_lib::settings::GitSettings;
use jujutsu_lib::workspace::Workspace;
@ -146,9 +147,7 @@ fn test_resolve_symbol_commit_id() {
// Test present() suppresses only NoSuchRevision error
assert_eq!(resolve_commit_ids(&repo, "present(foo)"), []);
assert_matches!(
optimize(parse("present(04)", &RevsetAliasesMap::new(), None).unwrap())
.evaluate(&repo, None)
.map(|_| ()),
resolve_symbols(&repo, optimize(parse("present(04)", &RevsetAliasesMap::new(), None).unwrap()), None),
Err(RevsetError::AmbiguousIdPrefix(s)) if s == "04"
);
assert_eq!(
@ -478,6 +477,7 @@ fn test_resolve_symbol_git_refs() {
fn resolve_commit_ids(repo: &dyn Repo, revset_str: &str) -> Vec<CommitId> {
let expression = optimize(parse(revset_str, &RevsetAliasesMap::new(), None).unwrap());
let expression = resolve_symbols(repo, expression, None).unwrap();
expression
.evaluate(repo, None)
.unwrap()
@ -499,6 +499,7 @@ fn resolve_commit_ids_in_workspace(
};
let expression =
optimize(parse(revset_str, &RevsetAliasesMap::new(), Some(&workspace_ctx)).unwrap());
let expression = resolve_symbols(repo, expression, Some(&workspace_ctx)).unwrap();
expression
.evaluate(repo, Some(&workspace_ctx))
.unwrap()

View file

@ -44,8 +44,8 @@ use jujutsu_lib::repo::{
};
use jujutsu_lib::repo_path::{FsPathParseError, RepoPath};
use jujutsu_lib::revset::{
Revset, RevsetAliasesMap, RevsetError, RevsetExpression, RevsetIteratorExt, RevsetParseError,
RevsetWorkspaceContext,
resolve_symbols, Revset, RevsetAliasesMap, RevsetError, RevsetExpression, RevsetIteratorExt,
RevsetParseError, RevsetWorkspaceContext,
};
use jujutsu_lib::settings::UserSettings;
use jujutsu_lib::transaction::Transaction;
@ -827,6 +827,8 @@ impl WorkspaceCommandHelper {
&'repo self,
revset_expression: Rc<RevsetExpression>,
) -> Result<Box<dyn Revset<'repo> + 'repo>, RevsetError> {
let revset_expression =
resolve_symbols(&self.repo, revset_expression, Some(&self.revset_context()))?;
revset_expression.evaluate(&self.repo, Some(&self.revset_context()))
}