fileset: add basic pattern parsing functions

Naming convention is described in FilePattern::from_str_kind(). It's based
on Mercurial's pattern prefixes, but hopefully fixes some inconsistencies.
https://github.com/martinvonz/jj/issues/2915#issuecomment-1956401114

#3239
This commit is contained in:
Yuya Nishihara 2024-04-05 21:39:59 +09:00
parent 3c1d485452
commit 850887cf09
3 changed files with 179 additions and 1 deletions

21
docs/filesets.md Normal file
View file

@ -0,0 +1,21 @@
# Filesets
<!--
TODO: implement fileset parser and add logical operators
Jujutsu supports a functional language for selecting a set of files.
Expressions in this language are called "filesets" (the idea comes from
[Mercurial](https://repo.mercurial-scm.org/hg/help/filesets)). The language
consists of symbols, operators, and functions.
-->
## File patterns
The following patterns are supported:
* `"path"`, `path` (the quotes are optional), or `cwd:"path"`: Matches
cwd-relative path prefix (file or files under directory recursively).
* `cwd-file:"path"` or `file:"path"`: Matches cwd-relative file (or exact) path.
* `root:"path"`: Matches workspace-relative path prefix (file or files under
directory recursively).
* `root-file:"path"`: Matches workspace-relative file (or exact) path.

View file

@ -14,13 +14,30 @@
//! Functional language for selecting a set of paths. //! Functional language for selecting a set of paths.
use std::path::Path;
use std::slice; use std::slice;
use thiserror::Error;
use crate::matchers::{ use crate::matchers::{
DifferenceMatcher, EverythingMatcher, FilesMatcher, IntersectionMatcher, Matcher, DifferenceMatcher, EverythingMatcher, FilesMatcher, IntersectionMatcher, Matcher,
NothingMatcher, PrefixMatcher, UnionMatcher, NothingMatcher, PrefixMatcher, UnionMatcher,
}; };
use crate::repo_path::RepoPathBuf; use crate::repo_path::{FsPathParseError, RelativePathParseError, RepoPathBuf};
/// Error occurred during file pattern parsing.
///
/// Returned by the `FilePattern` parsing functions and constructors. The
/// path-related variants wrap the underlying path parse error transparently,
/// so its message is shown as is.
#[derive(Debug, Error)]
pub enum FilePatternParseError {
/// Unknown pattern kind is specified.
///
/// Holds the unrecognized kind name (without the trailing `:`).
#[error(r#"Invalid file pattern kind "{0}:""#)]
InvalidKind(String),
/// Failed to parse input cwd-relative path.
#[error(transparent)]
FsPath(#[from] FsPathParseError),
/// Failed to parse input workspace-relative path.
#[error(transparent)]
RelativePath(#[from] RelativePathParseError),
}
/// Basic pattern to match `RepoPath`. /// Basic pattern to match `RepoPath`.
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
@ -35,6 +52,78 @@ pub enum FilePattern {
// - NameGlob or SuffixGlob: file name with glob? // - NameGlob or SuffixGlob: file name with glob?
} }
impl FilePattern {
    /// Parses the given `input` string as a file pattern.
    ///
    /// If `input` contains a `:`, the text before the first `:` is taken as the
    /// pattern kind and the remainder as the pattern itself (see
    /// `from_str_kind()`). Otherwise the whole `input` is treated as a
    /// cwd-relative path prefix.
    // TODO: If we decide to parse any file argument as a fileset expression,
    // this function can be removed.
    pub fn parse(ctx: &FilesetParseContext, input: &str) -> Result<Self, FilePatternParseError> {
        match input.split_once(':') {
            Some((kind, pattern)) => Self::from_str_kind(ctx, pattern, kind),
            None => Self::cwd_prefix_path(ctx, input),
        }
    }

    /// Parses the given `input` string as pattern of the specified `kind`.
    ///
    /// Returns `FilePatternParseError::InvalidKind` if `kind` is not one of the
    /// supported kind names.
    pub fn from_str_kind(
        ctx: &FilesetParseContext,
        input: &str,
        kind: &str,
    ) -> Result<Self, FilePatternParseError> {
        // Naming convention:
        // * path normalization
        //   * cwd: cwd-relative path (default)
        //   * root: workspace-relative path
        // * where to anchor
        //   * file: exact file path
        //   * prefix: path prefix (files under directory recursively) (default)
        //   * files-in: files in directory non-recursively
        //   * name: file name component (or suffix match?)
        //   * substring: substring match?
        // * string pattern syntax (+ case sensitivity?)
        //   * path: literal path (default)
        //   * glob
        //   * regex?
        let pattern = match kind {
            "cwd" => Self::cwd_prefix_path(ctx, input)?,
            "cwd-file" | "file" => Self::cwd_file_path(ctx, input)?,
            "root" => Self::root_prefix_path(input)?,
            "root-file" => Self::root_file_path(input)?,
            unknown => return Err(FilePatternParseError::InvalidKind(unknown.to_owned())),
        };
        Ok(pattern)
    }

    /// Pattern that matches cwd-relative file (or exact) path.
    pub fn cwd_file_path(
        ctx: &FilesetParseContext,
        input: impl AsRef<Path>,
    ) -> Result<Self, FilePatternParseError> {
        ctx.parse_cwd_path(input)
            .map(Self::FilePath)
            .map_err(FilePatternParseError::from)
    }

    /// Pattern that matches cwd-relative path prefix.
    pub fn cwd_prefix_path(
        ctx: &FilesetParseContext,
        input: impl AsRef<Path>,
    ) -> Result<Self, FilePatternParseError> {
        ctx.parse_cwd_path(input)
            .map(Self::PrefixPath)
            .map_err(FilePatternParseError::from)
    }

    /// Pattern that matches workspace-relative file (or exact) path.
    pub fn root_file_path(input: impl AsRef<Path>) -> Result<Self, FilePatternParseError> {
        RepoPathBuf::from_relative_path(input)
            .map(Self::FilePath)
            .map_err(FilePatternParseError::from)
    }

    /// Pattern that matches workspace-relative path prefix.
    pub fn root_prefix_path(input: impl AsRef<Path>) -> Result<Self, FilePatternParseError> {
        RepoPathBuf::from_relative_path(input)
            .map(Self::PrefixPath)
            .map_err(FilePatternParseError::from)
    }
}
/// AST-level representation of the fileset expression. /// AST-level representation of the fileset expression.
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
pub enum FilesetExpression { pub enum FilesetExpression {
@ -178,6 +267,21 @@ fn union_all_matchers(matchers: &mut [Option<Box<dyn Matcher>>]) -> Box<dyn Matc
} }
} }
/// Environment where fileset expression is parsed.
///
/// Both paths are borrowed for the lifetime `'a`; they are used to resolve
/// cwd-relative pattern paths.
#[derive(Clone, Debug)]
pub struct FilesetParseContext<'a> {
/// Normalized path to the current working directory.
pub cwd: &'a Path,
/// Normalized path to the workspace root.
pub workspace_root: &'a Path,
}
impl FilesetParseContext<'_> {
    /// Parses `input` as a path relative to this context's `cwd`, delegating
    /// to `RepoPathBuf::parse_fs_path()` with `cwd` and `workspace_root`.
    fn parse_cwd_path(&self, input: impl AsRef<Path>) -> Result<RepoPathBuf, FsPathParseError> {
        let cwd: &Path = self.cwd;
        let workspace_root: &Path = self.workspace_root;
        RepoPathBuf::parse_fs_path(cwd, workspace_root, input)
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -186,6 +290,58 @@ mod tests {
RepoPathBuf::from_internal_string(value) RepoPathBuf::from_internal_string(value)
} }
#[test]
fn test_parse_file_pattern() {
    let ctx = FilesetParseContext {
        cwd: Path::new("/ws/cur"),
        workspace_root: Path::new("/ws"),
    };
    // TODO: implement fileset expression parser and test it instead
    let parse = |input: &str| FilePattern::parse(&ctx, input).map(FilesetExpression::pattern);

    // Shorthands for building the expected expressions.
    let prefix = |path: &str| FilesetExpression::prefix_path(repo_path_buf(path));
    let file = |path: &str| FilesetExpression::file_path(repo_path_buf(path));
    let root_prefix = || FilesetExpression::prefix_path(RepoPathBuf::root());

    // Inputs that must parse, paired with the expected expression.
    let accepted = [
        // cwd-relative patterns
        (".", prefix("cur")),
        ("..", root_prefix()),
        ("foo", prefix("cur/foo")),
        ("cwd:.", prefix("cur")),
        ("cwd-file:foo", file("cur/foo")),
        ("file:../foo/bar", file("foo/bar")),
        // workspace-relative patterns
        ("root:.", root_prefix()),
        ("root:foo/bar", prefix("foo/bar")),
        ("root-file:bar", file("bar")),
    ];
    for (input, expected) in accepted {
        assert_eq!(parse(input).unwrap(), expected, "input: {input:?}");
    }

    // Paths escaping the workspace root must be rejected.
    for input in ["../..", "root:.."] {
        assert!(parse(input).is_err(), "input: {input:?}");
    }
}
#[test] #[test]
fn test_build_matcher_simple() { fn test_build_matcher_simple() {
insta::assert_debug_snapshot!(FilesetExpression::none().to_matcher(), @"NothingMatcher"); insta::assert_debug_snapshot!(FilesetExpression::none().to_matcher(), @"NothingMatcher");

View file

@ -99,6 +99,7 @@ nav:
- 'Configuration': - 'Configuration':
- 'Settings': 'config.md' - 'Settings': 'config.md'
- 'Fileset language': 'filesets.md'
- 'Revset language': 'revsets.md' - 'Revset language': 'revsets.md'
- 'Templating language': 'templates.md' - 'Templating language': 'templates.md'