str_util: add regex pattern

This patch adds minimal support for the regex pattern. We might have to add
"regex-i:" for completeness, but it can be achieved by "regex:'(?i)..'".
This commit is contained in:
Yuya Nishihara 2024-07-21 21:44:43 +09:00
parent 845793a7ad
commit ddc601fbf9
8 changed files with 57 additions and 7 deletions

View file

@ -58,6 +58,8 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
address unconditionally. Only ASCII case folding is currently implemented, address unconditionally. Only ASCII case folding is currently implemented,
but this will likely change in the future. but this will likely change in the future.
* String patterns now support `regex:"pattern"`.
* New `tracked_remote_branches()` and `untracked_remote_branches()` revset * New `tracked_remote_branches()` and `untracked_remote_branches()` revset
functions can be used to select tracked/untracked remote branches. functions can be used to select tracked/untracked remote branches.

View file

@ -630,9 +630,9 @@ fn revset_resolution_error_hint(err: &RevsetResolutionError) -> Option<String> {
fn string_pattern_parse_error_hint(err: &StringPatternParseError) -> Option<String> { fn string_pattern_parse_error_hint(err: &StringPatternParseError) -> Option<String> {
match err { match err {
StringPatternParseError::InvalidKind(_) => { StringPatternParseError::InvalidKind(_) => {
Some("Try prefixing with one of `exact:`, `glob:` or `substring:`".into()) Some("Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:`".into())
} }
StringPatternParseError::GlobPattern(_) => None, StringPatternParseError::GlobPattern(_) | StringPatternParseError::Regex(_) => None,
} }
} }

View file

@ -621,7 +621,7 @@ fn test_branch_delete_glob() {
error: invalid value 'whatever:branch' for '<NAMES>...': Invalid string pattern kind "whatever:" error: invalid value 'whatever:branch' for '<NAMES>...': Invalid string pattern kind "whatever:"
For more information, try '--help'. For more information, try '--help'.
Hint: Try prefixing with one of `exact:`, `glob:` or `substring:` Hint: Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:`
"###); "###);
} }

View file

@ -851,7 +851,7 @@ fn test_log_contained_in() {
| |
= Invalid string pattern = Invalid string pattern
3: Invalid string pattern kind "x:" 3: Invalid string pattern kind "x:"
Hint: Try prefixing with one of `exact:`, `glob:` or `substring:` Hint: Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:`
"###); "###);
let stderr = test_env.jj_cmd_failure( let stderr = test_env.jj_cmd_failure(

View file

@ -208,7 +208,23 @@ fn test_bad_function_call() {
| |
= Invalid string pattern = Invalid string pattern
2: Invalid string pattern kind "bad:" 2: Invalid string pattern kind "bad:"
Hint: Try prefixing with one of `exact:`, `glob:` or `substring:` Hint: Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:`
"###);
let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "branches(regex:'(')"]);
insta::assert_snapshot!(stderr, @r###"
Error: Failed to parse revset: Invalid string pattern
Caused by:
1: --> 1:10
|
1 | branches(regex:'(')
| ^-------^
|
= Invalid string pattern
2: regex parse error:
(
^
error: unclosed group
"###); "###);
let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "root()::whatever()"]); let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "root()::whatever()"]);

View file

@ -353,6 +353,8 @@ Functions that perform string matching support the following pattern syntax:
* `exact:"string"`: Matches strings exactly equal to `string`. * `exact:"string"`: Matches strings exactly equal to `string`.
* `glob:"pattern"`: Matches strings with Unix-style shell [wildcard * `glob:"pattern"`: Matches strings with Unix-style shell [wildcard
`pattern`](https://docs.rs/glob/latest/glob/struct.Pattern.html). `pattern`](https://docs.rs/glob/latest/glob/struct.Pattern.html).
* `regex:"pattern"`: Matches substrings with [regular
expression `pattern`](https://docs.rs/regex/latest/regex/#syntax).
You can append `-i` after the kind to match case-insensitively (e.g. You can append `-i` after the kind to match case-insensitively (e.g.
`glob-i:"fix*jpeg*"`). `glob-i:"fix*jpeg*"`).

View file

@ -30,6 +30,9 @@ pub enum StringPatternParseError {
/// Failed to parse glob pattern. /// Failed to parse glob pattern.
#[error(transparent)] #[error(transparent)]
GlobPattern(glob::PatternError), GlobPattern(glob::PatternError),
/// Failed to parse regular expression.
#[error(transparent)]
Regex(regex::Error),
} }
fn parse_glob(src: &str) -> Result<glob::Pattern, StringPatternParseError> { fn parse_glob(src: &str) -> Result<glob::Pattern, StringPatternParseError> {
@ -52,6 +55,9 @@ pub enum StringPattern {
Glob(glob::Pattern), Glob(glob::Pattern),
/// Matches with a case-insensitive Unix-style shell wildcard pattern. /// Matches with a case-insensitive Unix-style shell wildcard pattern.
GlobI(glob::Pattern), GlobI(glob::Pattern),
/// Matches substrings with a regular expression.
Regex(regex::Regex),
// TODO: Should we add RegexI and "regex-i" prefix?
} }
impl StringPattern { impl StringPattern {
@ -107,6 +113,12 @@ impl StringPattern {
Ok(StringPattern::GlobI(parse_glob(src)?)) Ok(StringPattern::GlobI(parse_glob(src)?))
} }
/// Compiles `src` as a regular expression and wraps it in a
/// [`StringPattern::Regex`].
///
/// # Errors
///
/// Returns [`StringPatternParseError::Regex`] carrying the underlying
/// `regex::Error` if `src` fails to compile.
pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
    regex::Regex::new(src)
        .map(StringPattern::Regex)
        .map_err(StringPatternParseError::Regex)
}
/// Parses the given string as a pattern of the specified `kind`. /// Parses the given string as a pattern of the specified `kind`.
pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> { pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
match kind { match kind {
@ -116,6 +128,7 @@ impl StringPattern {
"substring-i" => Ok(StringPattern::substring_i(src)), "substring-i" => Ok(StringPattern::substring_i(src)),
"glob" => StringPattern::glob(src), "glob" => StringPattern::glob(src),
"glob-i" => StringPattern::glob_i(src), "glob-i" => StringPattern::glob_i(src),
"regex" => StringPattern::regex(src),
_ => Err(StringPatternParseError::InvalidKind(kind.to_owned())), _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
} }
} }
@ -147,14 +160,13 @@ impl StringPattern {
StringPattern::SubstringI(needle) => needle, StringPattern::SubstringI(needle) => needle,
StringPattern::Glob(pattern) => pattern.as_str(), StringPattern::Glob(pattern) => pattern.as_str(),
StringPattern::GlobI(pattern) => pattern.as_str(), StringPattern::GlobI(pattern) => pattern.as_str(),
StringPattern::Regex(pattern) => pattern.as_str(),
} }
} }
/// Converts this pattern to a glob string. Returns `None` if the pattern /// Converts this pattern to a glob string. Returns `None` if the pattern
/// can't be represented as a glob. /// can't be represented as a glob.
pub fn to_glob(&self) -> Option<Cow<'_, str>> { pub fn to_glob(&self) -> Option<Cow<'_, str>> {
// TODO: If we add Regex pattern, it will return None.
//
// TODO: Handle trivial case-insensitive patterns here? It might make people // TODO: Handle trivial case-insensitive patterns here? It might make people
// expect they can use case-insensitive patterns in contexts where they // expect they can use case-insensitive patterns in contexts where they
// generally can't. // generally can't.
@ -171,6 +183,7 @@ impl StringPattern {
StringPattern::ExactI(_) => None, StringPattern::ExactI(_) => None,
StringPattern::SubstringI(_) => None, StringPattern::SubstringI(_) => None,
StringPattern::GlobI(_) => None, StringPattern::GlobI(_) => None,
StringPattern::Regex(_) => None,
} }
} }
@ -209,6 +222,7 @@ impl StringPattern {
..glob::MatchOptions::new() ..glob::MatchOptions::new()
}, },
), ),
StringPattern::Regex(pattern) => pattern.is_match(haystack),
} }
} }
@ -292,6 +306,14 @@ mod tests {
StringPattern::from_str_kind("foo", "substring-i"), StringPattern::from_str_kind("foo", "substring-i"),
Ok(StringPattern::SubstringI(s)) if s == "foo" Ok(StringPattern::SubstringI(s)) if s == "foo"
); );
assert_matches!(
StringPattern::parse("regex:foo"),
Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
);
assert_matches!(
StringPattern::from_str_kind("foo", "regex"),
Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
);
// Parse a pattern that contains a : itself. // Parse a pattern that contains a : itself.
assert_matches!( assert_matches!(

View file

@ -2015,6 +2015,14 @@ fn test_evaluate_expression_branches() {
resolve_commit_ids(mut_repo, r#"branches(glob-i:"Branch?")"#), resolve_commit_ids(mut_repo, r#"branches(glob-i:"Branch?")"#),
vec![commit2.id().clone(), commit1.id().clone()] vec![commit2.id().clone(), commit1.id().clone()]
); );
assert_eq!(
resolve_commit_ids(mut_repo, "branches(regex:'ranch')"),
vec![commit2.id().clone(), commit1.id().clone()]
);
assert_eq!(
resolve_commit_ids(mut_repo, "branches(regex:'^[Bb]ranch1$')"),
vec![commit1.id().clone()]
);
// Can silently resolve to an empty set if there's no matches // Can silently resolve to an empty set if there's no matches
assert_eq!(resolve_commit_ids(mut_repo, "branches(branch3)"), vec![]); assert_eq!(resolve_commit_ids(mut_repo, "branches(branch3)"), vec![]);
assert_eq!( assert_eq!(