str_util: add regex pattern

This patch adds minimal support for the regex pattern. We might have to add
"regex-i:" for completeness, but it can be achieved by "regex:'(?i)..'".
This commit is contained in:
Yuya Nishihara 2024-07-21 21:44:43 +09:00
parent 845793a7ad
commit ddc601fbf9
8 changed files with 57 additions and 7 deletions

View file

@ -58,6 +58,8 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
address unconditionally. Only ASCII case folding is currently implemented,
but this will likely change in the future.
* String patterns now support `regex:"pattern"`.
* New `tracked_remote_branches()` and `untracked_remote_branches()` revset
functions can be used to select tracked/untracked remote branches.

View file

@ -630,9 +630,9 @@ fn revset_resolution_error_hint(err: &RevsetResolutionError) -> Option<String> {
fn string_pattern_parse_error_hint(err: &StringPatternParseError) -> Option<String> {
match err {
StringPatternParseError::InvalidKind(_) => {
Some("Try prefixing with one of `exact:`, `glob:` or `substring:`".into())
Some("Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:`".into())
}
StringPatternParseError::GlobPattern(_) => None,
StringPatternParseError::GlobPattern(_) | StringPatternParseError::Regex(_) => None,
}
}

View file

@ -621,7 +621,7 @@ fn test_branch_delete_glob() {
error: invalid value 'whatever:branch' for '<NAMES>...': Invalid string pattern kind "whatever:"
For more information, try '--help'.
Hint: Try prefixing with one of `exact:`, `glob:` or `substring:`
Hint: Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:`
"###);
}

View file

@ -851,7 +851,7 @@ fn test_log_contained_in() {
|
= Invalid string pattern
3: Invalid string pattern kind "x:"
Hint: Try prefixing with one of `exact:`, `glob:` or `substring:`
Hint: Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:`
"###);
let stderr = test_env.jj_cmd_failure(

View file

@ -208,7 +208,23 @@ fn test_bad_function_call() {
|
= Invalid string pattern
2: Invalid string pattern kind "bad:"
Hint: Try prefixing with one of `exact:`, `glob:` or `substring:`
Hint: Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:`
"###);
let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "branches(regex:'(')"]);
insta::assert_snapshot!(stderr, @r###"
Error: Failed to parse revset: Invalid string pattern
Caused by:
1: --> 1:10
|
1 | branches(regex:'(')
| ^-------^
|
= Invalid string pattern
2: regex parse error:
(
^
error: unclosed group
"###);
let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "root()::whatever()"]);

View file

@ -353,6 +353,8 @@ Functions that perform string matching support the following pattern syntax:
* `exact:"string"`: Matches strings exactly equal to `string`.
* `glob:"pattern"`: Matches strings with Unix-style shell [wildcard
`pattern`](https://docs.rs/glob/latest/glob/struct.Pattern.html).
* `regex:"pattern"`: Matches substrings with [regular
expression `pattern`](https://docs.rs/regex/latest/regex/#syntax).
You can append `-i` after the kind to match case-insensitively (e.g.
`glob-i:"fix*jpeg*"`).

View file

@ -30,6 +30,9 @@ pub enum StringPatternParseError {
/// Failed to parse glob pattern.
#[error(transparent)]
GlobPattern(glob::PatternError),
/// Failed to parse regular expression.
#[error(transparent)]
Regex(regex::Error),
}
fn parse_glob(src: &str) -> Result<glob::Pattern, StringPatternParseError> {
@ -52,6 +55,9 @@ pub enum StringPattern {
Glob(glob::Pattern),
/// Matches with a case-insensitive Unix-style shell wildcard pattern.
GlobI(glob::Pattern),
/// Matches substrings with a regular expression.
Regex(regex::Regex),
// TODO: Should we add RegexI and "regex-i" prefix?
}
impl StringPattern {
@ -107,6 +113,12 @@ impl StringPattern {
Ok(StringPattern::GlobI(parse_glob(src)?))
}
/// Compiles `src` as a regular expression pattern.
///
/// # Errors
///
/// Returns `StringPatternParseError::Regex` wrapping the underlying
/// `regex::Error` if `src` is not a valid regular expression.
pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
    regex::Regex::new(src)
        .map(StringPattern::Regex)
        .map_err(StringPatternParseError::Regex)
}
/// Parses the given string as a pattern of the specified `kind`.
pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
match kind {
@ -116,6 +128,7 @@ impl StringPattern {
"substring-i" => Ok(StringPattern::substring_i(src)),
"glob" => StringPattern::glob(src),
"glob-i" => StringPattern::glob_i(src),
"regex" => StringPattern::regex(src),
_ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
}
}
@ -147,14 +160,13 @@ impl StringPattern {
StringPattern::SubstringI(needle) => needle,
StringPattern::Glob(pattern) => pattern.as_str(),
StringPattern::GlobI(pattern) => pattern.as_str(),
StringPattern::Regex(pattern) => pattern.as_str(),
}
}
/// Converts this pattern to a glob string. Returns `None` if the pattern
/// can't be represented as a glob.
pub fn to_glob(&self) -> Option<Cow<'_, str>> {
// TODO: If we add Regex pattern, it will return None.
//
// TODO: Handle trivial case-insensitive patterns here? It might make people
// expect they can use case-insensitive patterns in contexts where they
// generally can't.
@ -171,6 +183,7 @@ impl StringPattern {
StringPattern::ExactI(_) => None,
StringPattern::SubstringI(_) => None,
StringPattern::GlobI(_) => None,
StringPattern::Regex(_) => None,
}
}
@ -209,6 +222,7 @@ impl StringPattern {
..glob::MatchOptions::new()
},
),
StringPattern::Regex(pattern) => pattern.is_match(haystack),
}
}
@ -292,6 +306,14 @@ mod tests {
StringPattern::from_str_kind("foo", "substring-i"),
Ok(StringPattern::SubstringI(s)) if s == "foo"
);
assert_matches!(
StringPattern::parse("regex:foo"),
Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
);
assert_matches!(
StringPattern::from_str_kind("foo", "regex"),
Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
);
// Parse a pattern that contains a : itself.
assert_matches!(

View file

@ -2015,6 +2015,14 @@ fn test_evaluate_expression_branches() {
resolve_commit_ids(mut_repo, r#"branches(glob-i:"Branch?")"#),
vec![commit2.id().clone(), commit1.id().clone()]
);
assert_eq!(
resolve_commit_ids(mut_repo, "branches(regex:'ranch')"),
vec![commit2.id().clone(), commit1.id().clone()]
);
assert_eq!(
resolve_commit_ids(mut_repo, "branches(regex:'^[Bb]ranch1$')"),
vec![commit1.id().clone()]
);
// Can silently resolve to an empty set if there's no matches
assert_eq!(resolve_commit_ids(mut_repo, "branches(branch3)"), vec![]);
assert_eq!(