From ddc601fbf9625285da4ca476d4e0c404823280a9 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sun, 21 Jul 2024 21:44:43 +0900 Subject: [PATCH] str_util: add regex pattern This patch adds minimal support for the regex pattern. We might have to add "regex-i:" for completeness, but it can be achieved by "regex:'(?i)..'". --- CHANGELOG.md | 2 ++ cli/src/command_error.rs | 4 ++-- cli/tests/test_branch_command.rs | 2 +- cli/tests/test_commit_template.rs | 2 +- cli/tests/test_revset_output.rs | 18 +++++++++++++++++- docs/revsets.md | 2 ++ lib/src/str_util.rs | 26 ++++++++++++++++++++++++-- lib/tests/test_revset.rs | 8 ++++++++ 8 files changed, 57 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8db6d9580..7b3a549f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,8 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). address unconditionally. Only ASCII case folding is currently implemented, but this will likely change in the future. +* String patterns now support `regex:"pattern"`. + * New `tracked_remote_branches()` and `untracked_remote_branches()` revset functions can be used to select tracked/untracked remote branches. diff --git a/cli/src/command_error.rs b/cli/src/command_error.rs index 4471f9b6f..6f95390aa 100644 --- a/cli/src/command_error.rs +++ b/cli/src/command_error.rs @@ -630,9 +630,9 @@ fn revset_resolution_error_hint(err: &RevsetResolutionError) -> Option { fn string_pattern_parse_error_hint(err: &StringPatternParseError) -> Option { match err { StringPatternParseError::InvalidKind(_) => { - Some("Try prefixing with one of `exact:`, `glob:` or `substring:`".into()) + Some("Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:`".into()) } - StringPatternParseError::GlobPattern(_) => None, + StringPatternParseError::GlobPattern(_) | StringPatternParseError::Regex(_) => None, } } diff --git a/cli/tests/test_branch_command.rs b/cli/tests/test_branch_command.rs index adfd61e36..c07006d91 100644 --- a/cli/tests/test_branch_command.rs +++ b/cli/tests/test_branch_command.rs @@ -621,7 +621,7 @@ fn test_branch_delete_glob() { error: invalid value 'whatever:branch' for '...': Invalid string pattern kind "whatever:" For more information, try '--help'. - Hint: Try prefixing with one of `exact:`, `glob:` or `substring:` + Hint: Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:` "###); } diff --git a/cli/tests/test_commit_template.rs b/cli/tests/test_commit_template.rs index bc1705609..a6641f007 100644 --- a/cli/tests/test_commit_template.rs +++ b/cli/tests/test_commit_template.rs @@ -851,7 +851,7 @@ fn test_log_contained_in() { | = Invalid string pattern 3: Invalid string pattern kind "x:" - Hint: Try prefixing with one of `exact:`, `glob:` or `substring:` + Hint: Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:` "###); let stderr = test_env.jj_cmd_failure( diff --git a/cli/tests/test_revset_output.rs b/cli/tests/test_revset_output.rs index 27f15360a..315e5b4c1 100644 --- a/cli/tests/test_revset_output.rs +++ b/cli/tests/test_revset_output.rs @@ -208,7 +208,23 @@ fn test_bad_function_call() { | = Invalid string pattern 2: Invalid string pattern kind "bad:" - Hint: Try prefixing with one of `exact:`, `glob:` or `substring:` + Hint: Try prefixing with one of `exact:`, `glob:`, `regex:`, or `substring:` + "###); + + let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "branches(regex:'(')"]); + insta::assert_snapshot!(stderr, @r###" + Error: Failed to parse revset: Invalid string pattern + Caused by: + 1: --> 1:10 + | + 1 | branches(regex:'(') + | ^-------^ + | + = Invalid string pattern + 2: regex parse error: + ( + ^ + error: unclosed group "###); let stderr = test_env.jj_cmd_failure(&repo_path, &["log", "-r", "root()::whatever()"]); diff --git a/docs/revsets.md b/docs/revsets.md index ada858a24..4b7dcbca4 100644 --- a/docs/revsets.md +++ b/docs/revsets.md @@ -353,6 +353,8 @@ Functions that perform string matching support the following pattern syntax: * `exact:"string"`: Matches strings exactly equal to `string`. * `glob:"pattern"`: Matches strings with Unix-style shell [wildcard `pattern`](https://docs.rs/glob/latest/glob/struct.Pattern.html). +* `regex:"pattern"`: Matches substrings with [regular + expression `pattern`](https://docs.rs/regex/latest/regex/#syntax). You can append `-i` after the kind to match case‐insensitively (e.g. `glob-i:"fix*jpeg*"`). diff --git a/lib/src/str_util.rs b/lib/src/str_util.rs index f1fdec2a4..3f0a832b0 100644 --- a/lib/src/str_util.rs +++ b/lib/src/str_util.rs @@ -30,6 +30,9 @@ pub enum StringPatternParseError { /// Failed to parse glob pattern. #[error(transparent)] GlobPattern(glob::PatternError), + /// Failed to parse regular expression. + #[error(transparent)] + Regex(regex::Error), } fn parse_glob(src: &str) -> Result { @@ -52,6 +55,9 @@ pub enum StringPattern { Glob(glob::Pattern), /// Matches with a case‐insensitive Unix‐style shell wildcard pattern. GlobI(glob::Pattern), + /// Matches substrings with a regular expression. + Regex(regex::Regex), + // TODO: Should we add RegexI and "regex-i" prefix? } impl StringPattern { @@ -107,6 +113,12 @@ impl StringPattern { Ok(StringPattern::GlobI(parse_glob(src)?)) } + /// Parses the given string as a regular expression. + pub fn regex(src: &str) -> Result { + let pattern = regex::Regex::new(src).map_err(StringPatternParseError::Regex)?; + Ok(StringPattern::Regex(pattern)) + } + /// Parses the given string as a pattern of the specified `kind`. pub fn from_str_kind(src: &str, kind: &str) -> Result { match kind { @@ -116,6 +128,7 @@ impl StringPattern { "substring-i" => Ok(StringPattern::substring_i(src)), "glob" => StringPattern::glob(src), "glob-i" => StringPattern::glob_i(src), + "regex" => StringPattern::regex(src), _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())), } } @@ -147,14 +160,13 @@ impl StringPattern { StringPattern::SubstringI(needle) => needle, StringPattern::Glob(pattern) => pattern.as_str(), StringPattern::GlobI(pattern) => pattern.as_str(), + StringPattern::Regex(pattern) => pattern.as_str(), } } /// Converts this pattern to a glob string. Returns `None` if the pattern /// can't be represented as a glob. pub fn to_glob(&self) -> Option> { - // TODO: If we add Regex pattern, it will return None. - // // TODO: Handle trivial case‐insensitive patterns here? It might make people // expect they can use case‐insensitive patterns in contexts where they // generally can’t. @@ -171,6 +183,7 @@ impl StringPattern { StringPattern::ExactI(_) => None, StringPattern::SubstringI(_) => None, StringPattern::GlobI(_) => None, + StringPattern::Regex(_) => None, } } @@ -209,6 +222,7 @@ impl StringPattern { ..glob::MatchOptions::new() }, ), + StringPattern::Regex(pattern) => pattern.is_match(haystack), } } @@ -292,6 +306,14 @@ mod tests { StringPattern::from_str_kind("foo", "substring-i"), Ok(StringPattern::SubstringI(s)) if s == "foo" ); + assert_matches!( + StringPattern::parse("regex:foo"), + Ok(StringPattern::Regex(p)) if p.as_str() == "foo" + ); + assert_matches!( + StringPattern::from_str_kind("foo", "regex"), + Ok(StringPattern::Regex(p)) if p.as_str() == "foo" + ); // Parse a pattern that contains a : itself. assert_matches!( diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index 6add8b4d6..43f3fb75f 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -2015,6 +2015,14 @@ fn test_evaluate_expression_branches() { resolve_commit_ids(mut_repo, r#"branches(glob-i:"Branch?")"#), vec![commit2.id().clone(), commit1.id().clone()] ); + assert_eq!( + resolve_commit_ids(mut_repo, "branches(regex:'ranch')"), + vec![commit2.id().clone(), commit1.id().clone()] + ); + assert_eq!( + resolve_commit_ids(mut_repo, "branches(regex:'^[Bb]ranch1$')"), + vec![commit1.id().clone()] + ); // Can silently resolve to an empty set if there's no matches assert_eq!(resolve_commit_ids(mut_repo, "branches(branch3)"), vec![]); assert_eq!(