templates: support hex bytes in string literals

One particular use case for these is escape sequences -- and to that
end, I'm also adding `\e` as a shorthand for `\x1b`.

Change-Id: Id000000040ea6fd8e2d720219931485960c570dd
This commit is contained in:
Vamsi Avula 2024-10-06 09:14:53 +05:30
parent 93a4fcfe32
commit a6aa25c9eb
9 changed files with 102 additions and 16 deletions

View file

@ -32,6 +32,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
* `jj squash` now supports `-f/-t` shorthands for `--from/--[in]to`.
* String literals in filesets, revsets and templates now support `\xHH` hex
escapes, plus `\e` as a shorthand for the escape character (`\x1b`).
### Fixed bugs
* Error on `trunk()` revset resolution is now handled gracefully.

View file

@ -1,11 +1,11 @@
// Copyright 2020 The Jujutsu Authors
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//
// https://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -19,7 +19,10 @@
whitespace = _{ " " | "\t" | "\r" | "\n" | "\x0c" }
string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") }
string_escape = @{
"\\"
~ ("t" | "r" | "n" | "0" | "e" | ("x" ~ ASCII_HEX_DIGIT{2}) | "\"" | "\\")
}
string_content_char = @{ !("\"" | "\\") ~ ANY }
string_content = @{ string_content_char+ }
string_literal = ${ "\"" ~ (string_content | string_escape)* ~ "\"" }

View file

@ -992,8 +992,8 @@ mod tests {
fn test_string_literal() {
// "\<char>" escapes
assert_eq!(
parse_into_kind(r#" "\t\r\n\"\\\0" "#),
Ok(ExpressionKind::String("\t\r\n\"\\\0".to_owned())),
parse_into_kind(r#" "\t\r\n\"\\\0\e" "#),
Ok(ExpressionKind::String("\t\r\n\"\\\0\u{1b}".to_owned())),
);
// Invalid "\<char>" escape
@ -1019,6 +1019,28 @@ mod tests {
parse_into_kind(r#" '"' "#),
Ok(ExpressionKind::String(r#"""#.to_owned())),
);
// Hex bytes
assert_eq!(
parse_into_kind(r#""\x61\x65\x69\x6f\x75""#),
Ok(ExpressionKind::String("aeiou".to_owned())),
);
assert_eq!(
parse_into_kind(r#""\xe0\xe8\xec\xf0\xf9""#),
Ok(ExpressionKind::String("àèìðù".to_owned())),
);
assert_eq!(
parse_into_kind(r#""\x""#),
Err(TemplateParseErrorKind::SyntaxError),
);
assert_eq!(
parse_into_kind(r#""\xf""#),
Err(TemplateParseErrorKind::SyntaxError),
);
assert_eq!(
parse_into_kind(r#""\xgg""#),
Err(TemplateParseErrorKind::SyntaxError),
);
}
#[test]

View file

@ -237,6 +237,8 @@ A double-quoted string literal supports the following escape sequences:
* `\r`: carriage return
* `\n`: new line
* `\0`: null
* `\e`: escape (i.e., `\x1b`)
* `\xHH`: character with code point `U+00HH`, given as exactly two hex digits (e.g. `\x61` is `a`, `\xe0` is `à`)
Other escape sequences are not supported. Any UTF-8 characters are allowed
inside a string literal, with two exceptions: unescaped `"`-s and uses of `\`

View file

@ -412,6 +412,12 @@ impl<R: RuleType> StringLiteralParser<R> {
"r" => result.push('\r'),
"n" => result.push('\n'),
"0" => result.push('\0'),
"e" => result.push('\x1b'),
hex if hex.starts_with('x') => {
result.push(char::from(
u8::from_str_radix(&hex[1..], 16).expect("hex characters"),
));
}
char => panic!("invalid escape: \\{char:?}"),
}
} else {

View file

@ -34,7 +34,10 @@ bare_string = @{
| '\u{80}'..'\u{10ffff}' )+
}
string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") }
string_escape = @{
"\\"
~ ("t" | "r" | "n" | "0" | "e" | ("x" ~ ASCII_HEX_DIGIT{2}) | "\"" | "\\")
}
string_content_char = @{ !("\"" | "\\") ~ ANY }
string_content = @{ string_content_char+ }
string_literal = ${ "\"" ~ (string_content | string_escape)* ~ "\"" }

View file

@ -485,14 +485,14 @@ mod tests {
fn test_parse_string_literal() {
// "\<char>" escapes
assert_eq!(
parse_into_kind(r#" "\t\r\n\"\\\0" "#),
Ok(ExpressionKind::String("\t\r\n\"\\\0".to_owned()))
parse_into_kind(r#" "\t\r\n\"\\\0\e" "#),
Ok(ExpressionKind::String("\t\r\n\"\\\0\u{1b}".to_owned())),
);
// Invalid "\<char>" escape
assert_eq!(
parse_into_kind(r#" "\y" "#),
Err(FilesetParseErrorKind::SyntaxError)
Err(FilesetParseErrorKind::SyntaxError),
);
// Single-quoted raw string
@ -512,6 +512,28 @@ mod tests {
parse_into_kind(r#" '"' "#),
Ok(ExpressionKind::String(r#"""#.to_owned())),
);
// Hex bytes
assert_eq!(
parse_into_kind(r#""\x61\x65\x69\x6f\x75""#),
Ok(ExpressionKind::String("aeiou".to_owned())),
);
assert_eq!(
parse_into_kind(r#""\xe0\xe8\xec\xf0\xf9""#),
Ok(ExpressionKind::String("àèìðù".to_owned())),
);
assert_eq!(
parse_into_kind(r#""\x""#),
Err(FilesetParseErrorKind::SyntaxError),
);
assert_eq!(
parse_into_kind(r#""\xf""#),
Err(FilesetParseErrorKind::SyntaxError),
);
assert_eq!(
parse_into_kind(r#""\xgg""#),
Err(FilesetParseErrorKind::SyntaxError),
);
}
#[test]

View file

@ -1,11 +1,11 @@
// Copyright 2021 The Jujutsu Authors
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//
// https://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -24,7 +24,10 @@ symbol = _{
| raw_string_literal
}
string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") }
string_escape = @{
"\\"
~ ("t" | "r" | "n" | "0" | "e" | ("x" ~ ASCII_HEX_DIGIT{2}) | "\"" | "\\")
}
string_content_char = @{ !("\"" | "\\") ~ ANY }
string_content = @{ string_content_char+ }
string_literal = ${ "\"" ~ (string_content | string_escape)* ~ "\"" }

View file

@ -1266,8 +1266,8 @@ mod tests {
fn test_parse_string_literal() {
// "\<char>" escapes
assert_eq!(
parse_into_kind(r#" "\t\r\n\"\\\0" "#),
Ok(ExpressionKind::String("\t\r\n\"\\\0".to_owned()))
parse_into_kind(r#" "\t\r\n\"\\\0\e" "#),
Ok(ExpressionKind::String("\t\r\n\"\\\0\u{1b}".to_owned()))
);
// Invalid "\<char>" escape
@ -1293,6 +1293,28 @@ mod tests {
parse_into_kind(r#" '"' "#),
Ok(ExpressionKind::String(r#"""#.to_owned()))
);
// Hex bytes
assert_eq!(
parse_into_kind(r#""\x61\x65\x69\x6f\x75""#),
Ok(ExpressionKind::String("aeiou".to_owned()))
);
assert_eq!(
parse_into_kind(r#""\xe0\xe8\xec\xf0\xf9""#),
Ok(ExpressionKind::String("àèìðù".to_owned()))
);
assert_eq!(
parse_into_kind(r#""\x""#),
Err(RevsetParseErrorKind::SyntaxError)
);
assert_eq!(
parse_into_kind(r#""\xf""#),
Err(RevsetParseErrorKind::SyntaxError)
);
assert_eq!(
parse_into_kind(r#""\xgg""#),
Err(RevsetParseErrorKind::SyntaxError)
);
}
#[test]