diff --git a/CHANGELOG.md b/CHANGELOG.md index b5a2dc8b5..91fd87c76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). * `jj squash` now supports `-f/-t` shorthands for `--from/--[in]to`. +* String literals in filesets, revsets and templates now support `\xHH` hex + escapes, plus `\e` as shorthand for `\x1b` (the escape character). + ### Fixed bugs * Error on `trunk()` revset resolution is now handled gracefully. diff --git a/cli/src/template.pest b/cli/src/template.pest index 928b99132..a396f81fc 100644 --- a/cli/src/template.pest +++ b/cli/src/template.pest @@ -1,11 +1,11 @@ // Copyright 2020 The Jujutsu Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -19,7 +19,10 @@ whitespace = _{ " " | "\t" | "\r" | "\n" | "\x0c" } -string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") } +string_escape = @{ + "\\" + ~ ("t" | "r" | "n" | "0" | "e" | ("x" ~ ASCII_HEX_DIGIT{2}) | "\"" | "\\") +} string_content_char = @{ !("\"" | "\\") ~ ANY } string_content = @{ string_content_char+ } string_literal = ${ "\"" ~ (string_content | string_escape)* ~ "\"" } diff --git a/cli/src/template_parser.rs b/cli/src/template_parser.rs index b9bf143a2..7987b5ec8 100644 --- a/cli/src/template_parser.rs +++ b/cli/src/template_parser.rs @@ -992,8 +992,8 @@ mod tests { fn test_string_literal() { // "\" escapes assert_eq!( - parse_into_kind(r#" "\t\r\n\"\\\0" "#), - Ok(ExpressionKind::String("\t\r\n\"\\\0".to_owned())), + parse_into_kind(r#" "\t\r\n\"\\\0\e" "#), + Ok(ExpressionKind::String("\t\r\n\"\\\0\u{1b}".to_owned())), ); // Invalid "\" escape @@ -1019,6 +1019,28 @@ mod tests { parse_into_kind(r#" '"' "#), Ok(ExpressionKind::String(r#"""#.to_owned())), ); + + // Hex bytes + assert_eq!( + parse_into_kind(r#""\x61\x65\x69\x6f\x75""#), + Ok(ExpressionKind::String("aeiou".to_owned())), + ); + assert_eq!( + parse_into_kind(r#""\xe0\xe8\xec\xf0\xf9""#), + Ok(ExpressionKind::String("àèìðù".to_owned())), + ); + assert_eq!( + parse_into_kind(r#""\x""#), + Err(TemplateParseErrorKind::SyntaxError), + ); + assert_eq!( + parse_into_kind(r#""\xf""#), + Err(TemplateParseErrorKind::SyntaxError), + ); + assert_eq!( + parse_into_kind(r#""\xgg""#), + Err(TemplateParseErrorKind::SyntaxError), + ); } #[test] diff --git a/docs/templates.md b/docs/templates.md index 3925534f8..3809479e1 100644 --- a/docs/templates.md +++ b/docs/templates.md @@ -237,6 +237,8 @@ A double-quoted string literal supports the following escape sequences: * `\r`: carriage return * `\n`: new line * `\0`: null +* `\e`: escape (i.e., `\x1b`) +* `\xHH`: character with hex code point `HH` (e.g., `\x61` is `a`, `\xe0` is `à`) Other escape sequences are not supported. 
Any UTF-8 characters are allowed inside a string literal, with two exceptions: unescaped `"`-s and uses of `\` diff --git a/lib/src/dsl_util.rs b/lib/src/dsl_util.rs index c92f01cef..898b4fd41 100644 --- a/lib/src/dsl_util.rs +++ b/lib/src/dsl_util.rs @@ -412,6 +412,12 @@ impl StringLiteralParser { "r" => result.push('\r'), "n" => result.push('\n'), "0" => result.push('\0'), + "e" => result.push('\x1b'), + hex if hex.starts_with('x') => { + result.push(char::from( + u8::from_str_radix(&hex[1..], 16).expect("hex characters"), + )); + } char => panic!("invalid escape: \\{char:?}"), } } else { diff --git a/lib/src/fileset.pest b/lib/src/fileset.pest index 41fb377b7..534198631 100644 --- a/lib/src/fileset.pest +++ b/lib/src/fileset.pest @@ -34,7 +34,10 @@ bare_string = @{ | '\u{80}'..'\u{10ffff}' )+ } -string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") } +string_escape = @{ + "\\" + ~ ("t" | "r" | "n" | "0" | "e" | ("x" ~ ASCII_HEX_DIGIT{2}) | "\"" | "\\") +} string_content_char = @{ !("\"" | "\\") ~ ANY } string_content = @{ string_content_char+ } string_literal = ${ "\"" ~ (string_content | string_escape)* ~ "\"" } diff --git a/lib/src/fileset_parser.rs b/lib/src/fileset_parser.rs index 09f722c58..2091fb96a 100644 --- a/lib/src/fileset_parser.rs +++ b/lib/src/fileset_parser.rs @@ -485,14 +485,14 @@ mod tests { fn test_parse_string_literal() { // "\" escapes assert_eq!( - parse_into_kind(r#" "\t\r\n\"\\\0" "#), - Ok(ExpressionKind::String("\t\r\n\"\\\0".to_owned())) + parse_into_kind(r#" "\t\r\n\"\\\0\e" "#), + Ok(ExpressionKind::String("\t\r\n\"\\\0\u{1b}".to_owned())), ); // Invalid "\" escape assert_eq!( parse_into_kind(r#" "\y" "#), - Err(FilesetParseErrorKind::SyntaxError) + Err(FilesetParseErrorKind::SyntaxError), ); // Single-quoted raw string @@ -512,6 +512,28 @@ mod tests { parse_into_kind(r#" '"' "#), Ok(ExpressionKind::String(r#"""#.to_owned())), ); + + // Hex bytes + assert_eq!( + parse_into_kind(r#""\x61\x65\x69\x6f\x75""#), + 
Ok(ExpressionKind::String("aeiou".to_owned())), + ); + assert_eq!( + parse_into_kind(r#""\xe0\xe8\xec\xf0\xf9""#), + Ok(ExpressionKind::String("àèìðù".to_owned())), + ); + assert_eq!( + parse_into_kind(r#""\x""#), + Err(FilesetParseErrorKind::SyntaxError), + ); + assert_eq!( + parse_into_kind(r#""\xf""#), + Err(FilesetParseErrorKind::SyntaxError), + ); + assert_eq!( + parse_into_kind(r#""\xgg""#), + Err(FilesetParseErrorKind::SyntaxError), + ); } #[test] diff --git a/lib/src/revset.pest b/lib/src/revset.pest index 2705b512d..80449743a 100644 --- a/lib/src/revset.pest +++ b/lib/src/revset.pest @@ -1,11 +1,11 @@ // Copyright 2021 The Jujutsu Authors -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at -// +// // https://www.apache.org/licenses/LICENSE-2.0 -// +// // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -24,7 +24,10 @@ symbol = _{ | raw_string_literal } -string_escape = @{ "\\" ~ ("t" | "r" | "n" | "0" | "\"" | "\\") } +string_escape = @{ + "\\" + ~ ("t" | "r" | "n" | "0" | "e" | ("x" ~ ASCII_HEX_DIGIT{2}) | "\"" | "\\") +} string_content_char = @{ !("\"" | "\\") ~ ANY } string_content = @{ string_content_char+ } string_literal = ${ "\"" ~ (string_content | string_escape)* ~ "\"" } diff --git a/lib/src/revset_parser.rs b/lib/src/revset_parser.rs index 68839d13f..80bae5c99 100644 --- a/lib/src/revset_parser.rs +++ b/lib/src/revset_parser.rs @@ -1266,8 +1266,8 @@ mod tests { fn test_parse_string_literal() { // "\" escapes assert_eq!( - parse_into_kind(r#" "\t\r\n\"\\\0" "#), - Ok(ExpressionKind::String("\t\r\n\"\\\0".to_owned())) + parse_into_kind(r#" "\t\r\n\"\\\0\e" "#), + Ok(ExpressionKind::String("\t\r\n\"\\\0\u{1b}".to_owned())) ); // Invalid "\" escape @@ -1293,6 +1293,28 @@ mod tests { parse_into_kind(r#" '"' "#), Ok(ExpressionKind::String(r#"""#.to_owned())) ); + + // Hex bytes + assert_eq!( + parse_into_kind(r#""\x61\x65\x69\x6f\x75""#), + Ok(ExpressionKind::String("aeiou".to_owned())) + ); + assert_eq!( + parse_into_kind(r#""\xe0\xe8\xec\xf0\xf9""#), + Ok(ExpressionKind::String("àèìðù".to_owned())) + ); + assert_eq!( + parse_into_kind(r#""\x""#), + Err(RevsetParseErrorKind::SyntaxError) + ); + assert_eq!( + parse_into_kind(r#""\xf""#), + Err(RevsetParseErrorKind::SyntaxError) + ); + assert_eq!( + parse_into_kind(r#""\xgg""#), + Err(RevsetParseErrorKind::SyntaxError) + ); } #[test]