jj/lib/src/fileset_parser.rs

657 lines
22 KiB
Rust
Raw Normal View History

// Copyright 2024 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Parser for the fileset language.
use std::error;
use itertools::Itertools as _;
use once_cell::sync::Lazy;
use pest::iterators::Pair;
use pest::pratt_parser::{Assoc, Op, PrattParser};
use pest::Parser;
use pest_derive::Parser;
use thiserror::Error;
use crate::dsl_util::{self, InvalidArguments, StringLiteralParser};
#[derive(Parser)]
#[grammar = "fileset.pest"]
struct FilesetParser;
const STRING_LITERAL_PARSER: StringLiteralParser<Rule> = StringLiteralParser {
content_rule: Rule::string_content,
escape_rule: Rule::string_escape,
};
impl Rule {
fn to_symbol(self) -> Option<&'static str> {
match self {
Rule::EOI => None,
Rule::whitespace => None,
Rule::identifier => None,
Rule::strict_identifier_part => None,
Rule::strict_identifier => None,
Rule::bare_string => None,
Rule::string_escape => None,
Rule::string_content_char => None,
Rule::string_content => None,
Rule::string_literal => None,
Rule::raw_string_content => None,
Rule::raw_string_literal => None,
Rule::pattern_kind_op => Some(":"),
Rule::negate_op => Some("~"),
Rule::union_op => Some("|"),
Rule::intersection_op => Some("&"),
Rule::difference_op => Some("~"),
Rule::prefix_ops => None,
Rule::infix_ops => None,
Rule::function => None,
Rule::function_name => None,
Rule::function_arguments => None,
Rule::string_pattern => None,
Rule::bare_string_pattern => None,
Rule::primary => None,
Rule::expression => None,
Rule::program => None,
Rule::program_or_bare_string => None,
}
}
}
/// Result of fileset parsing and name resolution.
pub type FilesetParseResult<T> = Result<T, FilesetParseError>;
/// Error occurred during fileset parsing and name resolution.
#[derive(Debug, Error)]
#[error("{pest_error}")]
pub struct FilesetParseError {
kind: FilesetParseErrorKind,
pest_error: Box<pest::error::Error<Rule>>,
source: Option<Box<dyn error::Error + Send + Sync>>,
}
/// Categories of fileset parsing and name resolution error.
#[allow(missing_docs)]
#[derive(Clone, Debug, Eq, Error, PartialEq)]
pub enum FilesetParseErrorKind {
#[error("Syntax error")]
SyntaxError,
#[error(r#"Function "{name}" doesn't exist"#)]
NoSuchFunction {
name: String,
candidates: Vec<String>,
},
#[error(r#"Function "{name}": {message}"#)]
InvalidArguments { name: String, message: String },
#[error("{0}")]
Expression(String),
}
impl FilesetParseError {
pub(super) fn new(kind: FilesetParseErrorKind, span: pest::Span<'_>) -> Self {
let message = kind.to_string();
let pest_error = Box::new(pest::error::Error::new_from_span(
pest::error::ErrorVariant::CustomError { message },
span,
));
FilesetParseError {
kind,
pest_error,
source: None,
}
}
pub(super) fn with_source(
mut self,
source: impl Into<Box<dyn error::Error + Send + Sync>>,
) -> Self {
self.source = Some(source.into());
self
}
/// Some other expression error.
pub(super) fn expression(message: impl Into<String>, span: pest::Span<'_>) -> Self {
FilesetParseError::new(FilesetParseErrorKind::Expression(message.into()), span)
}
/// Category of the underlying error.
pub fn kind(&self) -> &FilesetParseErrorKind {
&self.kind
}
}
impl From<pest::error::Error<Rule>> for FilesetParseError {
fn from(err: pest::error::Error<Rule>) -> Self {
FilesetParseError {
kind: FilesetParseErrorKind::SyntaxError,
pest_error: Box::new(rename_rules_in_pest_error(err)),
source: None,
}
}
}
impl From<InvalidArguments<'_>> for FilesetParseError {
fn from(err: InvalidArguments<'_>) -> Self {
let kind = FilesetParseErrorKind::InvalidArguments {
name: err.name.to_owned(),
message: err.message,
};
Self::new(kind, err.span)
}
}
2024-05-26 09:57:47 +00:00
fn rename_rules_in_pest_error(err: pest::error::Error<Rule>) -> pest::error::Error<Rule> {
err.renamed_rules(|rule| {
rule.to_symbol()
.map(|sym| format!("`{sym}`"))
.unwrap_or_else(|| format!("<{rule:?}>"))
})
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ExpressionKind<'i> {
Identifier(&'i str),
String(String),
StringPattern { kind: &'i str, value: String },
Unary(UnaryOp, Box<ExpressionNode<'i>>),
Binary(BinaryOp, Box<ExpressionNode<'i>>, Box<ExpressionNode<'i>>),
FunctionCall(Box<FunctionCallNode<'i>>),
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum UnaryOp {
/// `~`
Negate,
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum BinaryOp {
/// `|`
Union,
/// `&`
Intersection,
/// `~`
Difference,
}
pub type ExpressionNode<'i> = dsl_util::ExpressionNode<'i, ExpressionKind<'i>>;
pub type FunctionCallNode<'i> = dsl_util::FunctionCallNode<'i, ExpressionKind<'i>>;
fn parse_function_call_node(pair: Pair<Rule>) -> FilesetParseResult<FunctionCallNode> {
assert_eq!(pair.as_rule(), Rule::function);
let (name_pair, args_pair) = pair.into_inner().collect_tuple().unwrap();
assert_eq!(name_pair.as_rule(), Rule::function_name);
assert_eq!(args_pair.as_rule(), Rule::function_arguments);
let name_span = name_pair.as_span();
let args_span = args_pair.as_span();
let name = name_pair.as_str();
let args = args_pair
.into_inner()
.map(parse_expression_node)
.try_collect()?;
Ok(FunctionCallNode {
name,
name_span,
args,
keyword_args: vec![], // unsupported
args_span,
})
}
fn parse_as_string_literal(pair: Pair<Rule>) -> String {
match pair.as_rule() {
Rule::identifier => pair.as_str().to_owned(),
Rule::string_literal => STRING_LITERAL_PARSER.parse(pair.into_inner()),
Rule::raw_string_literal => {
let (content,) = pair.into_inner().collect_tuple().unwrap();
assert_eq!(content.as_rule(), Rule::raw_string_content);
content.as_str().to_owned()
}
r => panic!("unexpected string literal rule: {r:?}"),
}
}
fn parse_primary_node(pair: Pair<Rule>) -> FilesetParseResult<ExpressionNode> {
assert_eq!(pair.as_rule(), Rule::primary);
let first = pair.into_inner().next().unwrap();
let span = first.as_span();
let expr = match first.as_rule() {
Rule::expression => return parse_expression_node(first),
Rule::function => {
let function = Box::new(parse_function_call_node(first)?);
ExpressionKind::FunctionCall(function)
}
Rule::string_pattern => {
let (lhs, op, rhs) = first.into_inner().collect_tuple().unwrap();
assert_eq!(lhs.as_rule(), Rule::strict_identifier);
assert_eq!(op.as_rule(), Rule::pattern_kind_op);
let kind = lhs.as_str();
let value = parse_as_string_literal(rhs);
ExpressionKind::StringPattern { kind, value }
}
Rule::identifier => ExpressionKind::Identifier(first.as_str()),
Rule::string_literal | Rule::raw_string_literal => {
ExpressionKind::String(parse_as_string_literal(first))
}
r => panic!("unexpected primary rule: {r:?}"),
};
Ok(ExpressionNode::new(expr, span))
}
fn parse_expression_node(pair: Pair<Rule>) -> FilesetParseResult<ExpressionNode> {
assert_eq!(pair.as_rule(), Rule::expression);
static PRATT: Lazy<PrattParser<Rule>> = Lazy::new(|| {
PrattParser::new()
.op(Op::infix(Rule::union_op, Assoc::Left))
.op(Op::infix(Rule::intersection_op, Assoc::Left)
| Op::infix(Rule::difference_op, Assoc::Left))
.op(Op::prefix(Rule::negate_op))
});
PRATT
.map_primary(parse_primary_node)
.map_prefix(|op, rhs| {
let op_kind = match op.as_rule() {
Rule::negate_op => UnaryOp::Negate,
r => panic!("unexpected prefix operator rule {r:?}"),
};
let rhs = Box::new(rhs?);
let span = op.as_span().start_pos().span(&rhs.span.end_pos());
let expr = ExpressionKind::Unary(op_kind, rhs);
Ok(ExpressionNode::new(expr, span))
})
.map_infix(|lhs, op, rhs| {
let op_kind = match op.as_rule() {
Rule::union_op => BinaryOp::Union,
Rule::intersection_op => BinaryOp::Intersection,
Rule::difference_op => BinaryOp::Difference,
r => panic!("unexpected infix operator rule {r:?}"),
};
let lhs = Box::new(lhs?);
let rhs = Box::new(rhs?);
let span = lhs.span.start_pos().span(&rhs.span.end_pos());
let expr = ExpressionKind::Binary(op_kind, lhs, rhs);
Ok(ExpressionNode::new(expr, span))
})
.parse(pair.into_inner())
}
/// Parses text into expression tree. No name resolution is made at this stage.
#[cfg(test)] // TODO: alias will be parsed with no bare_string fallback
pub fn parse_program(text: &str) -> FilesetParseResult<ExpressionNode> {
let mut pairs = FilesetParser::parse(Rule::program, text)?;
let first = pairs.next().unwrap();
parse_expression_node(first)
}
/// Parses text into expression tree with bare string fallback. No name
/// resolution is made at this stage.
///
/// If the text can't be parsed as a fileset expression, and if it doesn't
/// contain any operator-like characters, it will be parsed as a file path.
pub fn parse_program_or_bare_string(text: &str) -> FilesetParseResult<ExpressionNode> {
let mut pairs = FilesetParser::parse(Rule::program_or_bare_string, text)?;
let first = pairs.next().unwrap();
let span = first.as_span();
let expr = match first.as_rule() {
Rule::expression => return parse_expression_node(first),
Rule::bare_string_pattern => {
let (lhs, op, rhs) = first.into_inner().collect_tuple().unwrap();
assert_eq!(lhs.as_rule(), Rule::strict_identifier);
assert_eq!(op.as_rule(), Rule::pattern_kind_op);
assert_eq!(rhs.as_rule(), Rule::bare_string);
let kind = lhs.as_str();
let value = rhs.as_str().to_owned();
ExpressionKind::StringPattern { kind, value }
}
Rule::bare_string => ExpressionKind::String(first.as_str().to_owned()),
r => panic!("unexpected program or bare string rule: {r:?}"),
};
Ok(ExpressionNode::new(expr, span))
}
#[cfg(test)]
mod tests {
use assert_matches::assert_matches;
use super::*;
use crate::dsl_util::KeywordArgument;
fn parse_into_kind(text: &str) -> Result<ExpressionKind, FilesetParseErrorKind> {
parse_program(text)
.map(|node| node.kind)
.map_err(|err| err.kind)
}
fn parse_maybe_bare_into_kind(text: &str) -> Result<ExpressionKind, FilesetParseErrorKind> {
parse_program_or_bare_string(text)
.map(|node| node.kind)
.map_err(|err| err.kind)
}
fn parse_normalized(text: &str) -> ExpressionNode {
normalize_tree(parse_program(text).unwrap())
}
fn parse_maybe_bare_normalized(text: &str) -> ExpressionNode {
normalize_tree(parse_program_or_bare_string(text).unwrap())
}
/// Drops auxiliary data from parsed tree so it can be compared with other.
fn normalize_tree(node: ExpressionNode) -> ExpressionNode {
fn empty_span() -> pest::Span<'static> {
pest::Span::new("", 0, 0).unwrap()
}
fn normalize_list(nodes: Vec<ExpressionNode>) -> Vec<ExpressionNode> {
nodes.into_iter().map(normalize_tree).collect()
}
fn normalize_function_call(function: FunctionCallNode) -> FunctionCallNode {
FunctionCallNode {
name: function.name,
name_span: empty_span(),
args: normalize_list(function.args),
keyword_args: function
.keyword_args
.into_iter()
.map(|arg| KeywordArgument {
name: arg.name,
name_span: empty_span(),
value: normalize_tree(arg.value),
})
.collect(),
args_span: empty_span(),
}
}
let normalized_kind = match node.kind {
ExpressionKind::Identifier(_)
| ExpressionKind::String(_)
| ExpressionKind::StringPattern { .. } => node.kind,
ExpressionKind::Unary(op, arg) => {
let arg = Box::new(normalize_tree(*arg));
ExpressionKind::Unary(op, arg)
}
ExpressionKind::Binary(op, lhs, rhs) => {
let lhs = Box::new(normalize_tree(*lhs));
let rhs = Box::new(normalize_tree(*rhs));
ExpressionKind::Binary(op, lhs, rhs)
}
ExpressionKind::FunctionCall(function) => {
let function = Box::new(normalize_function_call(*function));
ExpressionKind::FunctionCall(function)
}
};
ExpressionNode {
kind: normalized_kind,
span: empty_span(),
}
}
#[test]
fn test_parse_tree_eq() {
assert_eq!(
parse_normalized(r#" foo( x ) | ~bar:"baz" "#),
parse_normalized(r#"(foo(x))|(~(bar:"baz"))"#)
);
assert_ne!(parse_normalized(r#" foo "#), parse_normalized(r#" "foo" "#));
}
#[test]
fn test_parse_whitespace() {
let ascii_whitespaces: String = ('\x00'..='\x7f')
.filter(char::is_ascii_whitespace)
.collect();
assert_eq!(
parse_normalized(&format!("{ascii_whitespaces}f()")),
parse_normalized("f()")
);
}
#[test]
fn test_parse_identifier() {
assert_eq!(
parse_into_kind("dir/foo-bar_0.baz"),
Ok(ExpressionKind::Identifier("dir/foo-bar_0.baz"))
);
assert_eq!(
parse_into_kind("cli-reference@.md.snap"),
Ok(ExpressionKind::Identifier("cli-reference@.md.snap"))
);
assert_eq!(
parse_into_kind("柔術.jj"),
Ok(ExpressionKind::Identifier("柔術.jj"))
);
assert_eq!(
parse_into_kind(r#"Windows\Path"#),
Ok(ExpressionKind::Identifier(r#"Windows\Path"#))
);
}
#[test]
fn test_parse_string_literal() {
// "\<char>" escapes
assert_eq!(
parse_into_kind(r#" "\t\r\n\"\\\0" "#),
Ok(ExpressionKind::String("\t\r\n\"\\\0".to_owned()))
);
// Invalid "\<char>" escape
assert_eq!(
parse_into_kind(r#" "\y" "#),
Err(FilesetParseErrorKind::SyntaxError)
);
// Single-quoted raw string
assert_eq!(
parse_into_kind(r#" '' "#),
Ok(ExpressionKind::String("".to_owned())),
);
assert_eq!(
parse_into_kind(r#" 'a\n' "#),
Ok(ExpressionKind::String(r"a\n".to_owned())),
);
assert_eq!(
parse_into_kind(r#" '\' "#),
Ok(ExpressionKind::String(r"\".to_owned())),
);
assert_eq!(
parse_into_kind(r#" '"' "#),
Ok(ExpressionKind::String(r#"""#.to_owned())),
);
}
#[test]
fn test_parse_string_pattern() {
assert_eq!(
parse_into_kind(r#" foo:bar "#),
Ok(ExpressionKind::StringPattern {
kind: "foo",
value: "bar".to_owned()
})
);
assert_eq!(
parse_into_kind(r#" foo:"bar" "#),
Ok(ExpressionKind::StringPattern {
kind: "foo",
value: "bar".to_owned()
})
);
assert_eq!(
parse_into_kind(r#" foo:"" "#),
Ok(ExpressionKind::StringPattern {
kind: "foo",
value: "".to_owned()
})
);
assert_eq!(
parse_into_kind(r#" foo:'\' "#),
Ok(ExpressionKind::StringPattern {
kind: "foo",
value: r"\".to_owned()
})
);
assert_eq!(
parse_into_kind(r#" foo: "#),
Err(FilesetParseErrorKind::SyntaxError)
);
assert_eq!(
parse_into_kind(r#" foo: "" "#),
Err(FilesetParseErrorKind::SyntaxError)
);
assert_eq!(
parse_into_kind(r#" foo :"" "#),
Err(FilesetParseErrorKind::SyntaxError)
);
}
#[test]
fn test_parse_operator() {
assert_matches!(
parse_into_kind("~x"),
Ok(ExpressionKind::Unary(UnaryOp::Negate, _))
);
assert_matches!(
parse_into_kind("x|y"),
Ok(ExpressionKind::Binary(BinaryOp::Union, _, _))
);
assert_matches!(
parse_into_kind("x&y"),
Ok(ExpressionKind::Binary(BinaryOp::Intersection, _, _))
);
assert_matches!(
parse_into_kind("x~y"),
Ok(ExpressionKind::Binary(BinaryOp::Difference, _, _))
);
// Set operator associativity/precedence
assert_eq!(parse_normalized("~x|y"), parse_normalized("(~x)|y"));
assert_eq!(parse_normalized("x&~y"), parse_normalized("x&(~y)"));
assert_eq!(parse_normalized("x~~y"), parse_normalized("x~(~y)"));
assert_eq!(parse_normalized("x~~~y"), parse_normalized("x~(~(~y))"));
assert_eq!(parse_normalized("x|y|z"), parse_normalized("(x|y)|z"));
assert_eq!(parse_normalized("x&y|z"), parse_normalized("(x&y)|z"));
assert_eq!(parse_normalized("x|y&z"), parse_normalized("x|(y&z)"));
assert_eq!(parse_normalized("x|y~z"), parse_normalized("x|(y~z)"));
assert_eq!(parse_normalized("~x:y"), parse_normalized("~(x:y)"));
assert_eq!(parse_normalized("x|y:z"), parse_normalized("x|(y:z)"));
// Expression span
assert_eq!(parse_program(" ~ x ").unwrap().span.as_str(), "~ x");
assert_eq!(parse_program(" x |y ").unwrap().span.as_str(), "x |y");
}
#[test]
fn test_parse_function_call() {
assert_matches!(
parse_into_kind("foo()"),
Ok(ExpressionKind::FunctionCall(_))
);
// Trailing comma isn't allowed for empty argument
assert!(parse_into_kind("foo(,)").is_err());
// Trailing comma is allowed for the last argument
assert_eq!(parse_normalized("foo(a,)"), parse_normalized("foo(a)"));
assert_eq!(parse_normalized("foo(a , )"), parse_normalized("foo(a)"));
assert!(parse_into_kind("foo(,a)").is_err());
assert!(parse_into_kind("foo(a,,)").is_err());
assert!(parse_into_kind("foo(a , , )").is_err());
assert_eq!(parse_normalized("foo(a,b,)"), parse_normalized("foo(a,b)"));
assert!(parse_into_kind("foo(a,,b)").is_err());
}
#[test]
fn test_parse_bare_string() {
// Valid expression should be parsed as such
assert_eq!(
parse_maybe_bare_into_kind(" valid "),
Ok(ExpressionKind::Identifier("valid"))
);
assert_eq!(
parse_maybe_bare_normalized("f(x)&y"),
parse_normalized("f(x)&y")
);
// Bare string
assert_eq!(
parse_maybe_bare_into_kind("Foo Bar.txt"),
Ok(ExpressionKind::String("Foo Bar.txt".to_owned()))
);
assert_eq!(
parse_maybe_bare_into_kind(r#"Windows\Path with space"#),
Ok(ExpressionKind::String(
r#"Windows\Path with space"#.to_owned()
))
);
assert_eq!(
parse_maybe_bare_into_kind("柔 術 . j j"),
Ok(ExpressionKind::String("柔 術 . j j".to_owned()))
);
assert_eq!(
parse_maybe_bare_into_kind("Unicode emoji 💩"),
Ok(ExpressionKind::String("Unicode emoji 💩".to_owned()))
);
assert_eq!(
parse_maybe_bare_into_kind("looks like & expression"),
Err(FilesetParseErrorKind::SyntaxError)
);
assert_eq!(
parse_maybe_bare_into_kind("unbalanced_parens("),
Err(FilesetParseErrorKind::SyntaxError)
);
// Bare string pattern
assert_eq!(
parse_maybe_bare_into_kind("foo: bar baz"),
Ok(ExpressionKind::StringPattern {
kind: "foo",
value: " bar baz".to_owned()
})
);
assert_eq!(
parse_maybe_bare_into_kind("foo:bar:baz"),
Err(FilesetParseErrorKind::SyntaxError)
);
assert_eq!(
parse_maybe_bare_into_kind("foo:"),
Err(FilesetParseErrorKind::SyntaxError)
);
assert_eq!(
parse_maybe_bare_into_kind(r#"foo:"unclosed quote"#),
Err(FilesetParseErrorKind::SyntaxError)
);
// Surrounding spaces are simply preserved. They could be trimmed, but
// space is valid bare_string character.
assert_eq!(
parse_maybe_bare_into_kind(" No trim "),
Ok(ExpressionKind::String(" No trim ".to_owned()))
);
}
#[test]
fn test_parse_error() {
insta::assert_snapshot!(parse_program("foo|").unwrap_err().to_string(), @r###"
--> 1:5
|
1 | foo|
| ^---
|
= expected `~` or <primary>
"###);
}
}