// Mirror of https://github.com/salsa-rs/salsa.git
// (synced 2024-11-24 12:16:25 +00:00; 847 lines, 32 KiB, Rust)
use ordered_float::OrderedFloat;
|
|
use salsa::Accumulator;
|
|
|
|
use crate::ir::{
|
|
Diagnostic, Expression, ExpressionData, Function, FunctionId, Op, Program, SourceProgram, Span,
|
|
Statement, StatementData, VariableId,
|
|
};
|
|
|
|
// ANCHOR: parse_statements
|
|
#[salsa::tracked]
|
|
pub fn parse_statements(db: &dyn crate::Db, source: SourceProgram) -> Program<'_> {
|
|
// Get the source text from the database
|
|
let source_text = source.text(db);
|
|
|
|
// Create the parser
|
|
let mut parser = Parser {
|
|
db,
|
|
source_text,
|
|
position: 0,
|
|
};
|
|
|
|
// Read in statements until we reach the end of the input
|
|
let mut result = vec![];
|
|
loop {
|
|
// Skip over any whitespace
|
|
parser.skip_whitespace();
|
|
|
|
// If there are no more tokens, break
|
|
if parser.peek().is_none() {
|
|
break;
|
|
}
|
|
|
|
// Otherwise, there is more input, so parse a statement.
|
|
if let Some(statement) = parser.parse_statement() {
|
|
result.push(statement);
|
|
} else {
|
|
// If we failed, report an error at whatever position the parser
|
|
// got stuck. We could recover here by skipping to the end of the line
|
|
// or something like that. But we leave that as an exercise for the reader!
|
|
parser.report_error();
|
|
break;
|
|
}
|
|
}
|
|
|
|
Program::new(db, result)
|
|
}
|
|
// ANCHOR_END: parse_statements
|
|
|
|
/// The parser tracks the current position in the input.
|
|
///
|
|
/// There are parsing methods on the parser named `parse_foo`. Each such method tries to parse a
|
|
/// `foo` at current position. Once they've recognized a `foo`, they return `Some(foo)` with the
|
|
/// result, and they update the position. If there is a parse error
|
|
/// (i.e., they don't recognize a `foo` at the current position), they return `None`,
|
|
/// and they leave `position` at roughly the spot where parsing failed. You can use this to
|
|
/// report errors and recover.
|
|
///
|
|
/// There are some simpler method that read a single token (e.g., [`Parser::ch`]
|
|
/// or [`Parser::word`]). These methods guarantee that, when they return `None`, the position
|
|
/// is not changed apart from consuming whitespace. This allows them to be used to probe ahead
|
|
/// and test the next token.
|
|
struct Parser<'source, 'db> {
|
|
db: &'db dyn crate::Db,
|
|
source_text: &'source str,
|
|
position: usize,
|
|
}
|
|
|
|
impl<'db> Parser<'_, 'db> {
|
|
// Invoke `f` and, if it returns `None`, then restore the parsing position.
|
|
fn probe<T: std::fmt::Debug>(&mut self, f: impl FnOnce(&mut Self) -> Option<T>) -> Option<T> {
|
|
let p = self.position;
|
|
if let Some(v) = f(self) {
|
|
Some(v)
|
|
} else {
|
|
self.position = p;
|
|
None
|
|
}
|
|
}
|
|
|
|
// ANCHOR: report_error
|
|
/// Report an error diagnostic at the current position.
|
|
fn report_error(&self) {
|
|
let next_position = match self.peek() {
|
|
Some(ch) => self.position + ch.len_utf8(),
|
|
None => self.position,
|
|
};
|
|
Diagnostic {
|
|
start: self.position,
|
|
end: next_position,
|
|
message: "unexpected character".to_string(),
|
|
}
|
|
.accumulate(self.db);
|
|
}
|
|
// ANCHOR_END: report_error
|
|
|
|
fn peek(&self) -> Option<char> {
|
|
self.source_text[self.position..].chars().next()
|
|
}
|
|
|
|
// Returns a span ranging from `start_position` until the current position (exclusive)
|
|
fn span_from(&self, start_position: usize) -> Span<'db> {
|
|
Span::new(self.db, start_position, self.position)
|
|
}
|
|
|
|
fn consume(&mut self, ch: char) {
|
|
debug_assert!(self.peek() == Some(ch));
|
|
self.position += ch.len_utf8();
|
|
}
|
|
|
|
/// Skips whitespace and returns the new position.
|
|
fn skip_whitespace(&mut self) -> usize {
|
|
while let Some(ch) = self.peek() {
|
|
if ch.is_whitespace() {
|
|
self.consume(ch);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
self.position
|
|
}
|
|
|
|
// ANCHOR: parse_statement
|
|
fn parse_statement(&mut self) -> Option<Statement<'db>> {
|
|
let start_position = self.skip_whitespace();
|
|
let word = self.word()?;
|
|
if word == "fn" {
|
|
let func = self.parse_function()?;
|
|
Some(Statement::new(
|
|
self.span_from(start_position),
|
|
StatementData::Function(func),
|
|
))
|
|
} else if word == "print" {
|
|
let expr = self.parse_expression()?;
|
|
Some(Statement::new(
|
|
self.span_from(start_position),
|
|
StatementData::Print(expr),
|
|
))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
// ANCHOR_END: parse_statement
|
|
|
|
// ANCHOR: parse_function
|
|
fn parse_function(&mut self) -> Option<Function<'db>> {
|
|
let start_position = self.skip_whitespace();
|
|
let name = self.word()?;
|
|
let name_span = self.span_from(start_position);
|
|
let name: FunctionId = FunctionId::new(self.db, name);
|
|
// ^^^^^^^^^^^^^^^
|
|
// Create a new interned struct.
|
|
self.ch('(')?;
|
|
let args = self.parameters()?;
|
|
self.ch(')')?;
|
|
self.ch('=')?;
|
|
let body = self.parse_expression()?;
|
|
Some(Function::new(self.db, name, name_span, args, body))
|
|
// ^^^^^^^^^^^^^
|
|
// Create a new entity struct.
|
|
}
|
|
// ANCHOR_END: parse_function
|
|
|
|
fn parse_expression(&mut self) -> Option<Expression<'db>> {
|
|
self.parse_op_expression(Self::parse_expression1, Self::low_op)
|
|
}
|
|
|
|
fn low_op(&mut self) -> Option<Op> {
|
|
if self.ch('+').is_some() {
|
|
Some(Op::Add)
|
|
} else if self.ch('-').is_some() {
|
|
Some(Op::Subtract)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
/// Parses a high-precedence expression (times, div).
|
|
///
|
|
/// On failure, skips arbitrary tokens.
|
|
fn parse_expression1(&mut self) -> Option<Expression<'db>> {
|
|
self.parse_op_expression(Self::parse_expression2, Self::high_op)
|
|
}
|
|
|
|
fn high_op(&mut self) -> Option<Op> {
|
|
if self.ch('*').is_some() {
|
|
Some(Op::Multiply)
|
|
} else if self.ch('/').is_some() {
|
|
Some(Op::Divide)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
fn parse_op_expression(
|
|
&mut self,
|
|
mut parse_expr: impl FnMut(&mut Self) -> Option<Expression<'db>>,
|
|
mut op: impl FnMut(&mut Self) -> Option<Op>,
|
|
) -> Option<Expression<'db>> {
|
|
let start_position = self.skip_whitespace();
|
|
let mut expr1 = parse_expr(self)?;
|
|
|
|
while let Some(op) = op(self) {
|
|
let expr2 = parse_expr(self)?;
|
|
expr1 = Expression::new(
|
|
self.span_from(start_position),
|
|
ExpressionData::Op(Box::new(expr1), op, Box::new(expr2)),
|
|
);
|
|
}
|
|
|
|
Some(expr1)
|
|
}
|
|
|
|
/// Parses a "base expression" (no operators).
|
|
///
|
|
/// On failure, skips arbitrary tokens.
|
|
fn parse_expression2(&mut self) -> Option<Expression<'db>> {
|
|
let start_position = self.skip_whitespace();
|
|
if let Some(w) = self.word() {
|
|
if self.ch('(').is_some() {
|
|
let f = FunctionId::new(self.db, w);
|
|
let args = self.parse_expressions()?;
|
|
self.ch(')')?;
|
|
return Some(Expression::new(
|
|
self.span_from(start_position),
|
|
ExpressionData::Call(f, args),
|
|
));
|
|
}
|
|
|
|
let v = VariableId::new(self.db, w);
|
|
Some(Expression::new(
|
|
self.span_from(start_position),
|
|
ExpressionData::Variable(v),
|
|
))
|
|
} else if let Some(n) = self.number() {
|
|
Some(Expression::new(
|
|
self.span_from(start_position),
|
|
ExpressionData::Number(OrderedFloat::from(n)),
|
|
))
|
|
} else if self.ch('(').is_some() {
|
|
let expr = self.parse_expression()?;
|
|
self.ch(')')?;
|
|
Some(expr)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
fn parse_expressions(&mut self) -> Option<Vec<Expression<'db>>> {
|
|
let mut r = vec![];
|
|
loop {
|
|
let expr = self.parse_expression()?;
|
|
r.push(expr);
|
|
if self.ch(',').is_none() {
|
|
return Some(r);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Parses a list of variable identifiers, like `a, b, c`.
|
|
/// No trailing commas because I am lazy.
|
|
///
|
|
/// On failure, skips arbitrary tokens.
|
|
fn parameters(&mut self) -> Option<Vec<VariableId<'db>>> {
|
|
let mut r = vec![];
|
|
loop {
|
|
let name = self.word()?;
|
|
let vid = VariableId::new(self.db, name);
|
|
r.push(vid);
|
|
|
|
if self.ch(',').is_none() {
|
|
return Some(r);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Parses a single character.
|
|
///
|
|
/// Even on failure, only skips whitespace.
|
|
fn ch(&mut self, c: char) -> Option<Span<'db>> {
|
|
let start_position = self.skip_whitespace();
|
|
match self.peek() {
|
|
Some(p) if c == p => {
|
|
self.consume(c);
|
|
Some(self.span_from(start_position))
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Parses an identifier.
|
|
///
|
|
/// Even on failure, only skips whitespace.
|
|
fn word(&mut self) -> Option<String> {
|
|
self.skip_whitespace();
|
|
|
|
// In this loop, if we consume any characters, we always
|
|
// return `Some`.
|
|
let mut s = String::new();
|
|
let _position = self.position;
|
|
while let Some(ch) = self.peek() {
|
|
if ch.is_alphabetic() || ch == '_' || (!s.is_empty() && ch.is_numeric()) {
|
|
s.push(ch);
|
|
} else {
|
|
break;
|
|
}
|
|
|
|
self.consume(ch);
|
|
}
|
|
|
|
if s.is_empty() {
|
|
None
|
|
} else {
|
|
Some(s)
|
|
}
|
|
}
|
|
|
|
/// Parses a number.
|
|
///
|
|
/// Even on failure, only skips whitespace.
|
|
fn number(&mut self) -> Option<f64> {
|
|
let _start_position = self.skip_whitespace();
|
|
|
|
self.probe(|this| {
|
|
// 👆 We need the call to `probe` here because we could consume
|
|
// some characters like `3.1.2.3`, invoke `str::parse`, and then
|
|
// still return `None`.
|
|
let mut s = String::new();
|
|
while let Some(ch) = this.peek() {
|
|
if ch.is_numeric() || ch == '.' {
|
|
s.push(ch);
|
|
} else {
|
|
break;
|
|
}
|
|
|
|
this.consume(ch);
|
|
}
|
|
|
|
if s.is_empty() {
|
|
None
|
|
} else if let Ok(n) = str::parse(&s) {
|
|
Some(n)
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// ANCHOR: parse_string
|
|
/// Parses `source_text` in a fresh database and returns the pretty-printed
/// (`{:#?}`) pair of the parsed program and the diagnostics accumulated
/// while parsing it.
#[cfg(test)]
fn parse_string(source_text: &str) -> String {
    use salsa::Database as _;

    let database = crate::db::Database::default();
    database.attach(|db| {
        // Wrap the text in a salsa input.
        let input = SourceProgram::new(db, source_text.to_string());

        // Run the parser, then collect whatever diagnostics it reported.
        let program = parse_statements(db, input);
        let diagnostics = parse_statements::accumulated::<Diagnostic>(db, input);

        // Render both as a single debug string.
        format!("{:#?}", (program, diagnostics))
    })
}
|
|
// ANCHOR_END: parse_string
|
|
|
|
// ANCHOR: parse_print
|
|
/// A single `print` statement with one addition parses into the expected tree
/// and produces no diagnostics.
#[test]
fn parse_print() {
    let actual = parse_string("print 1 + 2");
    // Snapshot of the `{:#?}` output; nesting is indented four spaces per level.
    let expected = expect_test::expect![[r#"
        (
            Program {
                [salsa id]: Id(0),
                statements: [
                    Statement {
                        span: Span {
                            [salsa id]: Id(4),
                            start: 0,
                            end: 11,
                        },
                        data: Print(
                            Expression {
                                span: Span {
                                    [salsa id]: Id(3),
                                    start: 6,
                                    end: 11,
                                },
                                data: Op(
                                    Expression {
                                        span: Span {
                                            [salsa id]: Id(0),
                                            start: 6,
                                            end: 7,
                                        },
                                        data: Number(
                                            OrderedFloat(
                                                1.0,
                                            ),
                                        ),
                                    },
                                    Add,
                                    Expression {
                                        span: Span {
                                            [salsa id]: Id(2),
                                            start: 10,
                                            end: 11,
                                        },
                                        data: Number(
                                            OrderedFloat(
                                                2.0,
                                            ),
                                        ),
                                    },
                                ),
                            },
                        ),
                    },
                ],
            },
            [],
        )"#]];
    expected.assert_eq(&actual);
}
|
|
// ANCHOR_END: parse_print
|
|
|
|
/// A multi-statement program with two function definitions and three `print`
/// statements parses into the expected tree and produces no diagnostics.
#[test]
fn parse_example() {
    // The byte offsets in the snapshot depend on the exact whitespace of this
    // literal: each program line is indented by 12 spaces and the closing
    // quote by 8.
    let actual = parse_string(
        "
            fn area_rectangle(w, h) = w * h
            fn area_circle(r) = 3.14 * r * r
            print area_rectangle(3, 4)
            print area_circle(1)
            print 11 * 2
        ",
    );
    let expected = expect_test::expect![[r#"
        (
            Program {
                [salsa id]: Id(0),
                statements: [
                    Statement {
                        span: Span {
                            [salsa id]: Id(9),
                            start: 13,
                            end: 57,
                        },
                        data: Function(
                            Function {
                                [salsa id]: Id(0),
                                name: FunctionId {
                                    text: "area_rectangle",
                                },
                                name_span: Span {
                                    [salsa id]: Id(0),
                                    start: 16,
                                    end: 30,
                                },
                                args: [
                                    VariableId {
                                        text: "w",
                                    },
                                    VariableId {
                                        text: "h",
                                    },
                                ],
                                body: Expression {
                                    span: Span {
                                        [salsa id]: Id(8),
                                        start: 39,
                                        end: 57,
                                    },
                                    data: Op(
                                        Expression {
                                            span: Span {
                                                [salsa id]: Id(5),
                                                start: 39,
                                                end: 41,
                                            },
                                            data: Variable(
                                                VariableId {
                                                    text: "w",
                                                },
                                            ),
                                        },
                                        Multiply,
                                        Expression {
                                            span: Span {
                                                [salsa id]: Id(7),
                                                start: 43,
                                                end: 57,
                                            },
                                            data: Variable(
                                                VariableId {
                                                    text: "h",
                                                },
                                            ),
                                        },
                                    ),
                                },
                            },
                        ),
                    },
                    Statement {
                        span: Span {
                            [salsa id]: Id(21),
                            start: 57,
                            end: 102,
                        },
                        data: Function(
                            Function {
                                [salsa id]: Id(1),
                                name: FunctionId {
                                    text: "area_circle",
                                },
                                name_span: Span {
                                    [salsa id]: Id(10),
                                    start: 60,
                                    end: 71,
                                },
                                args: [
                                    VariableId {
                                        text: "r",
                                    },
                                ],
                                body: Expression {
                                    span: Span {
                                        [salsa id]: Id(20),
                                        start: 77,
                                        end: 102,
                                    },
                                    data: Op(
                                        Expression {
                                            span: Span {
                                                [salsa id]: Id(17),
                                                start: 77,
                                                end: 86,
                                            },
                                            data: Op(
                                                Expression {
                                                    span: Span {
                                                        [salsa id]: Id(14),
                                                        start: 77,
                                                        end: 81,
                                                    },
                                                    data: Number(
                                                        OrderedFloat(
                                                            3.14,
                                                        ),
                                                    ),
                                                },
                                                Multiply,
                                                Expression {
                                                    span: Span {
                                                        [salsa id]: Id(16),
                                                        start: 84,
                                                        end: 86,
                                                    },
                                                    data: Variable(
                                                        VariableId {
                                                            text: "r",
                                                        },
                                                    ),
                                                },
                                            ),
                                        },
                                        Multiply,
                                        Expression {
                                            span: Span {
                                                [salsa id]: Id(19),
                                                start: 88,
                                                end: 102,
                                            },
                                            data: Variable(
                                                VariableId {
                                                    text: "r",
                                                },
                                            ),
                                        },
                                    ),
                                },
                            },
                        ),
                    },
                    Statement {
                        span: Span {
                            [salsa id]: Id(28),
                            start: 102,
                            end: 141,
                        },
                        data: Print(
                            Expression {
                                span: Span {
                                    [salsa id]: Id(27),
                                    start: 108,
                                    end: 128,
                                },
                                data: Call(
                                    FunctionId {
                                        text: "area_rectangle",
                                    },
                                    [
                                        Expression {
                                            span: Span {
                                                [salsa id]: Id(23),
                                                start: 123,
                                                end: 124,
                                            },
                                            data: Number(
                                                OrderedFloat(
                                                    3.0,
                                                ),
                                            ),
                                        },
                                        Expression {
                                            span: Span {
                                                [salsa id]: Id(25),
                                                start: 126,
                                                end: 127,
                                            },
                                            data: Number(
                                                OrderedFloat(
                                                    4.0,
                                                ),
                                            ),
                                        },
                                    ],
                                ),
                            },
                        ),
                    },
                    Statement {
                        span: Span {
                            [salsa id]: Id(33),
                            start: 141,
                            end: 174,
                        },
                        data: Print(
                            Expression {
                                span: Span {
                                    [salsa id]: Id(32),
                                    start: 147,
                                    end: 161,
                                },
                                data: Call(
                                    FunctionId {
                                        text: "area_circle",
                                    },
                                    [
                                        Expression {
                                            span: Span {
                                                [salsa id]: Id(30),
                                                start: 159,
                                                end: 160,
                                            },
                                            data: Number(
                                                OrderedFloat(
                                                    1.0,
                                                ),
                                            ),
                                        },
                                    ],
                                ),
                            },
                        ),
                    },
                    Statement {
                        span: Span {
                            [salsa id]: Id(38),
                            start: 174,
                            end: 195,
                        },
                        data: Print(
                            Expression {
                                span: Span {
                                    [salsa id]: Id(37),
                                    start: 180,
                                    end: 186,
                                },
                                data: Op(
                                    Expression {
                                        span: Span {
                                            [salsa id]: Id(34),
                                            start: 180,
                                            end: 182,
                                        },
                                        data: Number(
                                            OrderedFloat(
                                                11.0,
                                            ),
                                        ),
                                    },
                                    Multiply,
                                    Expression {
                                        span: Span {
                                            [salsa id]: Id(36),
                                            start: 185,
                                            end: 186,
                                        },
                                        data: Number(
                                            OrderedFloat(
                                                2.0,
                                            ),
                                        ),
                                    },
                                ),
                            },
                        ),
                    },
                ],
            },
            [],
        )"#]];
    expected.assert_eq(&actual);
}
|
|
|
|
/// A malformed expression yields an empty program and a single diagnostic
/// pointing at the offending character.
#[test]
fn parse_error() {
    let source_text: &str = "print 1 + + 2";
    //                       0123456789^ <-- this is the position 10, where the error is reported
    let actual = parse_string(source_text);
    let expected = expect_test::expect![[r#"
        (
            Program {
                [salsa id]: Id(0),
                statements: [],
            },
            [
                Diagnostic {
                    start: 10,
                    end: 11,
                    message: "unexpected character",
                },
            ],
        )"#]];
    expected.assert_eq(&actual);
}
|
|
|
|
/// Multiplication binds tighter than addition, and addition is left-associative.
#[test]
fn parse_precedence() {
    // this parses as `(1 + (2 * 3)) + 4`
    let source_text: &str = "print 1 + 2 * 3 + 4";
    let actual = parse_string(source_text);
    let expected = expect_test::expect![[r#"
        (
            Program {
                [salsa id]: Id(0),
                statements: [
                    Statement {
                        span: Span {
                            [salsa id]: Id(10),
                            start: 0,
                            end: 19,
                        },
                        data: Print(
                            Expression {
                                span: Span {
                                    [salsa id]: Id(9),
                                    start: 6,
                                    end: 19,
                                },
                                data: Op(
                                    Expression {
                                        span: Span {
                                            [salsa id]: Id(6),
                                            start: 6,
                                            end: 16,
                                        },
                                        data: Op(
                                            Expression {
                                                span: Span {
                                                    [salsa id]: Id(0),
                                                    start: 6,
                                                    end: 7,
                                                },
                                                data: Number(
                                                    OrderedFloat(
                                                        1.0,
                                                    ),
                                                ),
                                            },
                                            Add,
                                            Expression {
                                                span: Span {
                                                    [salsa id]: Id(5),
                                                    start: 10,
                                                    end: 15,
                                                },
                                                data: Op(
                                                    Expression {
                                                        span: Span {
                                                            [salsa id]: Id(2),
                                                            start: 10,
                                                            end: 11,
                                                        },
                                                        data: Number(
                                                            OrderedFloat(
                                                                2.0,
                                                            ),
                                                        ),
                                                    },
                                                    Multiply,
                                                    Expression {
                                                        span: Span {
                                                            [salsa id]: Id(4),
                                                            start: 14,
                                                            end: 15,
                                                        },
                                                        data: Number(
                                                            OrderedFloat(
                                                                3.0,
                                                            ),
                                                        ),
                                                    },
                                                ),
                                            },
                                        ),
                                    },
                                    Add,
                                    Expression {
                                        span: Span {
                                            [salsa id]: Id(8),
                                            start: 18,
                                            end: 19,
                                        },
                                        data: Number(
                                            OrderedFloat(
                                                4.0,
                                            ),
                                        ),
                                    },
                                ),
                            },
                        ),
                    },
                ],
            },
            [],
        )"#]];
    expected.assert_eq(&actual);
}
|