initial commit
This commit is contained in:
commit
6673a24701
40 changed files with 22290 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
book
|
||||
target
|
2281
Cargo.lock
generated
Normal file
2281
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
31
Cargo.toml
Normal file
31
Cargo.toml
Normal file
|
@ -0,0 +1,31 @@
|
|||
[package]
|
||||
name = "libsrc"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
[features]
|
||||
default = ["lalrpop"]
|
||||
|
||||
[build-dependencies]
|
||||
lalrpop = { version = "0.20.2", optional = true }
|
||||
anyhow = "1.0.45"
|
||||
phf_codegen = "0.10"
|
||||
tiny-keccak = { version = "2", features = ["sha3"] }
|
||||
|
||||
|
||||
[dependencies]
|
||||
salsa = { git = "https://github.com/salsa-rs/salsa.git", branch = "master", package = "salsa-2022" }
|
||||
salsa-macros = { git = "https://github.com/salsa-rs/salsa.git", branch = "master", package = "salsa-2022-macros" }
|
||||
insta = "1.38.0"
|
||||
lalrpop = "0.20.2"
|
||||
lalrpop-util = { version = "0.20.2", features = ["lexer", "unicode"] }
|
||||
okstd = { version = "0.1.0", path = "../okstd", features = [
|
||||
], default-features = false }
|
||||
proptest = "1.4.0"
|
||||
stringzilla = "3.8.1"
|
||||
syn = "2.0.60"
|
||||
bitflags = "2.5.0"
|
||||
[dev-dependencies]
|
||||
insta = "1.38.0"
|
||||
proptest = "1.4.0"
|
17
book.toml
Normal file
17
book.toml
Normal file
|
@ -0,0 +1,17 @@
|
|||
[book]
|
||||
authors = ["@sevki"]
|
||||
language = "en"
|
||||
multilingual = true
|
||||
src = "docs"
|
||||
|
||||
# additional css https://raw.githubusercontent.com/oknotokcomputer/okcss/main/ok.css
|
||||
[output.html]
|
||||
additional-css = ["ok.css"]
|
||||
theme = "../theme"
|
||||
default-theme = "dark"
|
||||
git-repository-url = "https://github.com/oknotokcomputer/roar"
|
||||
preferred-dark-theme = "rust"
|
||||
|
||||
[preprocessor.svgbob]
|
||||
|
||||
[preprocessor.alerts]
|
100
build.rs
Normal file
100
build.rs
Normal file
|
@ -0,0 +1,100 @@
|
|||
use core::panic;
|
||||
use std::fmt::Write as _;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, BufWriter, Write};
|
||||
use std::path::PathBuf;
|
||||
use tiny_keccak::{Hasher, Sha3};
|
||||
|
||||
const SOURCE: &str = "src/parser/src.lalrpop";
|
||||
const TARGET: &str = "parser/src.rs";
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
println!("cargo:rerun-if-changed={SOURCE}");
|
||||
|
||||
try_lalrpop(SOURCE, TARGET)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn requires_lalrpop(source: &str, target: &str) -> bool {
|
||||
let target = if let Ok(target) = File::open(target) {
|
||||
target
|
||||
} else {
|
||||
println!("cargo:warning={TARGET} doesn't exist. regenerate.");
|
||||
return true;
|
||||
};
|
||||
|
||||
let sha_prefix = "// sha3: ";
|
||||
let sha3_line = BufReader::with_capacity(128, target)
|
||||
.lines()
|
||||
.find_map(|line| {
|
||||
let line = line.unwrap();
|
||||
line.starts_with(sha_prefix).then_some(line)
|
||||
})
|
||||
.expect("no sha3 line?");
|
||||
let expected_sha3_str = sha3_line.strip_prefix(sha_prefix).unwrap();
|
||||
|
||||
let actual_sha3 = {
|
||||
let mut hasher = Sha3::v256();
|
||||
let mut f = BufReader::new(File::open(source).unwrap());
|
||||
let mut line = String::new();
|
||||
while f.read_line(&mut line).unwrap() != 0 {
|
||||
if line.ends_with('\n') {
|
||||
line.pop();
|
||||
if line.ends_with('\r') {
|
||||
line.pop();
|
||||
}
|
||||
}
|
||||
hasher.update(line.as_bytes());
|
||||
hasher.update(b"\n");
|
||||
line.clear();
|
||||
}
|
||||
let mut hash = [0u8; 32];
|
||||
hasher.finalize(&mut hash);
|
||||
hash
|
||||
};
|
||||
let eq = sha_equal(expected_sha3_str, &actual_sha3);
|
||||
if !eq {
|
||||
println!("cargo:warning={TARGET} hash expected: {expected_sha3_str}");
|
||||
let mut actual_sha3_str = String::new();
|
||||
for byte in actual_sha3 {
|
||||
write!(actual_sha3_str, "{byte:02x}").unwrap();
|
||||
}
|
||||
println!("cargo:warning={TARGET} hash actual: {actual_sha3_str}");
|
||||
}
|
||||
!eq
|
||||
}
|
||||
|
||||
fn try_lalrpop(source: &str, target: &str) -> anyhow::Result<()> {
|
||||
if !requires_lalrpop(source, target) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
#[cfg(feature = "lalrpop")]
|
||||
{
|
||||
lalrpop::process_root().expect("running lalrpop failed");
|
||||
|
||||
let full_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(SOURCE);
|
||||
let path = full_path.to_str().unwrap();
|
||||
println!("cargo:rerun-if-changed={}", path);
|
||||
let p = lalrpop::Configuration::new()
|
||||
.generate_in_source_tree()
|
||||
.process_file(path).expect("msg");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "lalrpop"))]
|
||||
panic!("try: cargo build --manifest-path=compiler/parser/Cargo.toml --features=lalrpop");
|
||||
}
|
||||
|
||||
fn sha_equal(expected_sha3_str: &str, actual_sha3: &[u8; 32]) -> bool {
|
||||
if expected_sha3_str.len() != 64 {
|
||||
panic!("lalrpop version? hash bug is fixed in 0.19.8");
|
||||
}
|
||||
|
||||
let mut expected_sha3 = [0u8; 32];
|
||||
for (i, b) in expected_sha3.iter_mut().enumerate() {
|
||||
*b = u8::from_str_radix(&expected_sha3_str[i * 2..][..2], 16).unwrap();
|
||||
}
|
||||
*actual_sha3 == expected_sha3
|
||||
}
|
43
docs/0intro.md
Normal file
43
docs/0intro.md
Normal file
|
@ -0,0 +1,43 @@
|
|||
<img src="taocp.png" align="right" width="200px" />
|
||||
|
||||
|
||||
# src Language
|
||||
|
||||
`src` is a domain specific language for manipulating source code and building, progressively distiributed apps or [PDA](https://fistfulofbytes.com/progressive-distributed-apps/).
|
||||
|
||||
It draws a lot of inspiration from [Effekt](https://www.effekt-lang.org/) and [Koka](https://koka-lang.github.io/koka/doc/kokaspec.html) languages.
|
||||
|
||||
`src` is main aim is to provide a gradually distributed programming
|
||||
environment for building software.
|
||||
|
||||
It tries to achive these goals by providing a thin veneer over the operating systems `libc` or equivalent by treating the syscalls to the operating system as effects.
|
||||
|
||||
Therefore the operating system becomes the [effect handler](https://effect-handlers.org/) for the execution environment.
|
||||
|
||||
```src
|
||||
use { host } from std
|
||||
|
||||
effect Make: async + throws + execs + reads + writes {
|
||||
catch() [throws]
|
||||
await<T>(f: Future<T>) [async, throws] -> T
|
||||
exec(arg0: string, args: stringvec) [Make] -> i32
|
||||
}
|
||||
|
||||
struct Local {
|
||||
host: host
|
||||
}
|
||||
|
||||
impl Make for Local {
|
||||
fn catch(self) [throws] {
|
||||
}
|
||||
fn await<T>(f: Future<T>) [async, trhows] -> T {
|
||||
yield()
|
||||
}
|
||||
fn exec(self, arg0: string, args: vec<string>) [Vm] -> i32 {
|
||||
self.host.read("jobserver").await
|
||||
if self.host.exec(arg0, args) {
|
||||
raise(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
5
docs/SUMMARY.md
Normal file
5
docs/SUMMARY.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Summary
|
||||
|
||||
- [Intro](0intro.md)
|
||||
- [Examples](examples.md)
|
||||
- [Language](language/0intro.md)
|
30
docs/examples.md
Normal file
30
docs/examples.md
Normal file
|
@ -0,0 +1,30 @@
|
|||
# Examples
|
||||
|
||||
```src
|
||||
use { native_fs, native_exec } from host
|
||||
use { fs } from std
|
||||
|
||||
struct Innitguv {
|
||||
fs: native_fs,
|
||||
exec: native_exec
|
||||
current_pid: i32
|
||||
}
|
||||
|
||||
impl Exec for Innitguv {
|
||||
fn exec(&self, arg0: str, args: vec<str>) [nd, exec, await] -> i32 {
|
||||
let path = arg0
|
||||
let pid = self.exec.exec(path, args)
|
||||
if pid == -1 {
|
||||
return -1
|
||||
}
|
||||
self.current_pid = pid
|
||||
yield()
|
||||
}
|
||||
}
|
||||
|
||||
impl Actor for Innitguv {
|
||||
fn recv(&self, msg: Message) [recv, await] {
|
||||
self.exec(msg.path, msg.args)
|
||||
}
|
||||
}
|
||||
```
|
6
docs/language/0intro.md
Normal file
6
docs/language/0intro.md
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Language
|
||||
|
||||
## Specification
|
||||
```rust,ignore
|
||||
{{#include ../../src/parser/src.lalrpop}}
|
||||
```
|
BIN
docs/taocp.png
Normal file
BIN
docs/taocp.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 822 KiB |
16
proptest-regressions/lexer/lexer_prop_tests.txt
Normal file
16
proptest-regressions/lexer/lexer_prop_tests.txt
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Seeds for failure cases proptest has generated in the past. It is
|
||||
# automatically read and these particular cases re-run before any
|
||||
# novel cases are generated.
|
||||
#
|
||||
# It is recommended to check this file in to source control so that
|
||||
# everyone who runs the test benefits from these saved cases.
|
||||
cc 1c83167e024771d976df58234003dac45ba031f887e42fa2b061acf1a1370cb2 # shrinks to string = "\"#A\""
|
||||
cc fcd774e0a6ebcde805b7b7e5f476e7f93c232d572f0bf4c04e24da0e58cb1888 # shrinks to rnd = ("yzzwjqmyhiqofkfmuwzwibirqlbm", 7)
|
||||
cc f8cfea8e580b693c74835373498e4d90e0fb5999d6e8829e428af48ad1c41dea # shrinks to rnd = ("ebnicpjotxbnflxi", 4)
|
||||
cc 260dbc051bb574f18d0222da79697035cf1f9a0385d6e35082558a2173098aa4 # shrinks to rnd = ("djmooxmjjdocvtacweycdtky", 8)
|
||||
cc 3f2143c462bdd5113f5580c05acab2508c0ad23c9bc09811fc3b7477101c48ed # shrinks to rnd = ("aa", 1)
|
||||
cc 57751377005060ccb986d37941310b6a76eb50bab6bd26e774c42e319b01bdff # shrinks to rnd = ("a", 1)
|
||||
cc 1bb75d4dad1c2903dfbdc6ebc91c09af7fa197de6a0d454f4bf94e4ca2107ef7 # shrinks to rnd = ("pxqoxoktdyppluvr", 3)
|
||||
cc 861504c1bde04633ced8c7c2ba53300b17c5c33a33576455f31ea0381cd84822 # shrinks to rnd = ("ugetuobzjwwggmlvfaldp", 3)
|
||||
cc 61d685f319f26003a845170ab7aec02e26eb3696b0531bbc4eed3d9d139e5ed6 # shrinks to rnd = ("a", 1)
|
||||
cc f708e7be5415e8f960e050cd92279d78c905068f4307bdd97208910e5ed6f037 # shrinks to rnd = ("spvytpknoqtgglxefqbi", 1)
|
7
proptest-regressions/lexer/lexer_tests.txt
Normal file
7
proptest-regressions/lexer/lexer_tests.txt
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Seeds for failure cases proptest has generated in the past. It is
|
||||
# automatically read and these particular cases re-run before any
|
||||
# novel cases are generated.
|
||||
#
|
||||
# It is recommended to check this file in to source control so that
|
||||
# everyone who runs the test benefits from these saved cases.
|
||||
cc 539eafd927ae06585e452c563dbd935506987f19333d5d97a7e47e769cb4fb78 # shrinks to input = "¡"
|
14
proptest-regressions/parser/parser_prop_tests.txt
Normal file
14
proptest-regressions/parser/parser_prop_tests.txt
Normal file
|
@ -0,0 +1,14 @@
|
|||
# Seeds for failure cases proptest has generated in the past. It is
|
||||
# automatically read and these particular cases re-run before any
|
||||
# novel cases are generated.
|
||||
#
|
||||
# It is recommended to check this file in to source control so that
|
||||
# everyone who runs the test benefits from these saved cases.
|
||||
cc 53ed407a7836b80d1aeb8237943360dc417a3b4ff1d09cccf98ad30d4e5daf0b # shrinks to s = "self"
|
||||
cc 25c567f936eb3aef0681be73d32fc06584db01b9cb8be5ee01e5d99b75f183e5 # shrinks to s = 0.0
|
||||
cc 2c4f316af5e79461d0e55217d80b990b0bf3f5857bca9dc0116c15a113e721ea # shrinks to s = ("26496402", "8", "+")
|
||||
cc 10570f46a35ba69ae6f950da4381e32d5eea576a431fa733c68fe0de437bd1a9 # shrinks to s = 0, n = 0
|
||||
cc 849502590009964980943dc6b028f19129755e96a35e61fef820103c7a47141e # shrinks to s = "true", n = 208438984
|
||||
cc 3c31cefc4aa84bb451917c892bd0210df9d1c09c9860763bc56eb19fc93b660e # shrinks to s = "true", n = 0
|
||||
cc 8d077c98fec19684acd476a9013ce5c9c19e6f0f9bad1b187eddea4d4103ac21 # shrinks to s = "true"
|
||||
cc 8e3bd3186714efb1f2255d04318cb0f90a271cf4fdcba60a302b9210f41366cf # shrinks to s = ("-379538130", "0", "+")
|
14
proptest-regressions/parser/parser_tests.txt
Normal file
14
proptest-regressions/parser/parser_tests.txt
Normal file
|
@ -0,0 +1,14 @@
|
|||
# Seeds for failure cases proptest has generated in the past. It is
|
||||
# automatically read and these particular cases re-run before any
|
||||
# novel cases are generated.
|
||||
#
|
||||
# It is recommended to check this file in to source control so that
|
||||
# everyone who runs the test benefits from these saved cases.
|
||||
cc f4b12d9127d638ad4a31ad4c95b20d10138f371fe1ae926525e66fdac917c823 # shrinks to s = 0
|
||||
cc 8479b82e178bfabc6b68933111be0bf965b5e6b258f0eac472b431518f39fca3 # shrinks to s = "\""
|
||||
cc 92cd10d9b071770069af466193eec50a9363a1a56469c922f05a71a3d25f2b20 # shrinks to s = "\\"
|
||||
cc 1b5e71738bc9301b8164e86be87ffa4819af75b3fab471597050fb8bbf044fbe # shrinks to s = "null"
|
||||
cc 39e17d0930f1f7ceae8389177f28a2c43b3233541c7a49854fc23635fcb8f216 # shrinks to s = ("a", "18447000000000000000")
|
||||
cc d4e50e6c8c2a24006e7a78ad8b3d23f24089e6f7eaa32f1e42afd83eb82a10fb # shrinks to s = ("4043826871", "2280852354", "*")
|
||||
cc c64ace410e7d4926e2910b3161690270c72c6dd321820e02791b879ed656437d # shrinks to s = "^"
|
||||
cc 6dde019e8b018f1bcf0eee378c4f0b24a2a412d357011a64770eca7ec4c17e6d # shrinks to s = "true", n = "\"\""
|
0
src/compiler/checks.rs
Normal file
0
src/compiler/checks.rs
Normal file
64
src/compiler/db.rs
Normal file
64
src/compiler/db.rs
Normal file
|
@ -0,0 +1,64 @@
|
|||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use salsa::DebugWithDb;
|
||||
|
||||
use crate::{
|
||||
lexer::{self, Lexer, Token},
|
||||
Db,
|
||||
};
|
||||
|
||||
use super::text::SourceProgram;
|
||||
|
||||
#[derive(Default)]
|
||||
#[salsa::db(crate::Jar)]
|
||||
pub(crate) struct Database {
|
||||
storage: salsa::Storage<Self>,
|
||||
|
||||
// The logs are only used for testing and demonstrating reuse:
|
||||
//
|
||||
logs: Option<Arc<Mutex<Vec<String>>>>,
|
||||
}
|
||||
|
||||
impl Database {
|
||||
/// Enable logging of each salsa event.
|
||||
#[cfg(test)]
|
||||
pub fn enable_logging(self) -> Self {
|
||||
assert!(self.logs.is_none());
|
||||
Self {
|
||||
storage: self.storage,
|
||||
logs: Some(Default::default()),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn take_logs(&mut self) -> Vec<String> {
|
||||
if let Some(logs) = &self.logs {
|
||||
std::mem::take(&mut *logs.lock().unwrap())
|
||||
} else {
|
||||
panic!("logs not enabled");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl salsa::Database for Database {
|
||||
fn salsa_event(&self, event: salsa::Event) {
|
||||
eprintln!("Event: {event:?}");
|
||||
// Log interesting events, if logging is enabled
|
||||
if let Some(logs) = &self.logs {
|
||||
// don't log boring events
|
||||
if let salsa::EventKind::WillExecute { .. } = event.kind {
|
||||
logs.lock()
|
||||
.unwrap()
|
||||
.push(format!("Event: {:?}", event.debug(self)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
impl salsa::ParallelDatabase for Database {
|
||||
fn snapshot(&self) -> salsa::Snapshot<Self> {
|
||||
salsa::Snapshot::new(Database {
|
||||
storage: self.storage.snapshot(),
|
||||
logs: self.logs.clone(),
|
||||
})
|
||||
}
|
||||
}
|
76
src/compiler/errors.rs
Normal file
76
src/compiler/errors.rs
Normal file
|
@ -0,0 +1,76 @@
|
|||
use std::ops::Range;
|
||||
|
||||
use crate::Db;
|
||||
|
||||
use super::text::SourceProgram;
|
||||
|
||||
|
||||
pub struct Errors<'a>(Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token<'a>, &'a str>>);
|
||||
|
||||
impl<'a> From<Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token<'a>, &'a str>>>
|
||||
for Errors<'a>
|
||||
{
|
||||
fn from(
|
||||
errors: Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token<'a>, &'a str>>,
|
||||
) -> Self {
|
||||
Self(errors)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoIterator for Errors<'a> {
|
||||
type Item = Range<usize>;
|
||||
|
||||
type IntoIter = <Vec<std::ops::Range<usize>> as IntoIterator>::IntoIter;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.0
|
||||
.into_iter()
|
||||
.map(|error| match error.error {
|
||||
lalrpop_util::ParseError::InvalidToken { location } => location..location,
|
||||
lalrpop_util::ParseError::UnrecognizedEof { location, expected } => {
|
||||
location..location
|
||||
}
|
||||
lalrpop_util::ParseError::UnrecognizedToken { token, expected } => token.0..token.2,
|
||||
lalrpop_util::ParseError::ExtraToken { token } => token.0..token.2,
|
||||
lalrpop_util::ParseError::User { error } => todo!(),
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.into_iter()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn handle_errors(errors: Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token, &str>>, src: &str) -> String {
|
||||
let mut pretty = String::new();
|
||||
let mut last_end = 0;
|
||||
|
||||
for error in errors {
|
||||
match error.error {
|
||||
lalrpop_util::ParseError::InvalidToken { location } => todo!(),
|
||||
lalrpop_util::ParseError::UnrecognizedEof { location, expected } => todo!(),
|
||||
lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
|
||||
// find the line and column of the start and end tokens,
|
||||
// and print the line with a caret pointing to the error
|
||||
let start = token.0;
|
||||
let end = token.2;
|
||||
let start_line = src[..start].rfind('\n').map_or(0, |i| i + 1);
|
||||
let end_line = src[end..].find('\n').map_or(src.len(), |i| end + i);
|
||||
let line = &src[start_line..end_line];
|
||||
let start_col = start - start_line;
|
||||
let end_col = end - start_line;
|
||||
// pretty.push_str(&src[last_end..start]);
|
||||
pretty.push_str(&format!("error: unexpected token {:?}, expected one of {:?}\n", token.1, expected));
|
||||
pretty.push_str(&line);
|
||||
pretty.push_str("\n");
|
||||
pretty.push_str(&" ".repeat(start_col));
|
||||
pretty.push_str(&"^".repeat(end_col - start_col));
|
||||
last_end = end;
|
||||
},
|
||||
lalrpop_util::ParseError::ExtraToken { token } => todo!(),
|
||||
lalrpop_util::ParseError::User { error } => todo!(),
|
||||
};
|
||||
|
||||
}
|
||||
// pretty.push_str(&src[last_end..]);
|
||||
pretty
|
||||
}
|
63
src/compiler/ir.rs
Normal file
63
src/compiler/ir.rs
Normal file
|
@ -0,0 +1,63 @@
|
|||
#![allow(clippy::needless_borrow)]
|
||||
|
||||
use salsa::*;
|
||||
use std::{
|
||||
array::IntoIter,
|
||||
collections::BTreeMap,
|
||||
path::Iter,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use crate::{parser::ast};
|
||||
|
||||
#[salsa::tracked]
|
||||
pub struct Program {
|
||||
#[return_ref]
|
||||
pub modul: Vec<Function>,
|
||||
#[return_ref]
|
||||
pub symbols: BTreeMap<Mangled, Symbol>,
|
||||
}
|
||||
|
||||
#[salsa::tracked]
|
||||
pub struct Function {
|
||||
#[return_ref]
|
||||
pub name: String,
|
||||
|
||||
#[return_ref]
|
||||
pub body: Vec<Box<Function>>,
|
||||
|
||||
#[return_ref]
|
||||
pub effects: Vec<InternedEffect>,
|
||||
}
|
||||
|
||||
#[salsa::interned]
|
||||
pub struct InternedEffect {
|
||||
pub effect: String,
|
||||
}
|
||||
|
||||
#[salsa::interned]
|
||||
pub struct Symbol {
|
||||
#[return_ref]
|
||||
pub symbol: Mangled,
|
||||
}
|
||||
|
||||
#[salsa::tracked]
|
||||
pub struct EffectDef {
|
||||
#[return_ref]
|
||||
pub effect: ast::EffectDef,
|
||||
}
|
||||
|
||||
#[salsa::tracked]
|
||||
pub struct Import {
|
||||
#[return_ref]
|
||||
pub imports: Vec<String>,
|
||||
#[return_ref]
|
||||
pub module: String,
|
||||
}
|
||||
|
||||
#[salsa::interned]
|
||||
pub struct Mangled {
|
||||
#[return_ref]
|
||||
pub mangled: String,
|
||||
}
|
||||
|
81
src/compiler/mod.rs
Normal file
81
src/compiler/mod.rs
Normal file
|
@ -0,0 +1,81 @@
|
|||
use std::{
|
||||
collections::BTreeMap,
|
||||
ops::{Range, RangeBounds},
|
||||
};
|
||||
|
||||
use crate::{
|
||||
compiler::{errors::Errors, text::{Position, Span, Spanned}},
|
||||
parser::ast::{self, EffectDef, Module},
|
||||
Db,
|
||||
};
|
||||
|
||||
use self::text::SourceProgram;
|
||||
|
||||
mod db;
|
||||
mod errors;
|
||||
pub mod ir;
|
||||
mod tests;
|
||||
pub mod text;
|
||||
|
||||
|
||||
#[salsa::tracked]
|
||||
pub fn compile(db: &dyn Db, src: SourceProgram) -> ir::Program {
|
||||
let mut errors: Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token, &str>> = vec![];
|
||||
let wrapper = crate::lexer::TripleIterator::new(src.text(db));
|
||||
let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
|
||||
// let mut errors_in_positions: Vec<ir::Position> = vec![];
|
||||
if !errors.is_empty() {
|
||||
for error_range in Into::<Errors>::into(errors) {
|
||||
text::to_spans(db, src);
|
||||
}
|
||||
panic!();
|
||||
}
|
||||
|
||||
let modul = t.unwrap();
|
||||
let mut symbol_table = BTreeMap::new();
|
||||
for toplevel in modul.0 {
|
||||
match *toplevel {
|
||||
ast::Expression::BinaryExpression(_) => todo!(),
|
||||
ast::Expression::Bool(_) => todo!(),
|
||||
ast::Expression::Integer(_) => todo!(),
|
||||
ast::Expression::Float(_) => todo!(),
|
||||
ast::Expression::Ident(_) => todo!(),
|
||||
ast::Expression::Binding(_) => todo!(),
|
||||
ast::Expression::FnCall(_) => todo!(),
|
||||
ast::Expression::String(_) => todo!(),
|
||||
ast::Expression::FnDef(_) => {}
|
||||
ast::Expression::ShellCommand(_, _) => todo!(),
|
||||
ast::Expression::EffectDef(_) => todo!(),
|
||||
ast::Expression::StructDef(_) => todo!(),
|
||||
ast::Expression::UseDef(usedef) => {
|
||||
let import =
|
||||
ir::Import::new(db, usedef.0.into_iter().map(|x| x.0).collect(), usedef.1 .0);
|
||||
for import in add_imports(db, import) {
|
||||
symbol_table.insert(import, ir::Symbol::new(db, import));
|
||||
}
|
||||
}
|
||||
ast::Expression::Keyword(_) => todo!(),
|
||||
ast::Expression::ImplDef(_) => todo!(),
|
||||
ast::Expression::Branch(_) => todo!(),
|
||||
ast::Expression::Error => todo!(),
|
||||
}
|
||||
}
|
||||
let program = ir::Program::new(db, vec![], symbol_table);
|
||||
|
||||
program
|
||||
}
|
||||
|
||||
#[salsa::tracked]
|
||||
pub fn compile_effect(db: &dyn Db, effect: ir::EffectDef) {}
|
||||
|
||||
#[salsa::tracked]
|
||||
pub fn add_imports(db: &dyn Db, import: ir::Import) -> Vec<ir::Mangled> {
|
||||
let mut mangled = vec![];
|
||||
for imp in import.imports(db) {
|
||||
mangled.push(ir::Mangled::new(
|
||||
db,
|
||||
format!("{}_{}", import.module(db), imp),
|
||||
));
|
||||
}
|
||||
mangled
|
||||
}
|
0
src/compiler/std/mod.rs
Normal file
0
src/compiler/std/mod.rs
Normal file
42
src/compiler/tests.rs
Normal file
42
src/compiler/tests.rs
Normal file
|
@ -0,0 +1,42 @@
|
|||
#[cfg(test)]
|
||||
#[okstd::test]
|
||||
fn debug() {
|
||||
use salsa::{database::AsSalsaDatabase, storage::HasJarsDyn};
|
||||
|
||||
use super::{db, text::SourceProgram};
|
||||
|
||||
let src = r#"use { native_fs, native_exec } from host
|
||||
use { fs } from std
|
||||
|
||||
struct Innitguv {
|
||||
fs: native_fs,
|
||||
exec: native_exec
|
||||
current_pid: i32
|
||||
}
|
||||
|
||||
impl Exec for Innitguv {
|
||||
fn exec(&self, arg0: str, args: vec<str>) [nd, exec, await] -> i32 {
|
||||
let path = arg0
|
||||
let pid = self.exec.exec(path, args)
|
||||
if pid == -1 {
|
||||
raise(-1)
|
||||
}
|
||||
self.current_pid = pid
|
||||
yield()
|
||||
}
|
||||
}
|
||||
|
||||
impl Actor for Innitguv {
|
||||
fn recv(&self, msg: Message) [recv, await] {
|
||||
self.exec(msg.path, msg.args)
|
||||
}
|
||||
}"#;
|
||||
let db = &crate::compiler::db::Database::default().enable_logging();
|
||||
|
||||
let prog = SourceProgram::new(db, src.to_string());
|
||||
let res = super::compile(db, prog);
|
||||
println!("{:?}", prog);
|
||||
println!("{:?}", res.symbols(db));
|
||||
let modul = res.modul(db);
|
||||
println!("{:?}", modul);
|
||||
}
|
155
src/compiler/text.rs
Normal file
155
src/compiler/text.rs
Normal file
|
@ -0,0 +1,155 @@
|
|||
use std::ops::Range;
|
||||
|
||||
use bitflags::bitflags;
|
||||
use crate::Db;
|
||||
|
||||
/// Represents the source program text.
|
||||
#[salsa::input]
|
||||
pub struct SourceProgram {
|
||||
#[return_ref]
|
||||
pub text: String,
|
||||
}
|
||||
|
||||
/// Represents a spanned piece of code.
|
||||
#[salsa::interned]
|
||||
pub struct Spanned {
|
||||
/// The span of the code.
|
||||
#[return_ref]
|
||||
pub span: Span,
|
||||
|
||||
/// The source program associated with the code.
|
||||
#[return_ref]
|
||||
pub src: SourceProgram,
|
||||
|
||||
/// The position of the code in the source program.
|
||||
#[return_ref]
|
||||
pub pos: Position,
|
||||
}
|
||||
|
||||
/// Represents a span of text.
|
||||
#[salsa::interned]
|
||||
pub struct Span {
|
||||
/// The range of the span in the source program text.
|
||||
pub span: (usize, usize),
|
||||
}
|
||||
|
||||
/// Represents a position in the source code.
|
||||
#[salsa::interned]
|
||||
pub struct Position {
|
||||
/// The line number of the position.
|
||||
l: usize,
|
||||
|
||||
/// The column number of the position.
|
||||
c: usize,
|
||||
}
|
||||
|
||||
/// Represents the source map of the program.
|
||||
#[salsa::tracked]
|
||||
pub struct SourceMap {
|
||||
#[return_ref]
|
||||
pub tokens: Vec<Spanned>,
|
||||
}
|
||||
|
||||
#[salsa::tracked]
|
||||
pub fn calculate_line_lengths(db: &dyn Db, src: SourceProgram) -> Vec<usize> {
|
||||
src.text(db).lines().map(|x| x.len()).collect()
|
||||
}
|
||||
|
||||
// spanoverlap is a bitflag that is used to determine how two spans overlap
|
||||
// it is used to determine if a token is within a line
|
||||
// it is rare a token will span multiple lines but it is possible
|
||||
bitflags! {
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct SpanOverlap: u8 {
|
||||
const NONE = 0b0000;
|
||||
const START = 0b0001;
|
||||
const END = 0b0010;
|
||||
const BOTH = 0b0011;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn cmp_range<T: Ord>(a: &Range<T>, b: &Range<T>) -> SpanOverlap {
|
||||
let mut overlap = SpanOverlap::NONE;
|
||||
if a.contains(&b.start) {
|
||||
overlap |= SpanOverlap::START;
|
||||
}
|
||||
if a.contains(&b.end) {
|
||||
overlap |= SpanOverlap::END;
|
||||
}
|
||||
overlap
|
||||
}
|
||||
|
||||
|
||||
/// todo(sevki): split this into two functions
|
||||
#[salsa::tracked]
|
||||
pub fn to_spans(db: &dyn Db, src: SourceProgram) -> SourceMap {
|
||||
let line_lengths: Vec<Range<usize>> = calculate_line_lengths(db, src)
|
||||
.into_iter()
|
||||
.scan(0, |acc, x| {
|
||||
let range = *acc..*acc + x;
|
||||
*acc += x;
|
||||
Some(range)
|
||||
})
|
||||
.collect();
|
||||
|
||||
// reverse the line lengths and make it peakable essentially
|
||||
// turinging it into a stack
|
||||
let mut line_lengths = line_lengths.into_iter().enumerate().rev().peekable();
|
||||
|
||||
let mut spans = vec![];
|
||||
|
||||
let lexer = crate::lexer::Lexer::new(src.text(db), 0);
|
||||
// this is sort of a zip~ish operation.
|
||||
// we have to arrays that we are iterating over. One is build cheaply, the line lengths
|
||||
// and the other is built expensively, the lexer.
|
||||
// Lexer tokens have a start and end position, and we want to map these to the line lengths
|
||||
// first we iterate over the lexer tokens
|
||||
for token in lexer {
|
||||
let size = token.end - token.start;
|
||||
// then we peek at the first line
|
||||
let mut start: Option<(usize, usize)> = None;
|
||||
loop {
|
||||
if let Some((line_no, span)) = line_lengths.clone().peek() {
|
||||
// if the token is within the line
|
||||
let overlap = cmp_range(&span, &(token.start..token.end));
|
||||
if overlap == SpanOverlap::NONE && start.is_none() {
|
||||
// if the token is not within the line
|
||||
line_lengths.next();
|
||||
}
|
||||
if overlap == SpanOverlap::START || overlap == SpanOverlap::BOTH {
|
||||
// if the token is within the line
|
||||
start = Some((*line_no, span.start));
|
||||
// we do not need to iterate more.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if start.is_none() {
|
||||
// if the token is not within the line
|
||||
break;
|
||||
}
|
||||
let start = start.unwrap();
|
||||
let leading_chars = src.text(db).get(start.1..token.start);
|
||||
let column = leading_chars.map(|x| x.chars().count()).unwrap_or(0);
|
||||
/*
|
||||
```text
|
||||
1,1 7
|
||||
| |
|
||||
# Intro
|
||||
8 lorem ipsum dolor sit amet
|
||||
│
|
||||
13 byte start
|
||||
6th column, 2nd line
|
||||
```
|
||||
*/
|
||||
spans.push(Spanned::new(
|
||||
db,
|
||||
Span::new(db, (token.start, token.end)),
|
||||
src,
|
||||
Position::new(db, start.0, column),
|
||||
));
|
||||
}
|
||||
SourceMap::new(db, spans)
|
||||
}
|
12
src/lexer/lexer_prop_tests.rs
Normal file
12
src/lexer/lexer_prop_tests.rs
Normal file
|
@ -0,0 +1,12 @@
|
|||
use super::*;
|
||||
use proptest::{num::i32, prelude::*};
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn test_strings(rnd in ("[a-z]+", 1..10)) {
|
||||
let input = format!(r#"let {} = "{}""#, rnd.0, rnd.1);
|
||||
let lexer = Lexer::new(&input, 0);
|
||||
let tokens: Vec<Spanned<Token, Position>> = lexer.collect();
|
||||
assert_eq!(tokens.len(), 4);
|
||||
}
|
||||
}
|
306
src/lexer/lexer_snap_tests.rs
Normal file
306
src/lexer/lexer_snap_tests.rs
Normal file
|
@ -0,0 +1,306 @@
|
|||
use crate::lexer::{Lexer, TokenStreamDisplay};
|
||||
|
||||
use insta::assert_snapshot;
|
||||
use okstd::prelude::*;
|
||||
|
||||
#[cfg(test)]
|
||||
#[okstd::test]
|
||||
fn test_empty_lexer() {
|
||||
let input = " ";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_1_plus_1() {
|
||||
let input = "1 + 1";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Integer(1), 1:1
|
||||
- Plus, 1:3
|
||||
- Integer(1), 1:5
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_1_plus_1_plus_1() {
|
||||
let input = "1 + 1 + 1";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Integer(1), 1:1
|
||||
- Plus, 1:3
|
||||
- Integer(1), 1:5
|
||||
- Plus, 1:7
|
||||
- Integer(1), 1:9
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_1_plus_1_plus_1_plus_1() {
|
||||
let input = "1 + 1 / 1 % 1";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Integer(1), 1:1
|
||||
- Plus, 1:3
|
||||
- Integer(1), 1:5
|
||||
- Divide, 1:7
|
||||
- Integer(1), 1:9
|
||||
- Percent, 1:11
|
||||
- Integer(1), 1:13
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_let_a_equals_1() {
|
||||
let input = "let a = 1";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Let), 1:3
|
||||
- Word(Ident("a")), 1:5
|
||||
- Equals, 1:7
|
||||
- Integer(1), 1:9
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_let_a_equals_1_plus_1() {
|
||||
let input = "let a = 1 + 1";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Let), 1:3
|
||||
- Word(Ident("a")), 1:5
|
||||
- Equals, 1:7
|
||||
- Integer(1), 1:9
|
||||
- Plus, 1:11
|
||||
- Integer(1), 1:13
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_let_a_equals_1_plus_3_point_14() {
|
||||
let input = "let a = 1 + 3.14";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Let), 1:3
|
||||
- Word(Ident("a")), 1:5
|
||||
- Equals, 1:7
|
||||
- Integer(1), 1:9
|
||||
- Plus, 1:11
|
||||
- Float(3.14), 1:16
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_let_a_equals_1_plus_3_point_14_plus_1() {
|
||||
let input = "let a = 1 + 3.14 + 1";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Let), 1:3
|
||||
- Word(Ident("a")), 1:5
|
||||
- Equals, 1:7
|
||||
- Integer(1), 1:9
|
||||
- Plus, 1:11
|
||||
- Float(3.14), 1:16
|
||||
- Plus, 1:18
|
||||
- Integer(1), 1:20
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_fn_foo() {
|
||||
let input = "fn foo() {}";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Fn), 1:2
|
||||
- Word(Ident("foo")), 1:6
|
||||
- LeftParen, 1:7
|
||||
- RightParen, 1:8
|
||||
- LeftBrace, 1:10
|
||||
- RightBrace, 1:11
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_fn_foo_bar() {
|
||||
let input = "fn foo(bar) {}";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Fn), 1:2
|
||||
- Word(Ident("foo")), 1:6
|
||||
- LeftParen, 1:7
|
||||
- Word(Ident("bar")), 1:10
|
||||
- RightParen, 1:11
|
||||
- LeftBrace, 1:13
|
||||
- RightBrace, 1:14
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_fn_foo_bar_baz() {
|
||||
let input = "fn foo(bar, baz) {
|
||||
|
||||
}";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Fn), 1:2
|
||||
- Word(Ident("foo")), 1:6
|
||||
- LeftParen, 1:7
|
||||
- Word(Ident("bar")), 1:10
|
||||
- Comma, 1:11
|
||||
- Word(Ident("baz")), 1:15
|
||||
- RightParen, 1:16
|
||||
- LeftBrace, 1:18
|
||||
- NewLine, 2:0
|
||||
- NewLine, 3:0
|
||||
- RightBrace, 3:1
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_fn_foo_bar_baz_qux() {
|
||||
let input = "fn foo(bar, baz) {
|
||||
qux()
|
||||
}";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Fn), 1:2
|
||||
- Word(Ident("foo")), 1:6
|
||||
- LeftParen, 1:7
|
||||
- Word(Ident("bar")), 1:10
|
||||
- Comma, 1:11
|
||||
- Word(Ident("baz")), 1:15
|
||||
- RightParen, 1:16
|
||||
- LeftBrace, 1:18
|
||||
- NewLine, 2:0
|
||||
- Word(Ident("qux")), 2:7
|
||||
- LeftParen, 2:8
|
||||
- RightParen, 2:9
|
||||
- NewLine, 3:0
|
||||
- RightBrace, 3:1
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_fn_foo_bar_baz_qux_quux() {
|
||||
let input = "fn foo(bar, baz) {
|
||||
qux(quux)
|
||||
}";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Fn), 1:2
|
||||
- Word(Ident("foo")), 1:6
|
||||
- LeftParen, 1:7
|
||||
- Word(Ident("bar")), 1:10
|
||||
- Comma, 1:11
|
||||
- Word(Ident("baz")), 1:15
|
||||
- RightParen, 1:16
|
||||
- LeftBrace, 1:18
|
||||
- NewLine, 2:0
|
||||
- Word(Ident("qux")), 2:7
|
||||
- LeftParen, 2:8
|
||||
- Word(Ident("quux")), 2:12
|
||||
- RightParen, 2:13
|
||||
- NewLine, 3:0
|
||||
- RightBrace, 3:1
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_fn_foo_bar_baz_qux_quux_quuz() {
|
||||
let input = "fn foo(bar, baz) {
|
||||
qux(quux, 3.14,0xdeadbeef)
|
||||
}";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Fn), 1:2
|
||||
- Word(Ident("foo")), 1:6
|
||||
- LeftParen, 1:7
|
||||
- Word(Ident("bar")), 1:10
|
||||
- Comma, 1:11
|
||||
- Word(Ident("baz")), 1:15
|
||||
- RightParen, 1:16
|
||||
- LeftBrace, 1:18
|
||||
- NewLine, 2:0
|
||||
- Word(Ident("qux")), 2:7
|
||||
- LeftParen, 2:8
|
||||
- Word(Ident("quux")), 2:12
|
||||
- Comma, 2:13
|
||||
- Float(3.14), 2:18
|
||||
- Comma, 2:19
|
||||
- Integer(3735928559), 2:29
|
||||
- RightParen, 2:30
|
||||
- NewLine, 3:0
|
||||
- RightBrace, 3:1
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_func_with_genetics() {
|
||||
let input = "fn foo<T>(bar: T)[throws, awaits, execs] {
|
||||
qux()
|
||||
}";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Fn), 1:2
|
||||
- Word(Ident("foo")), 1:6
|
||||
- LessThan, 1:7
|
||||
- Word(Ident("T")), 1:8
|
||||
- GreaterThan, 1:9
|
||||
- LeftParen, 1:10
|
||||
- Word(Ident("bar")), 1:13
|
||||
- Colon, 1:14
|
||||
- Word(Ident("T")), 1:16
|
||||
- RightParen, 1:17
|
||||
- LeftBracket, 1:18
|
||||
- Word(Ident("throws")), 1:24
|
||||
- Comma, 1:25
|
||||
- Word(Ident("awaits")), 1:32
|
||||
- Comma, 1:33
|
||||
- Word(Ident("execs")), 1:39
|
||||
- RightBracket, 1:40
|
||||
- LeftBrace, 1:42
|
||||
- NewLine, 2:0
|
||||
- Word(Ident("qux")), 2:7
|
||||
- LeftParen, 2:8
|
||||
- RightParen, 2:9
|
||||
- NewLine, 3:0
|
||||
- RightBrace, 3:1
|
||||
"###);
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_func_call_with_genetics() {
|
||||
let input = "foo<T>(bar: T)[vm]";
|
||||
let lexer = Lexer::new(input, 0);
|
||||
let actual_tokens = lexer.map_while(|t| Some(t)).collect::<Vec<_>>();
|
||||
assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
|
||||
- Word(Ident("foo")), 1:3
|
||||
- LessThan, 1:4
|
||||
- Word(Ident("T")), 1:5
|
||||
- GreaterThan, 1:6
|
||||
- LeftParen, 1:7
|
||||
- Word(Ident("bar")), 1:10
|
||||
- Colon, 1:11
|
||||
- Word(Ident("T")), 1:13
|
||||
- RightParen, 1:14
|
||||
- LeftBracket, 1:15
|
||||
- Word(Ident("vm")), 1:17
|
||||
- RightBracket, 1:18
|
||||
"###);
|
||||
}
|
987
src/lexer/mod.rs
Normal file
987
src/lexer/mod.rs
Normal file
|
@ -0,0 +1,987 @@
|
|||
/*
|
||||
lexer.rs is a lexer for the src language
|
||||
*/
|
||||
|
||||
use std::{fmt::Display, iter::Iterator, iter::Peekable, str::Chars};
|
||||
|
||||
use lalrpop_util::{
|
||||
lexer::Token as LAToken,
|
||||
state_machine::{ParserDefinition, TokenTriple},
|
||||
};
|
||||
use okstd::prelude::*;
|
||||
use syn::token;
|
||||
|
||||
// Identifier
/// Shell-style special variables and named parameters, borrowing any name
/// text from the source input.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum Variable<'input> {
    // $$ is the process ID of the shell
    ProcessID,
    // $! is the process ID of the last background command
    LastBackgroundProcessID,
    // $? is the exit status of the last command executed
    LastCommandExitStatus,
    // $- is the current option flags as specified upon invocation, by the set built-in command, or by the shell invocation environment
    CurrentOptionFlags,
    // $@ is the positional parameters, starting from one
    PositionalParameters,
    // $# is the number of positional parameters in decimal
    PositionalParametersCount,
    // $0 is the name of the shell or shell script
    ShellName,
    // $1...$9 are the positional parameters, starting from zero
    PositionalParameter(usize),
    // ${parameter} is the value of the variable parameter
    Parameter(&'input str),
    // ${parameter:-word} is the value of the variable parameter if it is set; otherwise, the expansion of word is substituted
    ParameterDefault(&'input str, &'input str),
}
|
||||
|
||||
impl<'input> Display for Variable<'input> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Variable::ProcessID => write!(f, "$$"),
|
||||
Variable::LastBackgroundProcessID => write!(f, "$!"),
|
||||
Variable::LastCommandExitStatus => write!(f, "$?"),
|
||||
Variable::CurrentOptionFlags => write!(f, "$-"),
|
||||
Variable::PositionalParameters => write!(f, "$@"),
|
||||
Variable::PositionalParametersCount => write!(f, "$#"),
|
||||
Variable::ShellName => write!(f, "$0"),
|
||||
Variable::PositionalParameter(i) => write!(f, "${}", i),
|
||||
Variable::Parameter(p) => write!(f, "${}", p),
|
||||
Variable::ParameterDefault(p, w) => write!(f, "${}:{}", p, w),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// LexicalError
/// Errors the lexer can report instead of a token.
#[derive(Debug, PartialEq, Clone)]
pub enum LexicalError {
    // Unexpected character
    UnexpectedCharacter(char),
    // Unterminated string
    UnterminatedString,
    // Invalid number format
    InvalidNumberFormat,
    // Invalid variable format
    InvalidVariableFormat,
    // Unexpected end of input
    UnexpectedEndOfInput,
    // Invalid state emission: the given state can never produce a token
    InvalidStateEmission(State),
}

/// Module-local result alias for lexer operations.
type Result<T> = std::result::Result<T, LexicalError>;

/// A value of type `T` paired with its byte span `[start, end)` in the
/// input and a source position `P` (line/column by default).
#[derive(Debug, PartialEq, Clone)]
pub struct Spanned<T, P = Position> {
    pub node: T,
    pub start: usize,
    pub end: usize,
    pub pos: P,
}
|
||||
|
||||
impl Display for Spanned<Token<'_>> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{:?}, start: {} end: {}",
|
||||
self.node, self.start, self.end
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, P> Spanned<T, P> {
|
||||
pub fn new(node: T, start: usize, end: usize, pos: P) -> Self {
|
||||
Spanned {
|
||||
node,
|
||||
start,
|
||||
end,
|
||||
pos,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.end - self.start
|
||||
}
|
||||
}
|
||||
|
||||
// Position struct
/// Line/column location and byte size of a token; this is what the
/// snapshot tests print as `line:col`.
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub struct Position {
    pub line: usize,
    pub col: usize,
    pub size: usize,
}

// new function for Position
impl Position {
    /// Creates a position; `size` is the token's length in bytes.
    pub fn new(line: usize, col: usize, size: usize) -> Self {
        Self { line, col, size }
    }
}

// display trait implementation for Position
impl std::fmt::Display for Position {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(
            f,
            "line: {}, col: {}, size: {}",
            self.line, self.col, self.size
        )
    }
}
|
||||
|
||||
// display trait implementation for Token
|
||||
impl std::fmt::Display for Token<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?}", self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Keywords and identifier-like tokens. `Ident`/`FnIdent`/`Any` borrow
/// their text from the source input.
#[derive(Debug, PartialEq, Clone)]
pub enum Word<'input> {
    Let,
    Const,
    Fn,
    If,
    Else,
    Return,
    Match,
    For,
    While,
    Break,
    Continue,
    True,
    False,
    Null,
    Action,
    Enum,
    Impl,
    Import,
    None,
    Struct,
    Effect,
    When,
    Use,
    From,
    Where,
    // `self` keyword; trailing underscore avoids clashing with Rust's `Self`.
    Self_,
    Ident(&'input str),
    FnIdent(&'input str),
    Any(&'input str),
}
|
||||
|
||||
impl<'input> Word<'input> {
|
||||
fn chars(&self) -> Chars<'_> {
|
||||
match self {
|
||||
Word::Let => "let".chars(),
|
||||
Word::Const => "const".chars(),
|
||||
Word::Fn => "fn".chars(),
|
||||
Word::If => "if".chars(),
|
||||
Word::Else => "else".chars(),
|
||||
Word::Return => "return".chars(),
|
||||
Word::Match => "match".chars(),
|
||||
Word::For => "for".chars(),
|
||||
Word::While => "while".chars(),
|
||||
Word::Break => "break".chars(),
|
||||
Word::Continue => "continue".chars(),
|
||||
Word::True => "true".chars(),
|
||||
Word::False => "false".chars(),
|
||||
Word::Null => "null".chars(),
|
||||
Word::When => "when".chars(),
|
||||
Word::Ident(ident) => ident.chars(),
|
||||
Word::FnIdent(ident) => ident.chars(),
|
||||
Word::Any(word) => word.chars(),
|
||||
Word::Action => "action".chars(),
|
||||
Word::Enum => "enum".chars(),
|
||||
Word::Impl => "impl".chars(),
|
||||
Word::Import => "import".chars(),
|
||||
Word::None => "none".chars(),
|
||||
Word::Struct => "struct".chars(),
|
||||
Word::Effect => "effect".chars(),
|
||||
Word::Use => "use".chars(),
|
||||
Word::From => "from".chars(),
|
||||
Word::Where => "where".chars(),
|
||||
Word::Self_ => "self".chars(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// token types debug
/// A single lexical token; string-carrying variants borrow from the input.
#[derive(Debug, PartialEq, Clone)]
pub enum Token<'input> {
    // Operators
    Pipe,      // |
    Ampersand, // &
    Semicolon, // ;
    Equals,    // =
    // Redirections
    LessThan,    // <
    GreaterThan, // >
    // Identifiers
    Variable(Variable<'input>), // $a-z, $A-Z, $0-9, $_
    // Literals
    Word(Word<'input>),  // a-z, A-Z, 0-9, _
    String(&'input str), // "..."
    // Comments
    Comment(&'input str), // #
    // Numbers
    Integer(i64), // decimal or 0x-prefixed hex
    Float(f64),   // decimal with a fractional part
    // Special
    Eof,          // EOF
    NewLine,      // \n
    LeftParen,    // (
    RightParen,   // )
    LeftBrace,    // {
    RightBrace,   // }
    LeftBracket,  // [
    RightBracket, // ]
    Comma,        // ,
    Dot,          // .
    Colon,        // :
    Underscore,   // _
    Minus,        // -
    Plus,         // +
    Arrow,        // ->
    FatArrow,     // =>
    Divide,       // /
    Multiply,     // *
    Percent,      // %
    Dollar,       // $
    Exclamation,  // !
    Question,     // ?
    Tilde,        // ~
    At,           // @
    Caret,        // ^
    Shebang,      // #!
}
|
||||
|
||||
impl<'input> Token<'input> {
|
||||
fn to_chars(&'input self) -> Chars<'input> {
|
||||
match self {
|
||||
Token::Pipe => "|".chars(),
|
||||
Token::Ampersand => "&".chars(),
|
||||
Token::Semicolon => ";".chars(),
|
||||
Token::Equals => "=".chars(),
|
||||
Token::LessThan => "<".chars(),
|
||||
Token::GreaterThan => ">".chars(),
|
||||
Token::Variable(identifier) => {
|
||||
// Implement the conversion to chars for Variable
|
||||
// based on its fields
|
||||
"".chars()
|
||||
}
|
||||
Token::Word(word) => word.chars(),
|
||||
Token::String(string) => string.chars(),
|
||||
Token::Comment(comment) => comment.chars(),
|
||||
Token::Integer(number) => "".chars(),
|
||||
Token::Float(number) => "".chars(),
|
||||
Token::Eof => "".chars(),
|
||||
Token::NewLine => "\n".chars(),
|
||||
Token::LeftParen => "(".chars(),
|
||||
Token::RightParen => ")".chars(),
|
||||
Token::LeftBrace => "{".chars(),
|
||||
Token::RightBrace => "}".chars(),
|
||||
Token::LeftBracket => "[".chars(),
|
||||
Token::RightBracket => "]".chars(),
|
||||
Token::Comma => ",".chars(),
|
||||
Token::Colon => ":".chars(),
|
||||
Token::Underscore => "_".chars(),
|
||||
Token::Minus => "-".chars(),
|
||||
Token::Plus => "+".chars(),
|
||||
Token::Arrow => "->".chars(),
|
||||
Token::FatArrow => "=>".chars(),
|
||||
Token::Divide => "/".chars(),
|
||||
Token::Multiply => "*".chars(),
|
||||
Token::Percent => "%".chars(),
|
||||
Token::Dollar => "$".chars(),
|
||||
Token::Exclamation => "!".chars(),
|
||||
Token::Question => "?".chars(),
|
||||
Token::Tilde => "~".chars(),
|
||||
Token::At => "@".chars(),
|
||||
Token::Caret => "^".chars(),
|
||||
Token::Dot => ".".chars(),
|
||||
Token::Shebang => "#!".chars(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): this impl builds a *fresh* `Chars` on every call via
// `to_chars`, so for multi-char tokens it yields the first character
// forever and never returns `None`. No caller in this file appears to use
// it (emit! iterates `to_chars()` directly) — confirm before relying on it
// or consider removing it.
impl<'input> Iterator for Token<'input> {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        self.to_chars().next()
    }
}
|
||||
|
||||
/// Hand-written lexer over `input`. Tracks the absolute byte position plus
/// line/column for diagnostics, and a small state machine (`State`) that
/// decides which `consume_*` method handles the next character.
pub struct Lexer<'input> {
    input: &'input str,
    // Absolute byte offset of the cursor within `input`.
    pos: usize,
    // 1-based line of the cursor.
    line: usize,
    // 1-based column of the cursor.
    col: usize,
    // Current state-machine state.
    state: State,
    // Characters of the token currently being scanned.
    buffer: String,
    // Peekable view over the remaining input characters.
    peekable: Peekable<Chars<'input>>,
    // Most recently consumed character, if any.
    last_char: Option<char>,
}
|
||||
|
||||
impl<'input> Lexer<'input> {
    /// Creates a lexer over `input` starting at byte offset `pos`.
    /// Line/column tracking always starts at 1:1 regardless of `pos`.
    pub fn new(input: &'input str, pos: usize) -> Self {
        Self {
            input,
            pos,
            line: 1,
            col: 1,
            state: State::Program,
            buffer: String::new(),
            peekable: input.chars().peekable(),
            last_char: None,
        }
    }
}
|
||||
|
||||
/// Which quote character delimits a string literal.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Quotation {
    Single,
    Double,
}

/// Lexer state machine states; `skip_ws` dispatches into one of these and
/// the matching `consume_*` method then emits a token.
#[derive(Debug, PartialEq, Clone, Copy)]
enum State {
    Comment,
    Eof,
    NewLine,
    String(Quotation),
    Op,
    Variable,
    Word,
    Number,
    // Initial state, before any token has been scanned.
    Program,
    Shebang,
    // Neutral state between tokens.
    Any,
}
|
||||
/// Sets the lexer state. The trailing `;` inside the invocation is part of
/// the macro's matched token pattern.
macro_rules! set_state {
    ($self:expr, $state:expr;) => {{
        $self.state = $state;
    }};
}
/// Emits a `Spanned<Token>` and transitions the lexer into `$state`.
///
/// * `emit!(self, state => ?)` builds the token from the scan buffer via
///   `emit_buffer`, clears the buffer, then recurses into the second arm.
/// * `emit!(self, state => token)` advances the cursor over the token's
///   textual form and wraps it with span/position information.
///
/// NOTE(review): in the `?` arm the buffer is cleared *before* recursing,
/// so the `col - buffer.len() - 1` below sees an empty buffer and the
/// stored column is effectively the token's end column (which is what the
/// snapshot tests show) — confirm before changing.
macro_rules! emit {
    ($self:expr, $state:expr => ?) => {{
        let r = $self.emit_buffer().unwrap();
        $self.buffer.clear();
        emit!($self, $state => r)
    }};
    ($self:expr, $state:expr => $token:expr) => {{
        let start = $self.pos;
        // Advance over the token's characters so line/col stay in sync.
        // Numeric tokens have no borrowed text, so re-render them.
        match $token {
            Token::Integer(number) => {
                for c in number.to_string().chars() {
                    debug!("c: {}", c);
                    $self.advance(c);
                }
            }
            Token::Float(number) => {
                for c in number.to_string().chars() {
                    $self.advance(c);
                }
            }
            _ => {
                for c in $token.to_chars() {
                    $self.advance(c);
                }
            }
        }
        let end = $self.pos;
        let pos = Position::new(
            $self.line,
            $self.col - $self.buffer.len() - 1,
            end - start,
        );
        $self.state = $state;
        let token = $token;
        let token = Spanned::new(token, start, end, pos);
        Ok(token)
    }};
}
|
||||
|
||||
// Lexer trait implementation
|
||||
impl<'input> Lexer<'input> {
|
||||
    /// Returns the full source text this lexer was constructed over.
    pub fn input(&self) -> &'input str {
        self.input
    }
|
||||
|
||||
fn push(&mut self) -> bool {
|
||||
let c = self.peekable.next().unwrap();
|
||||
self.buffer.push(c);
|
||||
let finished = (self.pos as i32) + self.buffer.len() as i32 >= self.input.len() as i32;
|
||||
finished
|
||||
}
|
||||
|
||||
    /// Consumes the next character *without* buffering it (e.g. whitespace,
    /// the `$` sigil), advancing position bookkeeping.
    /// Returns `true` at end of input.
    fn ignore(&mut self) -> bool {
        let c = self.peekable.next().unwrap();
        self.advance(c)
    }
|
||||
|
||||
fn advance(&mut self, c: char) -> bool {
|
||||
if self.pos + 1 > self.input.len() {
|
||||
unreachable!("pos: {}, input.len: {}", self.pos, self.input.len());
|
||||
}
|
||||
self.pos += 1;
|
||||
self.last_char = Some(c);
|
||||
if c == '\n' {
|
||||
self.line += 1;
|
||||
self.col = 1;
|
||||
} else {
|
||||
self.col += 1;
|
||||
}
|
||||
let finished = self.pos >= self.input.len();
|
||||
finished
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
return self.peekable.peek().copied();
|
||||
}
|
||||
|
||||
// emit emit's the current token
|
||||
fn emit_buffer(&mut self) -> Result<Token<'input>> {
|
||||
let start = self.pos;
|
||||
let end = self.pos + self.buffer.len();
|
||||
|
||||
match self.state {
|
||||
// these states cannot emit tokens
|
||||
State::Program => Err(LexicalError::InvalidStateEmission(State::Program)),
|
||||
State::Op => Ok(match self.buffer.chars().next().unwrap() {
|
||||
'(' => Token::LeftParen,
|
||||
')' => Token::RightParen,
|
||||
'{' => Token::LeftBrace,
|
||||
'}' => Token::RightBrace,
|
||||
'>' => Token::GreaterThan,
|
||||
'<' => Token::LessThan,
|
||||
'|' => Token::Pipe,
|
||||
'&' => Token::Ampersand,
|
||||
';' => Token::Semicolon,
|
||||
',' => Token::Comma,
|
||||
':' => Token::Colon,
|
||||
'_' => Token::Underscore,
|
||||
'+' => Token::Plus,
|
||||
'*' => Token::Multiply,
|
||||
'[' => Token::LeftBracket,
|
||||
']' => Token::RightBracket,
|
||||
'%' => Token::Percent,
|
||||
'@' => Token::At,
|
||||
'/' => Token::Divide,
|
||||
'-' => {
|
||||
if self.buffer.len() == 1 {
|
||||
Token::Minus
|
||||
} else if self.buffer == "->" {
|
||||
Token::Arrow
|
||||
} else {
|
||||
unreachable!("unexpected character: {}", self.buffer)
|
||||
}
|
||||
}
|
||||
'=' => Token::Equals,
|
||||
_ => unreachable!(
|
||||
"unexpected character: {} in state: {:?}",
|
||||
self.buffer, self.state
|
||||
),
|
||||
}),
|
||||
State::Any => Err(LexicalError::InvalidStateEmission(State::Any)),
|
||||
// these states can emit tokens
|
||||
State::Comment => {
|
||||
let comment = self
|
||||
.input
|
||||
.get(start..end)
|
||||
.ok_or(LexicalError::UnexpectedEndOfInput)?;
|
||||
Ok(Token::Comment(comment))
|
||||
}
|
||||
State::Variable => {
|
||||
let variable = self.buffer.clone();
|
||||
let identifier = match variable.as_str() {
|
||||
"$$" => Variable::ProcessID,
|
||||
"$?" => Variable::LastCommandExitStatus,
|
||||
"$!" => Variable::LastBackgroundProcessID,
|
||||
"$-" => Variable::CurrentOptionFlags,
|
||||
"$0" => Variable::ShellName,
|
||||
"$#" => Variable::PositionalParametersCount,
|
||||
_ => {
|
||||
if variable.starts_with('$') && variable.len() > 1 {
|
||||
let number = variable[1..]
|
||||
.parse()
|
||||
.map_err(|_| LexicalError::InvalidVariableFormat)?;
|
||||
Variable::PositionalParameter(number)
|
||||
} else {
|
||||
let var = self
|
||||
.input
|
||||
.get(start..end)
|
||||
.ok_or(LexicalError::UnexpectedEndOfInput)?;
|
||||
Variable::Parameter(var)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Token::Variable(identifier))
|
||||
}
|
||||
State::Word => {
|
||||
let word = self
|
||||
.input
|
||||
.get(start..end)
|
||||
.ok_or(LexicalError::UnexpectedEndOfInput)?;
|
||||
let word = match word {
|
||||
"let" => Word::Let,
|
||||
"const" => Word::Const,
|
||||
"fn" => Word::Fn,
|
||||
"if" => Word::If,
|
||||
"else" => Word::Else,
|
||||
"return" => Word::Return,
|
||||
"match" => Word::Match,
|
||||
"for" => Word::For,
|
||||
"while" => Word::While,
|
||||
"break" => Word::Break,
|
||||
"continue" => Word::Continue,
|
||||
"true" => Word::True,
|
||||
"false" => Word::False,
|
||||
"null" => Word::Null,
|
||||
"action" => Word::Action,
|
||||
"enum" => Word::Enum,
|
||||
"impl" => Word::Impl,
|
||||
"import" => Word::Import,
|
||||
"none" => Word::None,
|
||||
"struct" => Word::Struct,
|
||||
"effect" => Word::Effect,
|
||||
"when" => Word::When,
|
||||
"use" => Word::Use,
|
||||
"from" => Word::From,
|
||||
"where" => Word::Where,
|
||||
"self" => Word::Self_,
|
||||
_ => {
|
||||
Word::Ident(word)
|
||||
// }
|
||||
}
|
||||
};
|
||||
Ok(Token::Word(word))
|
||||
}
|
||||
State::String(Quotation) => {
|
||||
let last_char = self.buffer.chars().last();
|
||||
let quote = if Quotation == Quotation::Double {
|
||||
Some('"')
|
||||
} else {
|
||||
Some('\'')
|
||||
};
|
||||
if last_char != quote {
|
||||
panic!("expected: {:?}, got: {:?}", quote, last_char);
|
||||
return Err(LexicalError::UnterminatedString);
|
||||
}
|
||||
let string = self
|
||||
.input
|
||||
.get(start..end)
|
||||
.expect("shoulld've done something");
|
||||
Ok(Token::String(string))
|
||||
}
|
||||
State::Number => {
|
||||
let number = self.buffer.clone();
|
||||
if number.contains('.') {
|
||||
let float = number
|
||||
.parse()
|
||||
.map_err(|_| LexicalError::InvalidNumberFormat)?;
|
||||
|
||||
Ok(Token::Float(float))
|
||||
} else if number.starts_with("0x") {
|
||||
let integer = i64::from_str_radix(&number[2..], 16)
|
||||
.map_err(|_| LexicalError::InvalidNumberFormat)?;
|
||||
Ok(Token::Integer(integer))
|
||||
} else {
|
||||
let integer = number
|
||||
.parse()
|
||||
.map_err(|_| LexicalError::InvalidNumberFormat)?;
|
||||
Ok(Token::Integer(integer))
|
||||
}
|
||||
}
|
||||
State::NewLine => Ok(Token::NewLine),
|
||||
State::Eof => Ok(Token::Eof),
|
||||
State::Shebang => Ok(Token::Shebang),
|
||||
}
|
||||
}
|
||||
|
||||
    /// Skips insignificant whitespace (spaces/tabs), then inspects the next
    /// character to pick the state whose `consume_*` method should handle
    /// it. The dispatching character itself is *not* consumed here.
    ///
    /// # Errors
    /// `LexicalError::UnexpectedCharacter` for characters that start no
    /// known token class.
    fn skip_ws(&mut self) -> Result<()> {
        while let Some(c) = self.peek() {
            match c {
                ' ' => {
                    self.ignore();
                }
                '\t' => {
                    self.ignore();
                }
                // `#` starts a comment (possibly a shebang, resolved later).
                '#' => {
                    set_state!(self, State::Comment;);
                    return Ok(());
                }
                '"' => {
                    set_state!(self, State::String(Quotation::Double););
                    return Ok(());
                }
                '\'' => {
                    set_state!(self, State::String(Quotation::Single););
                    return Ok(());
                }
                '$' => {
                    set_state!(self, State::Variable;);
                    return Ok(());
                }
                'a'..='z' | 'A'..='Z' | '_' => {
                    set_state!(self, State::Word;);
                    return Ok(());
                }
                '0'..='9' => {
                    set_state!(self, State::Number;);
                    return Ok(());
                }
                '\n' => {
                    set_state!(self, State::NewLine;);
                    return Ok(());
                }
                // Anything that can begin an operator token.
                '(' | ')' | '{' | '}' | '>' | '<' | '|' | '&' | ';' | ',' | ':' | '+' | '*'
                | '[' | ']' | '%' | '@' | '/' | '-' | '=' | '!' => {
                    set_state!(self, State::Op;);
                    debug!("to state: {:?}", self.state);
                    return Ok(());
                }
                _ => {
                    return Err(LexicalError::UnexpectedCharacter(c))?;
                }
            }
            if self.pos >= self.input.len() {
                break;
            }
        }
        // Ran off the end while skipping whitespace.
        if self.pos >= self.input.len() {
            set_state!(self, State::Eof;);
        }
        Ok(())
    }
|
||||
|
||||
    /// Consumes an operator starting at the cursor and emits the matching
    /// token. Multi-character operators (`->`, `=>`, `!#`) peek one ahead;
    /// `-` followed by a digit re-dispatches to number scanning.
    fn consume_op(&mut self) -> Result<Spanned<Token<'input>>> {
        if let Some(c) = self.peek() {
            debug!("consume_op: {}", c);
            if self.state != State::Op {
                return Err(LexicalError::InvalidStateEmission(self.state))?;
            }
            match c {
                // Single-character operators.
                // NOTE(review): '/' appears both here and in a dedicated
                // arm below; this list wins, so the later '/' arm is an
                // unreachable pattern — confirm which behavior is intended.
                '(' | ')' | '{' | '}' | '>' | '<' | '|' | '&' | ';' | ',' | ':' | '_' | '+'
                | '/' | '*' | '[' | ']' | '%' | '@' => {
                    let state = if self.push() { State::Eof } else { State::Any };
                    return emit!(self, state => ?);
                }
                // `=` and `=>`.
                '=' => {
                    self.push();
                    if let Some('>') = self.peek() {
                        self.push();
                        return emit!(self, State::Any => ?);
                    } else {
                        let state = if self.pos == self.input.len() {
                            State::Eof
                        } else {
                            State::Any
                        };
                        return emit!(self, state => ?);
                    }
                } // - and ->
                '-' => {
                    self.push();
                    match self.peek() {
                        Some('>') => {
                            self.push();
                            return emit!(self, State::Any => ?);
                        }
                        // `-` directly followed by a digit: a number.
                        Some('0'..='9') => {
                            set_state!(self, State::Number;);
                            return self.consume_number();
                        }
                        _ => {
                            let state = if self.push() { State::Eof } else { State::Any };
                            return emit!(self, state => ?);
                        }
                    }
                }
                '/' => {
                    let state = if self.push() { State::Eof } else { State::Any };
                    // NOTE(review): both arms are identical, so the space
                    // lookahead currently has no effect (and this whole arm
                    // is shadowed by the list above).
                    match self.peek() {
                        Some(' ') => {
                            return emit!(self, state => ?);
                        }
                        _ => {
                            return emit!(self, state => ?);
                        }
                    }
                } // / and /directory/file
                '!' => {
                    let state = if self.push() { State::Eof } else { State::Any };
                    if let Some('#') = self.peek() {
                        self.push();
                        return emit!(self, State::Any => ?);
                    } else {
                        return emit!(self, state => ?);
                    }
                }
                _ => {
                    // NOTE(review): the emit below is dead code because of
                    // this panic; decide whether this should surface as a
                    // LexicalError instead of aborting.
                    panic!("unexpected character: '{}'", c);
                    return emit!(self, State::Any => ?);
                }
            }
        }
        emit!(self, self.state=> Token::Eof)
    }
|
||||
|
||||
    // comment state
    /// Consumes characters following a `#` until end-of-line or end of
    /// input and emits them as a `Comment` token.
    fn consume_comment(&mut self) -> Result<Spanned<Token<'input>>> {
        loop {
            match self.peek() {
                // `#!` — shebang line.
                Some('!') => {
                    let state = if self.push() { State::Eof } else { State::Any };
                    set_state!(self, State::Shebang;);
                    // NOTE(review): emit! immediately overwrites the state
                    // set by set_state! with `state` — confirm which state
                    // transition is actually intended here.
                    return emit!(self, state => ?);
                }
                Some('\n') => {
                    return emit!(self, State::NewLine => ?);
                }
                // if the option is none, break
                None => {
                    return emit!(self, State::Any => ?);
                }
                _ => {
                    if self.push() {
                        return emit!(self, State::Eof => ?);
                    }
                }
            }
        }
    }
|
||||
|
||||
// consume word
|
||||
fn consume_word(&mut self) -> Result<Spanned<Token<'input>>> {
|
||||
while let Some(c) = self.peek() {
|
||||
match c {
|
||||
'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '.' | '/' | '_' => {
|
||||
if self.push() {
|
||||
return emit!(self, State::Eof => ?);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return emit!(self, State::Any => ?);
|
||||
}
|
||||
|
||||
    // consume number
    /// Consumes an integer, float, or `0x` hex literal; the actual parsing
    /// into `Integer`/`Float` happens in `emit_buffer`.
    fn consume_number(&mut self) -> Result<Spanned<Token<'input>>> {
        debug!("consume_number");
        while let Some(c) = self.peek() {
            match c {
                '0'..='9' => {
                    if self.push() {
                        debug!("finished");
                        return emit!(self, State::Eof => ?);
                    }
                }
                // . is only allowed once
                '.' => {
                    if self.buffer.contains('.') {
                        break;
                    } else if self.push() {
                        // Input ends right after the '.': not a valid number.
                        return Err(LexicalError::InvalidNumberFormat);
                    }
                }
                // if the first character is a 0, then the next character can be x
                'x' => {
                    if self.buffer.starts_with('0') {
                        if self.push() {
                            debug!("buffer: {}", self.buffer);
                            return emit!(self, State::Number => ?);
                        }
                    } else {
                        break;
                    }
                }
                // also hex numbers, only if the buffer starts with 0x
                'a'..='f' | 'A'..='F' => {
                    if self.buffer.starts_with("0x") {
                        if self.push() {
                            debug!("buffer: {}", self.buffer);
                            return emit!(self, State::Number => ?);
                        }
                    }
                    // handle scientific notation
                    // NOTE(review): only lowercase 'e' after a '.' matches
                    // here (this arm only sees a–f/A–F); uppercase 'E' and
                    // exponent signs are not handled — confirm coverage.
                    else if self.buffer.contains(".") && c == 'e' {
                        if self.push() {
                            debug!("buffer: {}", self.buffer);
                            return emit!(self, State::Number => ?);
                        }
                    } else {
                        break;
                    }
                }
                _ => {
                    debug!("breaking");
                    return emit!(self, State::Any => ?);
                }
            }
        }
        return emit!(self, State::Eof => ?);
    }
|
||||
|
||||
fn consume_newline(&mut self) -> Result<Spanned<Token<'input>>> {
|
||||
match self.peek() {
|
||||
Some('\n') => {
|
||||
let state = if self.push() { State::Eof } else { State::Any };
|
||||
return emit!(self, state => ?);
|
||||
}
|
||||
_ => {
|
||||
return emit!(self, State::Any => Token::NewLine);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn consume_string_literal(&mut self, quotation: Quotation) -> Result<Spanned<Token<'input>>> {
|
||||
// loop until the you see the same quotation mark as the one you started with
|
||||
// or if you see an escape character
|
||||
self.push();
|
||||
while let Some(c) = self.peek() {
|
||||
match c {
|
||||
'"' => {
|
||||
if quotation == Quotation::Double {
|
||||
let state = if self.push() { State::Eof } else { State::Any };
|
||||
return emit!(self, state => ?);
|
||||
}
|
||||
}
|
||||
'\'' => {
|
||||
if quotation == Quotation::Single {
|
||||
let state = if self.push() { State::Eof } else { State::Any };
|
||||
return emit!(self, state => ?);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if self.push() {
|
||||
self.state = State::Eof;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
panic!("unexpected state: {:?}", self.state);
|
||||
}
|
||||
|
||||
    /// Consumes a `$`-prefixed variable name and emits a `Variable` token.
    /// The `$` sigil is skipped (not buffered) before the name is read.
    fn consume_variable(&mut self) -> Result<Spanned<Token<'input>>> {
        // ignore $
        self.ignore();
        while let Some(c) = self.peek() {
            match c {
                'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '.' => {
                    if self.push() {
                        return emit!(self, State::Any => ?);
                    }
                }
                _ => {
                    break;
                }
            }
        }
        // NOTE(review): transitioning to `Op` here assumes the character
        // that ended the name starts an operator — confirm for e.g. a
        // trailing space.
        return emit!(self, State::Op => ?);
    }
|
||||
}
|
||||
|
||||
// Iterator Trait implementation for self<
|
||||
impl<'input> Iterator for Lexer<'input> {
|
||||
type Item = Spanned<Token<'input>>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.pos >= self.input.len() && self.state != State::Eof {
|
||||
self.state = State::Eof;
|
||||
return None;
|
||||
} else if self.pos >= self.input.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.state == State::Program || self.state == State::Any {
|
||||
self.skip_ws().unwrap();
|
||||
}
|
||||
let res = match self.state {
|
||||
State::Op => self.consume_op(),
|
||||
State::Comment => self.consume_comment(),
|
||||
State::Eof => {
|
||||
return None;
|
||||
}
|
||||
State::NewLine => self.consume_newline(),
|
||||
State::String(quotation) => self.consume_string_literal(quotation),
|
||||
State::Variable => self.consume_variable(),
|
||||
State::Word => self.consume_word(),
|
||||
State::Number => self.consume_number(),
|
||||
State::Any | State::Program => unreachable!(),
|
||||
State::Shebang => todo!(),
|
||||
};
|
||||
debug!(
|
||||
">>> state: {:?}, res: {:?}, pos: {}, line: {}, col: {}",
|
||||
self.state, res, self.pos, self.line, self.col
|
||||
);
|
||||
self.buffer.clear();
|
||||
match res {
|
||||
Ok(token) => {
|
||||
match token.node {
|
||||
Token::Eof => {
|
||||
return None;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
return Some(token);
|
||||
}
|
||||
_ => {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
panic!("unexpected state: {:?}", self.state);
|
||||
}
|
||||
}
|
||||
|
||||
struct TokenStreamDisplay<'input>(Vec<Spanned<Token<'input>>>);
|
||||
|
||||
impl Display for TokenStreamDisplay<'_> {
    /// Renders each token as a list item: `- <token>, <line>:<col>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for token in &self.0 {
            writeln!(f, "- {}, {}:{}", token.node, token.pos.line, token.pos.col)?;
        }
        Ok(())
    }
}
|
||||
|
||||
impl<'input> From<Vec<Spanned<Token<'input>>>> for TokenStreamDisplay<'input> {
|
||||
fn from(tokens: Vec<Spanned<Token<'input>>>) -> Self {
|
||||
TokenStreamDisplay(tokens)
|
||||
}
|
||||
}
|
||||
|
||||
mod lexer_prop_tests;
|
||||
mod lexer_snap_tests;
|
||||
|
||||
pub struct TripleIterator<'input>(Lexer<'input>);
|
||||
|
||||
impl<'input> TripleIterator<'input> {
|
||||
pub fn new(input: &'input str) -> Self {
|
||||
TripleIterator(Lexer::new(input, 0))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'input> Iterator for TripleIterator<'input> {
|
||||
type Item = (usize, Token<'input>, usize);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let token = self.0.next()?;
|
||||
debug!("token: {:?}", token);
|
||||
Some((token.start, token.node, token.end))
|
||||
}
|
||||
}
|
33
src/lib.rs
Normal file
33
src/lib.rs
Normal file
|
@ -0,0 +1,33 @@
|
|||
pub mod lexer;
|
||||
pub mod parser;
|
||||
pub mod compiler;
|
||||
|
||||
use compiler::text;
|
||||
use parser::ast;
|
||||
|
||||
use crate::compiler::ir;
|
||||
|
||||
// Salsa jar: registers every tracked function, interned struct, and input
// that participates in incremental compilation for this crate.
#[salsa::jar(db = Db)]
pub struct Jar(
    compiler::compile,
    compiler::compile_effect,
    compiler::add_imports,
    text::to_spans,
    text::calculate_line_lengths,
    text::Span,
    text::Spanned,
    text::Position,
    text::SourceMap,
    text::SourceProgram,
    ir::Program,
    ir::Function,
    ir::InternedEffect,
    ir::Symbol,
    ir::EffectDef,
    ir::Import,
    ir::Mangled,
);
|
||||
|
||||
/// Database trait for this crate's salsa jar; blanket-implemented below for
/// any database that includes [`Jar`].
pub trait Db: salsa::DbWithJar<Jar> {}

impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}
|
188
src/parser/ast.rs
Normal file
188
src/parser/ast.rs
Normal file
|
@ -0,0 +1,188 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use proptest::prelude::*;
|
||||
|
||||
/// Fallback name assigned to function definitions written without one.
pub const ANON_FN_NAME: &str = "anonymous";

/// An identifier with optional generic arguments, e.g. `Future<T>`.
#[derive(PartialEq, Debug, Clone)]
pub struct Ident(pub String, pub Option<Vec<Ident>>);

/// A string literal's contents.
#[derive(PartialEq, Debug)]
pub struct StringLit(pub String);

/// A `let`-style binding of a name to an expression.
#[derive(PartialEq, Debug)]
pub struct Binding(pub Ident, pub Box<Expression>);

/// Literal constant values.
#[derive(PartialEq, Debug)]
pub enum Literal {
    Bool(bool),
    Float(f64),
    Integer(i64),
    String(String),
}

/// Reserved words of the language.
// NOTE(review): both `SelfValue` and `Self_` exist — confirm they are not
// duplicates of the same concept.
#[derive(PartialEq, Debug)]
pub enum Keyword {
    None,
    Some,
    Let,
    Action,
    Saga,
    Fn,
    If,
    Else,
    Match,
    Arrow,
    Struct,
    SelfValue,
    When,
    Effect,
    Impl,
    Use,
    From,
    Where,
    Self_,
}
|
||||
/// A value position: either a literal constant or a reference to a name.
#[derive(PartialEq, Debug)]
pub enum Value {
    Literal(Literal),
    Ident(Ident),
}

/// A sequence of items forming a body (function body, struct fields, ...).
#[derive(PartialEq, Debug)]
pub struct Block<T>(pub Vec<T>);

/// A tuple of items.
#[derive(PartialEq, Debug)]
pub struct Tuple<T>(pub Vec<T>);

/// An array of items.
#[derive(PartialEq, Debug)]
pub struct Array<T>(pub Vec<T>);

/// An infix application `lhs op rhs`.
#[derive(PartialEq, Debug)]
pub struct BinaryOperation {
    pub lhs: Box<Expression>,
    pub op: Operator,
    pub rhs: Box<Expression>,
}

/// A call `name(arg0, arg1, ...)`.
#[derive(PartialEq, Debug)]
pub struct FnCall(pub Ident, pub Vec<Box<Expression>>);
|
||||
|
||||
/// All expression forms the parser can produce; statement-like constructs
/// (definitions, bindings, `use`) are folded into this enum as well.
#[derive(PartialEq, Debug)]
pub enum Expression {
    BinaryExpression(BinaryOperation),
    Bool(bool),
    Integer(i64),
    Float(f64),
    Ident(Ident),
    Binding(Binding),
    FnCall(FnCall),
    String(String),
    FnDef(FnDef),
    ShellCommand(Vec<Ident>, Vec<Box<Expression>>),
    EffectDef(EffectDef),
    StructDef(StructDef),
    UseDef(UseDef),
    Keyword(Keyword),
    ImplDef(ImplDef),
    Branch(Branch),
    // Placeholder produced during parser error recovery.
    Error,
}
|
||||
|
||||
/// A `name: type` pair (struct field or function parameter).
#[derive(PartialEq, Debug)]
pub struct Field(pub Ident, pub Ident);

/// A function argument: either the receiver (`self`) or a typed field.
// NOTE(review): "Reciever" is a typo for "Receiver", but renaming the public
// variant would break every pattern match and the recorded snapshots that
// spell it this way — fix only in a coordinated change.
#[derive(PartialEq, Debug)]
pub enum FnArg {
    Reciever,
    Field(Field)
}

/// A function signature: name, arguments, optional return type, and the
/// effects it may perform.
#[derive(PartialEq, Debug)]
pub struct Prototype {
    pub name: Ident,
    pub args: Vec<FnArg>,
    pub ret: Option<Ident>,
    pub effects: Vec<Ident>,
}

/// A function definition: signature, body, and the `when <effect> { ... }`
/// handler blocks that follow it.
#[derive(PartialEq, Debug)]
pub struct FnDef(
    pub Prototype,
    pub Block<Box<Expression>>,
    pub Vec<(Ident, Block<Box<Expression>>)>,
);
|
||||
|
||||
/// Whitespace classes recognized by the lexer.
#[derive(PartialEq, Debug)]
pub enum Whitespace {
    Space,
    Tab,
    Newline,
}

/// Unary and binary operators.
#[derive(PartialEq, Debug, Clone)]
pub enum Operator {
    Add,
    Sub,
    Mul,
    Div,
    Modulo,
    Increment,
    Decrement,
    Maybe,
    Not,
    Neg,
}
|
||||
|
||||
// proptest strategy for generating operators in property tests.
impl Arbitrary for Operator {
    type Parameters = ();
    type Strategy = BoxedStrategy<Self>;

    // NOTE(review): only the four basic arithmetic operators are generated;
    // confirm the remaining variants are intentionally excluded from
    // property testing.
    fn arbitrary_with(_args: ()) -> Self::Strategy {
        prop_oneof![
            Just(Operator::Add),
            Just(Operator::Sub),
            Just(Operator::Mul),
            Just(Operator::Div),
        ]
        .boxed()
    }
}
|
||||
|
||||
impl Display for Operator {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let op = match self {
|
||||
Operator::Add => "+",
|
||||
Operator::Sub => "-",
|
||||
Operator::Mul => "*",
|
||||
Operator::Div => "/",
|
||||
Operator::Modulo => "%",
|
||||
Operator::Increment => "++",
|
||||
Operator::Decrement => "--",
|
||||
Operator::Maybe => "?",
|
||||
Operator::Not => "!",
|
||||
Operator::Neg => "-",
|
||||
};
|
||||
write!(f, "{}", op)
|
||||
}
|
||||
}
|
||||
|
||||
/// A struct definition: name plus its field block.
#[derive(PartialEq, Debug)]
pub struct StructDef(pub Ident, pub Block<Field>);

/// Newtype for identifiers used in function position.
#[derive(PartialEq, Debug)]
pub struct FnIdent(pub Ident);

/// An effect definition: name, its bound effects (`effect E: a + b`), and
/// the prototypes it declares.
#[derive(PartialEq, Debug)]
pub struct EffectDef(pub Ident, pub Vec<Ident>, pub Block<Prototype>);

/// `use { items } from module`.
#[derive(PartialEq, Debug)]
pub struct UseDef(pub Vec<Ident>, pub Ident);

/// `impl Effect for Type { ... }`; the second field is the optional target
/// type.
#[derive(PartialEq, Debug)]
pub struct ImplDef(pub Ident, pub Option<Ident>, pub Block<Box<Expression>>);

/// A conditional: scrutinee plus (guard expression, block) arms.
#[derive(PartialEq, Debug)]
pub struct Branch(pub Box<Expression>, pub Vec<(Expression, Block<Box<Expression>>)>);

/// Top-level compilation unit: a list of expressions.
#[derive(PartialEq, Debug)]
pub struct Module(pub Vec<Box<Expression>>);
|
33
src/parser/errors.rs
Normal file
33
src/parser/errors.rs
Normal file
|
@ -0,0 +1,33 @@
|
|||
pub fn pretty_errors<'input>(src: &'input str, errors: Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token, &str>>) -> String {
|
||||
let mut pretty = String::new();
|
||||
let mut last_end = 0;
|
||||
for error in errors {
|
||||
match error.error {
|
||||
lalrpop_util::ParseError::InvalidToken { location } => todo!(),
|
||||
lalrpop_util::ParseError::UnrecognizedEof { location, expected } => todo!(),
|
||||
lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
|
||||
// find the line and column of the start and end tokens,
|
||||
// and print the line with a caret pointing to the error
|
||||
let start = token.0;
|
||||
let end = token.2;
|
||||
let start_line = src[..start].rfind('\n').map_or(0, |i| i + 1);
|
||||
let end_line = src[end..].find('\n').map_or(src.len(), |i| end + i);
|
||||
let line = &src[start_line..end_line];
|
||||
let start_col = start - start_line;
|
||||
let end_col = end - start_line;
|
||||
// pretty.push_str(&src[last_end..start]);
|
||||
pretty.push_str(&format!("error: unexpected token {:?}, expected one of {:?}\n", token.1, expected));
|
||||
pretty.push_str(&line);
|
||||
pretty.push_str("\n");
|
||||
pretty.push_str(&" ".repeat(start_col));
|
||||
pretty.push_str(&"^".repeat(end_col - start_col));
|
||||
last_end = end;
|
||||
},
|
||||
lalrpop_util::ParseError::ExtraToken { token } => todo!(),
|
||||
lalrpop_util::ParseError::User { error } => todo!(),
|
||||
};
|
||||
|
||||
}
|
||||
// pretty.push_str(&src[last_end..]);
|
||||
pretty
|
||||
}
|
8
src/parser/mod.rs
Normal file
8
src/parser/mod.rs
Normal file
|
@ -0,0 +1,8 @@
|
|||
use lalrpop_util::lalrpop_mod;
|
||||
|
||||
mod parser_snap_tests;
|
||||
mod string;
|
||||
pub mod ast;
|
||||
mod errors;
|
||||
|
||||
pub mod src;
|
111
src/parser/parser_snap_tests.rs
Normal file
111
src/parser/parser_snap_tests.rs
Normal file
|
@ -0,0 +1,111 @@
|
|||
use crate::lexer::Lexer;
|
||||
use crate::parser::errors::pretty_errors;
|
||||
use insta::assert_snapshot;
|
||||
use okstd::prelude::*;
|
||||
|
||||
#[cfg(test)]
// NOTE(review): this cfg gates only this single function; the sibling tests
// below are unconditional — consider a module-level #[cfg(test)].
#[okstd::test]
fn test_empty_parser() {
    // Whitespace-only input must parse to an empty module.
    let input = " ";
    let mut errors = vec![];
    let wrapper = crate::lexer::TripleIterator::new(input);
    let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
    // Consistency fix: report recovered errors with full source context, the
    // same way the other tests in this file do, instead of a bare assert.
    if !errors.is_empty() {
        panic!("{}", pretty_errors(input, errors));
    }
    assert_snapshot!(format!("{:#?}", t.unwrap()), @r###"
    Module(
        [],
    )
    "###);
}
|
||||
|
||||
// Verifies parsing of a function whose body binds the result of a call that
// mixes integer and string-literal arguments; the AST is pinned by an
// external insta snapshot.
#[okstd::test]
fn test_fn_call_parser_with_multiple_args_and_strings() {
    let input = "fn some()[] {let a = some_fnExpr(1, \"2\", 3)}";
    let mut errors = vec![];
    let wrapper = crate::lexer::TripleIterator::new(input);
    let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
    if !errors.is_empty() {
        // Render recovered errors with source context before failing.
        panic!("{}", pretty_errors(&input, errors));
    }
    assert_snapshot!(format!("{:#?}", t.unwrap()));
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_fn_def_parser() {
|
||||
let input = r#"fn call(a:b, b:c) [throws, awaits, execs] {
|
||||
call(1+1)
|
||||
let a = 1
|
||||
} when throws {
|
||||
raise(1)
|
||||
}"#;
|
||||
let mut errors = vec![];
|
||||
let wrapper = crate::lexer::TripleIterator::new(input);
|
||||
let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
|
||||
assert!(errors.is_empty());
|
||||
assert_snapshot!(format!("{:#?}", t.unwrap()));
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_effect() {
|
||||
let input = r#"effect VM: async + throws + execs {
|
||||
catch() []
|
||||
await<T>(f: Future<T>) [] -> T
|
||||
exec(arg0: string, args: stringvec) []
|
||||
}"#;
|
||||
let mut errors = vec![];
|
||||
let wrapper = crate::lexer::TripleIterator::new(input);
|
||||
let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
|
||||
assert!(errors.is_empty());
|
||||
assert_snapshot!(format!("{:#?}", t.unwrap()));
|
||||
}
|
||||
|
||||
#[okstd::test]
|
||||
fn test_struct_parser() {
|
||||
let input = r#"struct VM {
|
||||
a: string
|
||||
b: string
|
||||
}"#;
|
||||
let mut errors = vec![];
|
||||
let wrapper = crate::lexer::TripleIterator::new(input);
|
||||
let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
|
||||
assert!(errors.is_empty());
|
||||
assert_snapshot!(format!("{:#?}", t.unwrap()));
|
||||
}
|
||||
|
||||
// End-to-end fixture covering use/effect/struct/impl definitions together.
// NOTE(review): despite the name, no `enum` appears in the fixture — consider
// renaming. The "trhows" typo in the fixture is deliberate-looking: it shows
// up verbatim as an identifier in the recorded snapshot.
#[okstd::test]
fn test_enum_parser() {
    let input = r#"use { exec } from host

effect Make: async + throws + execs + reads + writes {
    catch() [throws]
    await<T>(f: Future<T>) [async, throws] -> T
    exec(arg0: string, args: stringvec) [Make] -> i32
    read(name: string) [reads] -> string
    write(name: string, value: string) [writes]
}

struct Local {
    host: host
}

impl Make for Local {
    fn catch(self) [throws] {
    }
    fn await<T>(f: Future<T>) [async, trhows] -> T {
        yield()
    }
    fn exec(self, arg0: string, args: vec<string>) [Vm] -> i32 {
        self.host.read("jobserver")
        if self.host.exec(arg0, args) {
            raise(1)
        }
    }
}"#;
    let mut errors: Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token, &str>> = vec![];
    let wrapper = crate::lexer::TripleIterator::new(input);
    let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
    if !errors.is_empty() {
        panic!("{}", pretty_errors(&input, errors));
    }
    assert_snapshot!(format!("{:#?}", t.unwrap()));
}
|
|
@ -0,0 +1,118 @@
|
|||
---
|
||||
source: src/parser/parser_snap_tests.rs
|
||||
expression: "format!(\"{:#?}\", t.unwrap())"
|
||||
---
|
||||
Module(
|
||||
[
|
||||
EffectDef(
|
||||
EffectDef(
|
||||
Ident(
|
||||
"VM",
|
||||
None,
|
||||
),
|
||||
[
|
||||
Ident(
|
||||
"async",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"execs",
|
||||
None,
|
||||
),
|
||||
],
|
||||
Block(
|
||||
[
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"catch",
|
||||
None,
|
||||
),
|
||||
args: [],
|
||||
ret: None,
|
||||
effects: [],
|
||||
},
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"await",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"f",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"Future",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: Some(
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
),
|
||||
effects: [],
|
||||
},
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"exec",
|
||||
None,
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"arg0",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"args",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"stringvec",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: None,
|
||||
effects: [],
|
||||
},
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
|
@ -0,0 +1,384 @@
|
|||
---
|
||||
source: src/parser/parser_snap_tests.rs
|
||||
expression: "format!(\"{:#?}\", t.unwrap())"
|
||||
---
|
||||
Module(
|
||||
[
|
||||
UseDef(
|
||||
UseDef(
|
||||
[
|
||||
Ident(
|
||||
"crosmvm",
|
||||
None,
|
||||
),
|
||||
],
|
||||
Ident(
|
||||
"std",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
EffectDef(
|
||||
EffectDef(
|
||||
Ident(
|
||||
"Vm",
|
||||
None,
|
||||
),
|
||||
[
|
||||
Ident(
|
||||
"async",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"execs",
|
||||
None,
|
||||
),
|
||||
],
|
||||
Block(
|
||||
[
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"catch",
|
||||
None,
|
||||
),
|
||||
args: [],
|
||||
ret: None,
|
||||
effects: [
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"await",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"f",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"Future",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: Some(
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
),
|
||||
effects: [
|
||||
Ident(
|
||||
"async",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"exec",
|
||||
None,
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"arg0",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"args",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"stringvec",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: Some(
|
||||
Ident(
|
||||
"i32",
|
||||
None,
|
||||
),
|
||||
),
|
||||
effects: [
|
||||
Ident(
|
||||
"Vm",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
StructDef(
|
||||
StructDef(
|
||||
Ident(
|
||||
"coopvm",
|
||||
None,
|
||||
),
|
||||
Block(
|
||||
[],
|
||||
),
|
||||
),
|
||||
),
|
||||
ImplDef(
|
||||
ImplDef(
|
||||
Ident(
|
||||
"Vm",
|
||||
None,
|
||||
),
|
||||
Some(
|
||||
Ident(
|
||||
"coopvm",
|
||||
None,
|
||||
),
|
||||
),
|
||||
Block(
|
||||
[
|
||||
FnDef(
|
||||
FnDef(
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"catch",
|
||||
None,
|
||||
),
|
||||
args: [
|
||||
Reciever,
|
||||
],
|
||||
ret: None,
|
||||
effects: [
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Block(
|
||||
[],
|
||||
),
|
||||
[],
|
||||
),
|
||||
),
|
||||
FnDef(
|
||||
FnDef(
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"await",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"f",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"Future",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: Some(
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
),
|
||||
effects: [
|
||||
Ident(
|
||||
"async",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"trhows",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Block(
|
||||
[
|
||||
FnCall(
|
||||
FnCall(
|
||||
Ident(
|
||||
"yield",
|
||||
None,
|
||||
),
|
||||
[],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
[],
|
||||
),
|
||||
),
|
||||
FnDef(
|
||||
FnDef(
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"exec",
|
||||
None,
|
||||
),
|
||||
args: [
|
||||
Reciever,
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"arg0",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"args",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"vec",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: Some(
|
||||
Ident(
|
||||
"i32",
|
||||
None,
|
||||
),
|
||||
),
|
||||
effects: [
|
||||
Ident(
|
||||
"Vm",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Block(
|
||||
[
|
||||
Branch(
|
||||
Branch(
|
||||
FnCall(
|
||||
FnCall(
|
||||
Ident(
|
||||
"self.exec",
|
||||
None,
|
||||
),
|
||||
[
|
||||
Ident(
|
||||
Ident(
|
||||
"arg0",
|
||||
None,
|
||||
),
|
||||
),
|
||||
Ident(
|
||||
Ident(
|
||||
"args",
|
||||
None,
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
[
|
||||
(
|
||||
Bool(
|
||||
true,
|
||||
),
|
||||
Block(
|
||||
[
|
||||
FnCall(
|
||||
FnCall(
|
||||
Ident(
|
||||
"raise",
|
||||
None,
|
||||
),
|
||||
[],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
[],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
|
@ -0,0 +1,492 @@
|
|||
---
|
||||
source: src/parser/parser_snap_tests.rs
|
||||
assertion_line: 110
|
||||
expression: "format!(\"{:#?}\", t.unwrap())"
|
||||
---
|
||||
Module(
|
||||
[
|
||||
UseDef(
|
||||
UseDef(
|
||||
[
|
||||
Ident(
|
||||
"exec",
|
||||
None,
|
||||
),
|
||||
],
|
||||
Ident(
|
||||
"host",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
EffectDef(
|
||||
EffectDef(
|
||||
Ident(
|
||||
"Make",
|
||||
None,
|
||||
),
|
||||
[
|
||||
Ident(
|
||||
"async",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"execs",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"reads",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"writes",
|
||||
None,
|
||||
),
|
||||
],
|
||||
Block(
|
||||
[
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"catch",
|
||||
None,
|
||||
),
|
||||
args: [],
|
||||
ret: None,
|
||||
effects: [
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"await",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"f",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"Future",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: Some(
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
),
|
||||
effects: [
|
||||
Ident(
|
||||
"async",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"exec",
|
||||
None,
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"arg0",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"args",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"stringvec",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: Some(
|
||||
Ident(
|
||||
"i32",
|
||||
None,
|
||||
),
|
||||
),
|
||||
effects: [
|
||||
Ident(
|
||||
"Make",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"read",
|
||||
None,
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"name",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: Some(
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
effects: [
|
||||
Ident(
|
||||
"reads",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"write",
|
||||
None,
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"name",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"value",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: None,
|
||||
effects: [
|
||||
Ident(
|
||||
"writes",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
StructDef(
|
||||
StructDef(
|
||||
Ident(
|
||||
"Local",
|
||||
None,
|
||||
),
|
||||
Block(
|
||||
[
|
||||
Field(
|
||||
Ident(
|
||||
"host",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"host",
|
||||
None,
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
ImplDef(
|
||||
ImplDef(
|
||||
Ident(
|
||||
"Make",
|
||||
None,
|
||||
),
|
||||
Some(
|
||||
Ident(
|
||||
"Local",
|
||||
None,
|
||||
),
|
||||
),
|
||||
Block(
|
||||
[
|
||||
FnDef(
|
||||
FnDef(
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"catch",
|
||||
None,
|
||||
),
|
||||
args: [
|
||||
Reciever,
|
||||
],
|
||||
ret: None,
|
||||
effects: [
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Block(
|
||||
[],
|
||||
),
|
||||
[],
|
||||
),
|
||||
),
|
||||
FnDef(
|
||||
FnDef(
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"await",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"f",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"Future",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: Some(
|
||||
Ident(
|
||||
"T",
|
||||
None,
|
||||
),
|
||||
),
|
||||
effects: [
|
||||
Ident(
|
||||
"async",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"trhows",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Block(
|
||||
[
|
||||
FnCall(
|
||||
FnCall(
|
||||
Ident(
|
||||
"yield",
|
||||
None,
|
||||
),
|
||||
[],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
[],
|
||||
),
|
||||
),
|
||||
FnDef(
|
||||
FnDef(
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"exec",
|
||||
None,
|
||||
),
|
||||
args: [
|
||||
Reciever,
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"arg0",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"args",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"vec",
|
||||
Some(
|
||||
[
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: Some(
|
||||
Ident(
|
||||
"i32",
|
||||
None,
|
||||
),
|
||||
),
|
||||
effects: [
|
||||
Ident(
|
||||
"Vm",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Block(
|
||||
[
|
||||
FnCall(
|
||||
FnCall(
|
||||
Ident(
|
||||
"self.host.read",
|
||||
None,
|
||||
),
|
||||
[
|
||||
String(
|
||||
"jobserver",
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
Branch(
|
||||
Branch(
|
||||
FnCall(
|
||||
FnCall(
|
||||
Ident(
|
||||
"self.host.exec",
|
||||
None,
|
||||
),
|
||||
[
|
||||
Ident(
|
||||
Ident(
|
||||
"arg0",
|
||||
None,
|
||||
),
|
||||
),
|
||||
Ident(
|
||||
Ident(
|
||||
"args",
|
||||
None,
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
[
|
||||
(
|
||||
Bool(
|
||||
true,
|
||||
),
|
||||
Block(
|
||||
[
|
||||
FnCall(
|
||||
FnCall(
|
||||
Ident(
|
||||
"raise",
|
||||
None,
|
||||
),
|
||||
[
|
||||
Integer(
|
||||
1,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
[],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
|
@ -0,0 +1,53 @@
|
|||
---
|
||||
source: src/parser/parser_snap_tests.rs
|
||||
expression: "format!(\"{:#?}\", t.unwrap())"
|
||||
---
|
||||
Module(
|
||||
[
|
||||
FnDef(
|
||||
FnDef(
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"some",
|
||||
None,
|
||||
),
|
||||
args: [],
|
||||
ret: None,
|
||||
effects: [],
|
||||
},
|
||||
Block(
|
||||
[
|
||||
Binding(
|
||||
Binding(
|
||||
Ident(
|
||||
"a",
|
||||
None,
|
||||
),
|
||||
FnCall(
|
||||
FnCall(
|
||||
Ident(
|
||||
"some_fnExpr",
|
||||
None,
|
||||
),
|
||||
[
|
||||
Integer(
|
||||
1,
|
||||
),
|
||||
String(
|
||||
"2",
|
||||
),
|
||||
Integer(
|
||||
3,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
[],
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
|
@ -0,0 +1,120 @@
|
|||
---
|
||||
source: src/parser/parser_snap_tests.rs
|
||||
expression: "format!(\"{:#?}\", t.unwrap())"
|
||||
---
|
||||
Module(
|
||||
[
|
||||
FnDef(
|
||||
FnDef(
|
||||
Prototype {
|
||||
name: Ident(
|
||||
"call",
|
||||
None,
|
||||
),
|
||||
args: [
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"a",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"b",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
Field(
|
||||
Field(
|
||||
Ident(
|
||||
"b",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"c",
|
||||
None,
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
ret: None,
|
||||
effects: [
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"awaits",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"execs",
|
||||
None,
|
||||
),
|
||||
],
|
||||
},
|
||||
Block(
|
||||
[
|
||||
FnCall(
|
||||
FnCall(
|
||||
Ident(
|
||||
"call",
|
||||
None,
|
||||
),
|
||||
[
|
||||
BinaryExpression(
|
||||
BinaryOperation {
|
||||
lhs: Integer(
|
||||
1,
|
||||
),
|
||||
op: Add,
|
||||
rhs: Integer(
|
||||
1,
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
Binding(
|
||||
Binding(
|
||||
Ident(
|
||||
"a",
|
||||
None,
|
||||
),
|
||||
Integer(
|
||||
1,
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
[
|
||||
(
|
||||
Ident(
|
||||
"throws",
|
||||
None,
|
||||
),
|
||||
Block(
|
||||
[
|
||||
FnCall(
|
||||
FnCall(
|
||||
Ident(
|
||||
"raise",
|
||||
None,
|
||||
),
|
||||
[
|
||||
Integer(
|
||||
1,
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
|
@ -0,0 +1,40 @@
|
|||
---
|
||||
source: src/parser/parser_snap_tests.rs
|
||||
expression: "format!(\"{:#?}\", t.unwrap())"
|
||||
---
|
||||
Module(
|
||||
[
|
||||
StructDef(
|
||||
StructDef(
|
||||
Ident(
|
||||
"VM",
|
||||
None,
|
||||
),
|
||||
Block(
|
||||
[
|
||||
Field(
|
||||
Ident(
|
||||
"a",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
Field(
|
||||
Ident(
|
||||
"b",
|
||||
None,
|
||||
),
|
||||
Ident(
|
||||
"string",
|
||||
None,
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
362
src/parser/src.lalrpop
Normal file
362
src/parser/src.lalrpop
Normal file
|
@ -0,0 +1,362 @@
|
|||
use std::str::FromStr;
|
||||
|
||||
use crate::parser::string::apply_string_escapes;
|
||||
use super::ast::*;
|
||||
use lalrpop_util::{ErrorRecovery, ParseError};
|
||||
use crate::lexer::{Position, Token, Word, Variable};
|
||||
use okstd::prelude::*;
|
||||
|
||||
// LALR(1) grammar for the language, compiled by LALRPOP at build time.
// `errors` accumulates recoverable parse errors so the parser can keep
// going after a bad token (see the `!` alternative in `Source`).
#[LALR]
grammar<'input, 'err>(errors: &'err mut Vec<ErrorRecovery<usize, Token<'input>, &'static str>>);

// Bind the external lexer's `Token` variants to the quoted terminal names
// used by the rules below; locations are byte offsets into the input.
extern {
    type Location = usize;

    enum Token<'input> {
        // Operators
        "|" => Token::Pipe, // |
        "&" => Token::Ampersand, // &
        ";" => Token::Semicolon, // ;
        "=" => Token::Equals, // =
        // Redirections
        "<" => Token::LessThan, // <
        ">" => Token::GreaterThan, // >
        // Identifiers
        // "param" => Variable::Parameter(<&'input str>), // var
        // "param_default" => Variable::ParameterDefault(<&'input str>, <&'input str>), // var = value
        // "positional_param" => Variable::PositionalParameter(<usize>), // $var
        // Literals
        "true" => Token::Word(Word::True), // true
        "none" => Token::Word(Word::None), // none
        "false" => Token::Word(Word::False), // false
        "null" => Token::Word(Word::Null), // null
        "fn" => Token::Word(Word::Fn), // fn
        "if" => Token::Word(Word::If), // if
        "else" => Token::Word(Word::Else), // else
        "match" => Token::Word(Word::Match), // match
        "let" => Token::Word(Word::Let), // let
        "import" => Token::Word(Word::Import), // import
        "action" => Token::Word(Word::Action), // action
        "struct" => Token::Word(Word::Struct), // struct
        "enum" => Token::Word(Word::Enum), // enum
        "effect" => Token::Word(Word::Effect), // effect
        "impl" => Token::Word(Word::Impl), // impl
        "when" => Token::Word(Word::When), // when
        "use" => Token::Word(Word::Use), // use
        "from" => Token::Word(Word::From), // from
        "where" => Token::Word(Word::Where), // where
        "self" => Token::Word(Word::Self_), // self
        "for" => Token::Word(Word::For), // for

        "#!" => Token::Shebang, // #!

        "ident" => Token::Word(Word::Ident(<&'input str>)), // a-z, A-Z, 0-9, _
        "string" => Token::String(<&'input str>), // "..."
        // Comments
        "comment" => Token::Comment(<&'input str>), // #
        // Numbers
        "int" => Token::Integer(<i64>), // 0-9
        "float" => Token::Float(<f64>), // [0-9]*.0-9+
        // Special
        "eof" => Token::Eof, // EOF
        "\n" => Token::NewLine, // \n
        "(" => Token::LeftParen, // (
        ")" => Token::RightParen, // )
        "{" => Token::LeftBrace, // {
        "}" => Token::RightBrace, // }
        "[" => Token::LeftBracket, // [
        "]" => Token::RightBracket, // ]
        "," => Token::Comma, // ,
        ":" => Token::Colon, // :
        "." => Token::Dot, // .
        "-" => Token::Minus, // -
        "+" => Token::Plus, // +
        "/" => Token::Divide, // /
        "*" => Token::Multiply, // *
        "%" => Token::Percent, // %
        "$" => Token::Dollar, // $
        "!" => Token::Exclamation, // !
        "?" => Token::Question, // ?
        "~" => Token::Tilde, // ~
        "@" => Token::At, // @
        "^" => Token::Caret, // ^
        "->" => Token::Arrow, // ->
        "=>" => Token::FatArrow, // =>
    }
}
|
||||
|
||||
// Wraps `T` with its byte span: (start offset, value, end offset).
Spanned<T>: (usize, T, usize) = {
    <@L> <T> <@R> => (<>)
};
|
||||
|
||||
// Slash-led path: zero or more `/ T` segments followed by an optional
// bare trailing `T`; yields the collected segments in order.
Path<T>: Vec<Ident> = {
    <mut v:("/" <T>)*> <e:T?> => {
        // Append the optional final segment, if present.
        if let Some(last) = e {
            v.push(last);
        }
        v
    }
}
|
||||
|
||||
// Newline-separated sequence of `T`, with an optional unterminated
// final element.
Lines<T>: Vec<T> = {
    <mut v:(<T> "\n")*> <e:T?> => {
        // Fold the optional trailing element into the accumulator.
        if let Some(last) = e {
            v.push(last);
        }
        v
    }
}
|
||||
|
||||
// Comma-separated sequence of `T`; a trailing comma (and an empty list)
// is accepted because the final element is optional.
Comma<T>: Vec<T> = {
    <mut v:(<T> ",")*> <e:T?> => {
        // Push the optional last element after the comma-terminated ones.
        if let Some(last) = e {
            v.push(last);
        }
        v
    }
};
|
||||
|
||||
|
||||
// `+`-separated sequence of `T` (e.g. effect sums `A + B + C`); the
// final element is optional, so a trailing `+` is tolerated.
Plus<T>: Vec<T> = {
    <mut v:(<T> "+")*> <e:T?> => {
        // Fold the optional final element into the accumulator.
        if let Some(last) = e {
            v.push(last);
        }
        v
    }
};
|
||||
|
||||
|
||||
// Keyword terminals lifted into `Expression` values so later rules can
// reference them by rule name; `True`/`False` become boolean literals.
None: Expression = "none" => Expression::Keyword(Keyword::None);
When: Expression = "when" => Expression::Keyword(Keyword::When);
Fn: Expression = "fn" => Expression::Keyword(Keyword::Fn);
Let: Expression = "let" => Expression::Keyword(Keyword::Let);
Effect: Expression = "effect" => Expression::Keyword(Keyword::Effect);
Struct: Expression = "struct" => Expression::Keyword(Keyword::Struct);
Impl: Expression = "impl" => Expression::Keyword(Keyword::Impl);
Use: Expression = "use" => Expression::Keyword(Keyword::Use);
From: Expression = "from" => Expression::Keyword(Keyword::From);
Where: Expression = "where" => Expression::Keyword(Keyword::Where);
Self_: Expression = "self" => Expression::Keyword(Keyword::Self_);
Else: Expression = "else" => Expression::Keyword(Keyword::Else);
If: Expression = "if" => Expression::Keyword(Keyword::If);
True: Expression = "true" => Expression::Bool(true);
False: Expression = "false" => Expression::Bool(false);
|
||||
|
||||
// True when `L` and `R` are adjacent in the source: the end offset of the
// left span equals the start offset of the right span. Left-recursive so a
// run of items is checked pairwise.
//
// Fix: the original action was `match l.2 == r.0`, which had a stray
// `match` keyword and referenced `l`/`r` instead of the declared bindings
// `lhs`/`rhs` — it could not compile.
Consecutive<L, R>: bool = {
    Consecutive<L, R> <lhs:Spanned<L>> <rhs:Spanned<R>> => lhs.2 == rhs.0,
    L => false
};
|
||||
|
||||
// Plain identifier; the second tuple field (generics) is `None`.
Ident: Ident = {
    <i:"ident"> => Ident(i.to_string(), None),
};

// Identifier with an explicit generic-argument list, e.g. `Foo<A, B>`.
IdentWithGenerics: Ident = {
    <i:"ident"> "<" <g:Comma<Ident>> ">" => Ident(i.to_string(), Some(g)),
};

// Either form of identifier, used where generics are optional.
IdentOrIdentWithGenerics: Ident = {
    <i:Ident> => i,
    <i:IdentWithGenerics> => i,
};
|
||||
|
||||
// Generic punctuated list: `T` separated by `Token`, with an optional
// bare trailing `T` (so empty lists and trailing separators parse).
Punctuated<T, Token>: Vec<T> = {
    <mut v:(<T> <Token>)*> <e:T?> => {
        // Append the optional unterminated final element.
        if let Some(last) = e {
            v.push(last);
        }
        v
    }
};
|
||||
|
||||
|
||||
// Smallest value unit: numeric/string literal or identifier.
Atom: Value = {
    #[precedence(level="0")]
    <i:"int"> => Value::Literal(Literal::Integer(i)),
    <f:"float"> => Value::Literal(Literal::Float(f)),
    <s:"string"> => {
        // The lexer keeps the surrounding quotes; strip one char at each end.
        let start = 1;
        let end = s.len() - 1;
        // `unwrap_or_else` avoids allocating the panic message on the happy
        // path (the original `expect(format!(..).as_str())` built it always —
        // clippy `expect_fun_call`).
        Value::Literal(Literal::String(
            s.get(start..end)
                .unwrap_or_else(|| panic!("malformed string {s}, strings must be quoted"))
                .to_string(),
        ))
    },
    #[precedence(level="1")]
    <i:Ident> => Value::Ident(i),
};
|
||||
|
||||
// Arithmetic expression grammar using LALRPOP precedence annotations:
// level 1 = terms, level 2 = `*`/`/`, level 3 = `+`/`-`.
// Both binary levels are left-associative, so `*`/`/` bind tighter
// than `+`/`-`.
Expression: Box<Expression> = {
    #[precedence(level="1")]
    Term,

    #[precedence(level="2")] #[assoc(side="left")]
    <lhs:Expression> "*" <rhs:Expression> => {
        Box::new(Expression::BinaryExpression(BinaryOperation {
            lhs,
            op: Operator::Mul,
            rhs
        }))
    },
    <lhs:Expression> "/" <rhs:Expression> => {
        Box::new(Expression::BinaryExpression(BinaryOperation {
            lhs,
            op: Operator::Div,
            rhs
        }))
    },

    #[precedence(level="3")] #[assoc(side="left")]
    <lhs:Expression> "+" <rhs:Expression> => {
        Box::new(Expression::BinaryExpression(BinaryOperation {
            lhs,
            op: Operator::Add,
            rhs
        }))
    },
    <lhs:Expression> "-" <rhs:Expression> => {
        Box::new(Expression::BinaryExpression(BinaryOperation {
            lhs,
            op: Operator::Sub,
            rhs
        }))
    },
}
|
||||
|
||||
// `name: Type` pair, used in struct bodies and function parameter lists.
Field: Field = {
    <name:Ident> ":" <ty:IdentOrIdentWithGenerics> => Field(name, ty)
}

// Angle-bracketed generic parameter list, e.g. `<A, B>`.
// NOTE(review): not referenced by any rule visible in this chunk — confirm
// whether it is used elsewhere or is dead.
TypeParameters: Vec<Ident> =
    "<" <Comma<Ident>> ">";

// A function argument: either the `self` receiver or a typed field.
FnArg: FnArg = {
    // `Reciever` (sic) matches the spelling of the AST enum variant
    // declared elsewhere in the crate.
    <self_:Self_> => FnArg::Reciever,
    <field:Field> => FnArg::Field(field),
}
|
||||
|
||||
// Function signature: `name(args...) [effects...] [-> ReturnType]`.
// The effect list is mandatory (possibly empty); the return type is optional.
Prototype: Prototype = {
    <name:IdentOrIdentWithGenerics> "("<args:Comma<FnArg>> ")" "[" <effects:Comma<Ident>> "]" <ret:("->" Ident)?> => {
        // `ret` is `Option<(arrow token, Ident)>`; keep only the Ident.
        // (Replaces the original hand-rolled None/Some match with
        // idiomatic `Option::map`.)
        let ret = ret.map(|r| r.1);
        Prototype{name, args, ret, effects}
    }
}
|
||||
|
||||
|
||||
// String-literal expression; strips the surrounding quotes kept by the lexer.
String: Box<Expression> = {
    <s:"string"> => {
        let start = 1;
        let end = s.len() - 1;
        // `unwrap_or_else` builds the panic message only on failure (the
        // original `expect(format!(..).as_str())` allocated it on every
        // call — clippy `expect_fun_call`).
        Box::new(Expression::String(
            s.get(start..end)
                .unwrap_or_else(|| panic!("malformed string {s}, strings must be quoted"))
                .to_string(),
        ))
    },
}
|
||||
|
||||
|
||||
// Function call `name(arg, ...)` with an optional bracketed annotation
// `[ident]` after the argument list.
// NOTE(review): the bracketed identifier `i` is parsed but discarded —
// `FnCall` stores only the name and arguments. Confirm whether the
// annotation should be carried into the AST.
FnCall: Box<Expression> = {
    <name:IdentOrIdentWithGenerics> "(" <args:Comma<Expression>> ")" <i:("[" <Ident> "]")?> => Box::new(Expression::FnCall(FnCall(name, args)))
}
|
||||
|
||||
// Non-operator operand of an expression: string/integer literal,
// identifier, function call, boolean keyword, or a parenthesised
// sub-expression.
Term: Box<Expression> = {
    <s:String> => <>,
    <val:"int"> => {
        Box::new(Expression::Integer(val))
    },
    <i:Ident> => {
        Box::new(Expression::Ident(i))
    },
    <f:FnCall> => <>,
    <true_:True> => Box::new(<>),
    <false_:False> => Box::new(<>),
    // Parentheses only group; the inner expression is passed through.
    "(" <Expression> ")",
}
|
||||
|
||||
// Brace-delimited block of newline-separated `T`; an optional newline is
// allowed immediately after the opening `{`.
Block<T>: Block<T> = {
    "{" ("\n"?) <lines:Lines<T>> "}" => Block(lines)
}

// `when <effect-ident> { statements }` — an effect-handler block attached
// to a function definition; yields the effect name with its body.
WhenBlock: (Ident,Block<Box<Expression>>) = {
    When <i:Ident> <lines:Block<Statement>> => (i, lines)
}
|
||||
|
||||
|
||||
// `fn <prototype> { body } [when ... { ... }]*` — function definition with
// zero or more trailing effect-handler blocks.
FnDef: Box<Expression> = {
    Fn <proto:Prototype> <block:Block<Statement>> <handlers:(WhenBlock)*> => Box::new(Expression::FnDef(FnDef(proto, block, handlers))),
}

// `effect Name: A + B { prototypes }` — effect declaration listing the
// effects it composes and the operation prototypes it provides.
EffectDef: Box<Expression> = {
    Effect <i:Ident> ":" <effects:Plus<Ident>> <block:Block<Prototype>> => Box::new(Expression::EffectDef(EffectDef(i, effects,block))),
}
|
||||
|
||||
// One argument of a shell command: a string literal or an identifier.
// NOTE(review): unlike the `String` rule, the surrounding quotes are NOT
// stripped here — confirm whether that asymmetry is intentional.
ShellComponent: Box<Expression> = {
    <s:"string"> => Box::new(Expression::String((s).to_string())),
    <i:Ident> => Box::new(Expression::Ident(i)),
}

// `#!/path/to/cmd arg...` — shebang-style shell command; the `/`-separated
// path identifiers are collected first, the remaining components are args.
ShebangLine: Box<Expression> = {
    "#!" <path:("/" Ident)*> <components:(ShellComponent)*> => Box::new(Expression::ShellCommand(path.into_iter().map(|i| i.1.into()).collect(), components)),
}
|
||||
|
||||
// `struct Name { field: Type, ... }` — struct declaration with a block of
// newline-separated fields.
StructDef: Box<Expression> = {
    Struct <i:Ident> <fields:Block<Field>> => Box::new(Expression::StructDef(StructDef(i, fields))),
}
|
||||
|
||||
// `if cond { ... } [else { ... }]`, desugared into a `Branch` whose arms
// are keyed by boolean literals: `true` -> then-block, `false` -> else-block.
IfDef: Box<Expression> = {
    // if without else: a single `true` arm.
    If <cond:Expression> <if_:Block<Statement>> => {
        let branch = Branch (
            cond,
            vec![
                (Expression::Bool(true), if_),
            ]
        );
        Box::new(Expression::Branch(branch))
    },
    // if/else: `true` and `false` arms.
    If <cond:Expression> <if_:Block<Statement>> Else <else_:Block<Statement>> => {
        let branch = Branch (
            cond,
            vec![
                (Expression::Bool(true), if_),
                (Expression::Bool(false), else_),
            ]
        );
        Box::new(Expression::Branch(branch))
    },
}
|
||||
|
||||
// A statement inside a block: a call, a `let` binding, or an `if`.
// Precedence levels disambiguate a bare call from other forms.
Statement: Box<Expression> = {
    #[precedence(level="0")]
    <FnCall> => <>,
    #[precedence(level="1")]
    Let <name:Ident> "=" <value:Expression> => Box::new(Expression::Binding(Binding(name, value))),
    <IfDef> => <>,
}
|
||||
|
||||
// `use { a, b, ... } from module` — import declaration.
UseDef: Box<Expression> = {
    Use "{" <imports:Comma<Ident>> "}" From <i:Ident> => Box::new(Expression::UseDef(UseDef(imports, i))),
}

// `impl Name [for Target] { fn ... }` — inherent or trait-style impl block;
// the `for Target` part is optional.
ImplDef: Box<Expression> = {
    Impl <i:Ident> <t:("for" Ident)?> <lines:Block<FnDef>> => Box::new(Expression::ImplDef(ImplDef(i, t.map(|t| t.1), lines))),
}
|
||||
|
||||
|
||||
// Any item that may appear at module scope.
TopLevel: Box<Expression> = {
    <FnDef> => <>,
    <EffectDef> => <>,
    <StructDef> => <>,
    <UseDef> => <>,
    <ImplDef> => <>,
}
|
||||
|
||||
// Public entry point: a module is a sequence of top-level items, each
// optionally preceded by blank lines.
pub Source: Module = {
    <expr:("\n"* TopLevel)*> => Module(expr.into_iter().map(|e| e.1).collect()),
    // Error recovery: on an unparseable input, record the error in the
    // grammar's `errors` vec and still yield a (empty) module value.
    ! => {
        errors.push(<>);
        Module(vec![])
    }
};
|
12968
src/parser/src.rs
Normal file
12968
src/parser/src.rs
Normal file
File diff suppressed because it is too large
Load diff
46
src/parser/string.rs
Normal file
46
src/parser/string.rs
Normal file
|
@ -0,0 +1,46 @@
|
|||
use std::str::Chars;
|
||||
use stringzilla::{sz, StringZilla};
|
||||
use syn::spanned::Spanned as _;
|
||||
|
||||
use crate::lexer::{self, Spanned};
|
||||
|
||||
/// Errors reported while parsing string input.
#[derive(Debug, PartialEq)]
pub enum ParseError {
    // A character that is not valid at the current position.
    UnexpectedChar(char),
    // Input ended before the closing quote of a string literal.
    UnterminatedString,
}
|
||||
|
||||
impl std::fmt::Display for ParseError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
ParseError::UnexpectedChar(c) => write!(f, "unexpected character: '{}'", c),
|
||||
ParseError::UnterminatedString => write!(f, "unterminated string"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl so `ParseError` can be used behind `dyn std::error::Error`.
impl std::error::Error for ParseError {}
|
||||
|
||||
/// Renders `content` as a quoted, escaped Rust string literal by
/// round-tripping it through `syn::LitStr`: the result is `content`
/// wrapped in double quotes with interior `"` and `\` escaped.
///
/// NOTE(review): despite the `Cow<str>` return type, this always
/// allocates (the `to_string()` result is owned) — the borrowed case is
/// never produced; consider returning `String` if no caller relies on it.
pub fn apply_string_escapes(content: &str) -> std::borrow::Cow<str> {
    // `content.span()` merely supplies a span; only the token text matters.
    let s = syn::LitStr::new(content, content.span());
    s.token().to_string().into()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Plain content gains surrounding double quotes.
    #[test]
    fn test_apply_string_escapes() {
        assert_eq!(apply_string_escapes(r#"hello"#), "\"hello\"");
    }
    // An interior double quote is backslash-escaped.
    #[test]
    fn test_apply_string_escapes_with_escaped_quote() {
        assert_eq!(apply_string_escapes(r#"hello" world"#), r#""hello\" world""#);
    }

    // A backslash before a quote escapes both characters.
    #[test]
    fn test_apply_string_escapes_with_escaped_backslash() {
        assert_eq!(apply_string_escapes(r#"hello\" world"#), r#""hello\\\" world""#);
    }
}
|
Loading…
Reference in a new issue