initial commit

This commit is contained in:
sevki 2024-05-24 12:15:04 +01:00
commit 6673a24701
40 changed files with 22290 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
book
target

2281
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

31
Cargo.toml Normal file
View file

@ -0,0 +1,31 @@
[package]
name = "libsrc"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
default = ["lalrpop"]
[build-dependencies]
lalrpop = { version = "0.20.2", optional = true }
anyhow = "1.0.45"
phf_codegen = "0.10"
tiny-keccak = { version = "2", features = ["sha3"] }
[dependencies]
salsa = { git = "https://github.com/salsa-rs/salsa.git", branch = "master", package = "salsa-2022" }
salsa-macros = { git = "https://github.com/salsa-rs/salsa.git", branch = "master", package = "salsa-2022-macros" }
insta = "1.38.0"
lalrpop = "0.20.2"
lalrpop-util = { version = "0.20.2", features = ["lexer", "unicode"] }
okstd = { version = "0.1.0", path = "../okstd", features = [
], default-features = false }
proptest = "1.4.0"
stringzilla = "3.8.1"
syn = "2.0.60"
bitflags = "2.5.0"
[dev-dependencies]
insta = "1.38.0"
proptest = "1.4.0"

17
book.toml Normal file
View file

@ -0,0 +1,17 @@
[book]
authors = ["@sevki"]
language = "en"
multilingual = true
src = "docs"
# additional css https://raw.githubusercontent.com/oknotokcomputer/okcss/main/ok.css
[output.html]
additional-css = ["ok.css"]
theme = "../theme"
default-theme = "dark"
git-repository-url = "https://github.com/oknotokcomputer/roar"
preferred-dark-theme = "rust"
[preprocessor.svgbob]
[preprocessor.alerts]

100
build.rs Normal file
View file

@ -0,0 +1,100 @@
use core::panic;
use std::fmt::Write as _;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::PathBuf;
use tiny_keccak::{Hasher, Sha3};
/// The lalrpop grammar, relative to the crate manifest directory.
const SOURCE: &str = "src/parser/src.lalrpop";
/// The generated parser whose embedded `// sha3:` digest gates regeneration.
// NOTE(review): this resolves relative to the build-script working directory
// (the manifest dir); `generate_in_source_tree` would emit
// "src/parser/src.rs" — confirm the missing "src/" prefix is intentional,
// otherwise the parser is regenerated on every build.
const TARGET: &str = "parser/src.rs";
/// Build entry point: re-run when the grammar changes and regenerate the
/// parser if its recorded hash is stale.
fn main() -> anyhow::Result<()> {
    println!("cargo:rerun-if-changed={}", SOURCE);
    try_lalrpop(SOURCE, TARGET)
}
/// Returns true when the generated parser at `target` is missing, or when the
/// `// sha3:` digest it embeds no longer matches a hash of the grammar at
/// `source`.
///
/// Fix: the cargo warnings previously interpolated the `TARGET` const instead
/// of the `target` parameter, which would mislead if the function were called
/// with a different path.
fn requires_lalrpop(source: &str, target: &str) -> bool {
    let target_file = match File::open(target) {
        Ok(f) => f,
        Err(_) => {
            println!("cargo:warning={target} doesn't exist. regenerate.");
            return true;
        }
    };
    // The generated file records the hash of the grammar it was built from.
    let sha_prefix = "// sha3: ";
    let sha3_line = BufReader::with_capacity(128, target_file)
        .lines()
        .find_map(|line| {
            let line = line.unwrap();
            line.starts_with(sha_prefix).then_some(line)
        })
        .expect("no sha3 line?");
    let expected_sha3_str = sha3_line.strip_prefix(sha_prefix).unwrap();
    // Hash the grammar with line endings normalized to LF so the digest is
    // stable across platforms (CRLF checkouts hash the same as LF ones).
    let actual_sha3 = {
        let mut hasher = Sha3::v256();
        let mut f = BufReader::new(File::open(source).unwrap());
        let mut line = String::new();
        while f.read_line(&mut line).unwrap() != 0 {
            if line.ends_with('\n') {
                line.pop();
                if line.ends_with('\r') {
                    line.pop();
                }
            }
            hasher.update(line.as_bytes());
            hasher.update(b"\n");
            line.clear();
        }
        let mut hash = [0u8; 32];
        hasher.finalize(&mut hash);
        hash
    };
    let eq = sha_equal(expected_sha3_str, &actual_sha3);
    if !eq {
        println!("cargo:warning={target} hash expected: {expected_sha3_str}");
        let mut actual_sha3_str = String::new();
        for byte in actual_sha3 {
            write!(actual_sha3_str, "{byte:02x}").unwrap();
        }
        println!("cargo:warning={target} hash actual: {actual_sha3_str}");
    }
    !eq
}
/// Regenerates the parser from the grammar when the committed generated file
/// is missing or stale (see `requires_lalrpop`).
///
/// Fixes: the grammar was previously processed twice (`process_root` over the
/// whole tree *and* `process_file` on the same grammar), the configuration
/// result was bound to an unused variable, and the failure message was the
/// placeholder `"msg"`.
///
/// Without the `lalrpop` feature enabled this panics rather than silently
/// building with a stale parser.
fn try_lalrpop(source: &str, target: &str) -> anyhow::Result<()> {
    if !requires_lalrpop(source, target) {
        return Ok(());
    }
    #[cfg(feature = "lalrpop")]
    {
        let full_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(SOURCE);
        let path = full_path.to_str().unwrap();
        println!("cargo:rerun-if-changed={}", path);
        // Generate next to the grammar (rather than into OUT_DIR) so the
        // result can be committed and hash-checked on later builds.
        lalrpop::Configuration::new()
            .generate_in_source_tree()
            .process_file(path)
            .expect("lalrpop failed to process grammar");
        Ok(())
    }
    #[cfg(not(feature = "lalrpop"))]
    panic!("try: cargo build --manifest-path=compiler/parser/Cargo.toml --features=lalrpop");
}
/// Compares a 64-character lowercase-hex digest string against 32 raw digest
/// bytes.
///
/// # Panics
/// Panics when the string is not exactly 64 characters long, or when any
/// two-character chunk is not valid hexadecimal.
fn sha_equal(expected_sha3_str: &str, actual_sha3: &[u8; 32]) -> bool {
    if expected_sha3_str.len() != 64 {
        panic!("lalrpop version? hash bug is fixed in 0.19.8");
    }
    // Decode each hex pair and compare against the raw digest.
    let decoded: Vec<u8> = (0..32)
        .map(|i| u8::from_str_radix(&expected_sha3_str[i * 2..i * 2 + 2], 16).unwrap())
        .collect();
    decoded[..] == actual_sha3[..]
}

43
docs/0intro.md Normal file
View file

@ -0,0 +1,43 @@
<img src="taocp.png" align="right" width="200px" />
# src Language
`src` is a domain-specific language for manipulating source code and building progressively distributed apps, or [PDAs](https://fistfulofbytes.com/progressive-distributed-apps/).
It draws a lot of inspiration from [Effekt](https://www.effekt-lang.org/) and [Koka](https://koka-lang.github.io/koka/doc/kokaspec.html) languages.
`src`'s main aim is to provide a gradually distributed programming
environment for building software.
It tries to achieve these goals by providing a thin veneer over the operating system's `libc` (or equivalent), treating the syscalls to the operating system as effects.
Therefore the operating system becomes the [effect handler](https://effect-handlers.org/) for the execution environment.
```src
use { host } from std
effect Make: async + throws + execs + reads + writes {
catch() [throws]
await<T>(f: Future<T>) [async, throws] -> T
exec(arg0: string, args: stringvec) [Make] -> i32
}
struct Local {
host: host
}
impl Make for Local {
fn catch(self) [throws] {
}
fn await<T>(f: Future<T>) [async, throws] -> T {
yield()
}
fn exec(self, arg0: string, args: vec<string>) [Vm] -> i32 {
self.host.read("jobserver").await
if self.host.exec(arg0, args) {
raise(1)
}
}
}
```

5
docs/SUMMARY.md Normal file
View file

@ -0,0 +1,5 @@
# Summary
- [Intro](0intro.md)
- [Examples](examples.md)
- [Language](language/0intro.md)

30
docs/examples.md Normal file
View file

@ -0,0 +1,30 @@
# Examples
```src
use { native_fs, native_exec } from host
use { fs } from std
struct Innitguv {
fs: native_fs,
exec: native_exec
current_pid: i32
}
impl Exec for Innitguv {
fn exec(&self, arg0: str, args: vec<str>) [nd, exec, await] -> i32 {
let path = arg0
let pid = self.exec.exec(path, args)
if pid == -1 {
return -1
}
self.current_pid = pid
yield()
}
}
impl Actor for Innitguv {
fn recv(&self, msg: Message) [recv, await] {
self.exec(msg.path, msg.args)
}
}
```

6
docs/language/0intro.md Normal file
View file

@ -0,0 +1,6 @@
# Language
## Specification
```rust,ignore
{{#include ../../src/parser/src.lalrpop}}
```

BIN
docs/taocp.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 822 KiB

2982
ok.css Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,16 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 1c83167e024771d976df58234003dac45ba031f887e42fa2b061acf1a1370cb2 # shrinks to string = "\"#A\""
cc fcd774e0a6ebcde805b7b7e5f476e7f93c232d572f0bf4c04e24da0e58cb1888 # shrinks to rnd = ("yzzwjqmyhiqofkfmuwzwibirqlbm", 7)
cc f8cfea8e580b693c74835373498e4d90e0fb5999d6e8829e428af48ad1c41dea # shrinks to rnd = ("ebnicpjotxbnflxi", 4)
cc 260dbc051bb574f18d0222da79697035cf1f9a0385d6e35082558a2173098aa4 # shrinks to rnd = ("djmooxmjjdocvtacweycdtky", 8)
cc 3f2143c462bdd5113f5580c05acab2508c0ad23c9bc09811fc3b7477101c48ed # shrinks to rnd = ("aa", 1)
cc 57751377005060ccb986d37941310b6a76eb50bab6bd26e774c42e319b01bdff # shrinks to rnd = ("a", 1)
cc 1bb75d4dad1c2903dfbdc6ebc91c09af7fa197de6a0d454f4bf94e4ca2107ef7 # shrinks to rnd = ("pxqoxoktdyppluvr", 3)
cc 861504c1bde04633ced8c7c2ba53300b17c5c33a33576455f31ea0381cd84822 # shrinks to rnd = ("ugetuobzjwwggmlvfaldp", 3)
cc 61d685f319f26003a845170ab7aec02e26eb3696b0531bbc4eed3d9d139e5ed6 # shrinks to rnd = ("a", 1)
cc f708e7be5415e8f960e050cd92279d78c905068f4307bdd97208910e5ed6f037 # shrinks to rnd = ("spvytpknoqtgglxefqbi", 1)

View file

@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 539eafd927ae06585e452c563dbd935506987f19333d5d97a7e47e769cb4fb78 # shrinks to input = "¡"

View file

@ -0,0 +1,14 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 53ed407a7836b80d1aeb8237943360dc417a3b4ff1d09cccf98ad30d4e5daf0b # shrinks to s = "self"
cc 25c567f936eb3aef0681be73d32fc06584db01b9cb8be5ee01e5d99b75f183e5 # shrinks to s = 0.0
cc 2c4f316af5e79461d0e55217d80b990b0bf3f5857bca9dc0116c15a113e721ea # shrinks to s = ("26496402", "8", "+")
cc 10570f46a35ba69ae6f950da4381e32d5eea576a431fa733c68fe0de437bd1a9 # shrinks to s = 0, n = 0
cc 849502590009964980943dc6b028f19129755e96a35e61fef820103c7a47141e # shrinks to s = "true", n = 208438984
cc 3c31cefc4aa84bb451917c892bd0210df9d1c09c9860763bc56eb19fc93b660e # shrinks to s = "true", n = 0
cc 8d077c98fec19684acd476a9013ce5c9c19e6f0f9bad1b187eddea4d4103ac21 # shrinks to s = "true"
cc 8e3bd3186714efb1f2255d04318cb0f90a271cf4fdcba60a302b9210f41366cf # shrinks to s = ("-379538130", "0", "+")

View file

@ -0,0 +1,14 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc f4b12d9127d638ad4a31ad4c95b20d10138f371fe1ae926525e66fdac917c823 # shrinks to s = 0
cc 8479b82e178bfabc6b68933111be0bf965b5e6b258f0eac472b431518f39fca3 # shrinks to s = "\""
cc 92cd10d9b071770069af466193eec50a9363a1a56469c922f05a71a3d25f2b20 # shrinks to s = "\\"
cc 1b5e71738bc9301b8164e86be87ffa4819af75b3fab471597050fb8bbf044fbe # shrinks to s = "null"
cc 39e17d0930f1f7ceae8389177f28a2c43b3233541c7a49854fc23635fcb8f216 # shrinks to s = ("a", "18447000000000000000")
cc d4e50e6c8c2a24006e7a78ad8b3d23f24089e6f7eaa32f1e42afd83eb82a10fb # shrinks to s = ("4043826871", "2280852354", "*")
cc c64ace410e7d4926e2910b3161690270c72c6dd321820e02791b879ed656437d # shrinks to s = "^"
cc 6dde019e8b018f1bcf0eee378c4f0b24a2a412d357011a64770eca7ec4c17e6d # shrinks to s = "true", n = "\"\""

0
src/compiler/checks.rs Normal file
View file

64
src/compiler/db.rs Normal file
View file

@ -0,0 +1,64 @@
use std::sync::{Arc, Mutex};
use salsa::DebugWithDb;
use crate::{
lexer::{self, Lexer, Token},
Db,
};
use super::text::SourceProgram;
/// Concrete salsa database for the compiler, owning all query storage.
#[derive(Default)]
#[salsa::db(crate::Jar)]
pub(crate) struct Database {
    storage: salsa::Storage<Self>,
    // The logs are only used for testing and demonstrating reuse:
    //
    logs: Option<Arc<Mutex<Vec<String>>>>,
}
impl Database {
    /// Enable logging of each salsa event.
    ///
    /// May only be called once; panics if logging was already enabled.
    #[cfg(test)]
    pub fn enable_logging(self) -> Self {
        assert!(self.logs.is_none());
        Self {
            storage: self.storage,
            logs: Some(Default::default()),
        }
    }

    /// Drains and returns the accumulated event log.
    ///
    /// Panics when logging was never enabled via [`Database::enable_logging`].
    #[cfg(test)]
    pub fn take_logs(&mut self) -> Vec<String> {
        match &self.logs {
            Some(logs) => std::mem::take(&mut *logs.lock().unwrap()),
            None => panic!("logs not enabled"),
        }
    }
}
impl salsa::Database for Database {
    /// Records `WillExecute` events into the log buffer when logging is
    /// enabled.
    ///
    /// Fix: removed a stray unconditional `eprintln!` that printed *every*
    /// salsa event to stderr, contradicting the "only log interesting events
    /// when logging is enabled" logic below — a debug leftover.
    fn salsa_event(&self, event: salsa::Event) {
        // Log interesting events, if logging is enabled
        if let Some(logs) = &self.logs {
            // don't log boring events
            if let salsa::EventKind::WillExecute { .. } = event.kind {
                logs.lock()
                    .unwrap()
                    .push(format!("Event: {:?}", event.debug(self)));
            }
        }
    }
}
impl salsa::ParallelDatabase for Database {
    /// Creates a snapshot that shares the query storage and the (optional)
    /// log buffer, allowing queries to run on another thread.
    fn snapshot(&self) -> salsa::Snapshot<Self> {
        salsa::Snapshot::new(Database {
            storage: self.storage.snapshot(),
            logs: self.logs.clone(),
        })
    }
}

76
src/compiler/errors.rs Normal file
View file

@ -0,0 +1,76 @@
use std::ops::Range;
use crate::Db;
use super::text::SourceProgram;
/// Newtype over the error recoveries produced by the lalrpop parser,
/// convertible into the byte ranges the errors cover (see `IntoIterator`).
pub struct Errors<'a>(Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token<'a>, &'a str>>);
impl<'a> From<Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token<'a>, &'a str>>>
    for Errors<'a>
{
    /// Wraps the raw recovery list without further processing.
    fn from(
        errors: Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token<'a>, &'a str>>,
    ) -> Self {
        Self(errors)
    }
}
impl<'a> IntoIterator for Errors<'a> {
    type Item = Range<usize>;
    type IntoIter = <Vec<std::ops::Range<usize>> as IntoIterator>::IntoIter;

    /// Converts each recovered parse error into the byte range it covers.
    ///
    /// Point errors (invalid token, unexpected EOF) become empty ranges at
    /// their location. Fix: unused pattern bindings now use `..`, silencing
    /// unused-variable warnings.
    fn into_iter(self) -> Self::IntoIter {
        self.0
            .into_iter()
            .map(|error| match error.error {
                lalrpop_util::ParseError::InvalidToken { location } => location..location,
                lalrpop_util::ParseError::UnrecognizedEof { location, .. } => location..location,
                lalrpop_util::ParseError::UnrecognizedToken { token, .. } => token.0..token.2,
                lalrpop_util::ParseError::ExtraToken { token } => token.0..token.2,
                // User-defined errors carry no span information yet.
                lalrpop_util::ParseError::User { .. } => todo!(),
            })
            .collect::<Vec<_>>()
            .into_iter()
    }
}
/// Renders recovered parse errors as simple caret diagnostics against `src`.
///
/// Only `UnrecognizedToken` is implemented; the remaining error variants are
/// still `todo!()`. Fixes: the caret line was not newline-terminated, so
/// consecutive diagnostics ran together on one line; removed the dead
/// `last_end` bookkeeping and its commented-out uses.
fn handle_errors(errors: Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token, &str>>, src: &str) -> String {
    let mut pretty = String::new();
    for error in errors {
        match error.error {
            lalrpop_util::ParseError::InvalidToken { .. } => todo!(),
            lalrpop_util::ParseError::UnrecognizedEof { .. } => todo!(),
            lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
                // Find the full line containing the token so we can underline
                // the offending span with carets.
                let start = token.0;
                let end = token.2;
                let start_line = src[..start].rfind('\n').map_or(0, |i| i + 1);
                let end_line = src[end..].find('\n').map_or(src.len(), |i| end + i);
                let line = &src[start_line..end_line];
                // Columns are byte offsets from the line start; a token that
                // spans multiple lines will under-report here (unchanged).
                let start_col = start - start_line;
                let end_col = end - start_line;
                pretty.push_str(&format!("error: unexpected token {:?}, expected one of {:?}\n", token.1, expected));
                pretty.push_str(line);
                pretty.push('\n');
                pretty.push_str(&" ".repeat(start_col));
                pretty.push_str(&"^".repeat(end_col - start_col));
                // Terminate the caret line (previously missing).
                pretty.push('\n');
            }
            lalrpop_util::ParseError::ExtraToken { .. } => todo!(),
            lalrpop_util::ParseError::User { .. } => todo!(),
        };
    }
    pretty
}

63
src/compiler/ir.rs Normal file
View file

@ -0,0 +1,63 @@
#![allow(clippy::needless_borrow)]
use salsa::*;
use std::{
array::IntoIter,
collections::BTreeMap,
path::Iter,
sync::{Arc, Mutex},
};
use crate::{parser::ast};
/// Root IR node: the lowered functions plus the program's symbol table.
#[salsa::tracked]
pub struct Program {
    #[return_ref]
    pub modul: Vec<Function>,
    #[return_ref]
    pub symbols: BTreeMap<Mangled, Symbol>,
}
/// A lowered function.
#[salsa::tracked]
pub struct Function {
    #[return_ref]
    pub name: String,
    /// Functions nested inside this one.
    #[return_ref]
    pub body: Vec<Box<Function>>,
    /// Effects carried by this function, interned for cheap comparison.
    #[return_ref]
    pub effects: Vec<InternedEffect>,
}
/// An interned effect name.
#[salsa::interned]
pub struct InternedEffect {
    pub effect: String,
}
/// A symbol, identified by its mangled name.
#[salsa::interned]
pub struct Symbol {
    #[return_ref]
    pub symbol: Mangled,
}
/// An AST effect definition wrapped as a salsa-tracked entity.
#[salsa::tracked]
pub struct EffectDef {
    #[return_ref]
    pub effect: ast::EffectDef,
}
/// A `use { ... } from module` import.
#[salsa::tracked]
pub struct Import {
    /// Names imported from `module`.
    #[return_ref]
    pub imports: Vec<String>,
    #[return_ref]
    pub module: String,
}
/// An interned mangled name (`module_item`; see `add_imports`).
#[salsa::interned]
pub struct Mangled {
    #[return_ref]
    pub mangled: String,
}

81
src/compiler/mod.rs Normal file
View file

@ -0,0 +1,81 @@
use std::{
collections::BTreeMap,
ops::{Range, RangeBounds},
};
use crate::{
compiler::{errors::Errors, text::{Position, Span, Spanned}},
parser::ast::{self, EffectDef, Module},
Db,
};
use self::text::SourceProgram;
mod db;
mod errors;
pub mod ir;
mod tests;
pub mod text;
/// Parses `src` and lowers it into an [`ir::Program`].
///
/// Currently only `use` definitions populate the symbol table and `fn`
/// definitions are accepted (and ignored); every other top-level expression
/// is still `todo!()`.
///
/// # Panics
/// Panics when the parser reports recovery errors (fix: the bare `panic!()`
/// now carries a message).
#[salsa::tracked]
pub fn compile(db: &dyn Db, src: SourceProgram) -> ir::Program {
    let mut errors: Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token, &str>> = vec![];
    let wrapper = crate::lexer::TripleIterator::new(src.text(db));
    let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
    if !errors.is_empty() {
        // Resolve spans so positions are computed (and cached) for each
        // error, even though they are not rendered yet.
        for _error_range in Into::<Errors>::into(errors) {
            text::to_spans(db, src);
        }
        panic!("source program contained parse errors");
    }
    let modul = t.unwrap();
    let mut symbol_table = BTreeMap::new();
    for toplevel in modul.0 {
        match *toplevel {
            ast::Expression::BinaryExpression(_) => todo!(),
            ast::Expression::Bool(_) => todo!(),
            ast::Expression::Integer(_) => todo!(),
            ast::Expression::Float(_) => todo!(),
            ast::Expression::Ident(_) => todo!(),
            ast::Expression::Binding(_) => todo!(),
            ast::Expression::FnCall(_) => todo!(),
            ast::Expression::String(_) => todo!(),
            ast::Expression::FnDef(_) => {}
            ast::Expression::ShellCommand(_, _) => todo!(),
            ast::Expression::EffectDef(_) => todo!(),
            ast::Expression::StructDef(_) => todo!(),
            ast::Expression::UseDef(usedef) => {
                // `use { a, b } from m` introduces mangled symbols `m_a`, `m_b`.
                let import =
                    ir::Import::new(db, usedef.0.into_iter().map(|x| x.0).collect(), usedef.1 .0);
                for import in add_imports(db, import) {
                    symbol_table.insert(import, ir::Symbol::new(db, import));
                }
            }
            ast::Expression::Keyword(_) => todo!(),
            ast::Expression::ImplDef(_) => todo!(),
            ast::Expression::Branch(_) => todo!(),
            ast::Expression::Error => todo!(),
        }
    }
    // Function lowering is not implemented yet, hence the empty module list.
    ir::Program::new(db, vec![], symbol_table)
}
/// Stub: lowering of effect definitions is not implemented yet.
#[salsa::tracked]
pub fn compile_effect(db: &dyn Db, effect: ir::EffectDef) {}
/// Mangles every imported name of `import` as `"{module}_{name}"` and
/// returns the interned results, in import order.
#[salsa::tracked]
pub fn add_imports(db: &dyn Db, import: ir::Import) -> Vec<ir::Mangled> {
    import
        .imports(db)
        .iter()
        .map(|imp| ir::Mangled::new(db, format!("{}_{}", import.module(db), imp)))
        .collect()
}

0
src/compiler/std/mod.rs Normal file
View file

42
src/compiler/tests.rs Normal file
View file

@ -0,0 +1,42 @@
/// Smoke test: compiles a small program end-to-end and prints the resulting
/// IR.
// NOTE(review): despite living in tests.rs this asserts nothing — it only
// exercises `compile` and relies on manual inspection (and on `compile`
// panicking on parse errors).
#[cfg(test)]
#[okstd::test]
fn debug() {
    use salsa::{database::AsSalsaDatabase, storage::HasJarsDyn};
    use super::{db, text::SourceProgram};
    let src = r#"use { native_fs, native_exec } from host
use { fs } from std
struct Innitguv {
fs: native_fs,
exec: native_exec
current_pid: i32
}
impl Exec for Innitguv {
fn exec(&self, arg0: str, args: vec<str>) [nd, exec, await] -> i32 {
let path = arg0
let pid = self.exec.exec(path, args)
if pid == -1 {
raise(-1)
}
self.current_pid = pid
yield()
}
}
impl Actor for Innitguv {
fn recv(&self, msg: Message) [recv, await] {
self.exec(msg.path, msg.args)
}
}"#;
    let db = &crate::compiler::db::Database::default().enable_logging();
    let prog = SourceProgram::new(db, src.to_string());
    let res = super::compile(db, prog);
    println!("{:?}", prog);
    println!("{:?}", res.symbols(db));
    let modul = res.modul(db);
    println!("{:?}", modul);
}

155
src/compiler/text.rs Normal file
View file

@ -0,0 +1,155 @@
use std::ops::Range;
use bitflags::bitflags;
use crate::Db;
/// Represents the source program text.
#[salsa::input]
pub struct SourceProgram {
    #[return_ref]
    pub text: String,
}
/// Represents a spanned piece of code.
#[salsa::interned]
pub struct Spanned {
    /// The span of the code.
    #[return_ref]
    pub span: Span,
    /// The source program associated with the code.
    #[return_ref]
    pub src: SourceProgram,
    /// The position of the code in the source program.
    #[return_ref]
    pub pos: Position,
}
/// Represents a span of text.
#[salsa::interned]
pub struct Span {
    /// The range of the span in the source program text, as
    /// (start, end) byte offsets.
    pub span: (usize, usize),
}
/// Represents a position in the source code.
#[salsa::interned]
pub struct Position {
    /// The line number of the position.
    // NOTE(review): `to_spans` stores the 0-based index from `enumerate()`
    // here — confirm consumers expect 0-based rather than 1-based lines.
    l: usize,
    /// The column number of the position (counted in chars by `to_spans`).
    c: usize,
}
/// Represents the source map of the program.
#[salsa::tracked]
pub struct SourceMap {
    /// One entry per lexer token, in source order.
    #[return_ref]
    pub tokens: Vec<Spanned>,
}
/// Returns the byte length of every line in `src`, in order. Line
/// terminators are not included in the lengths (`str::lines` strips them).
#[salsa::tracked]
pub fn calculate_line_lengths(db: &dyn Db, src: SourceProgram) -> Vec<usize> {
    src.text(db).lines().map(str::len).collect()
}
// SpanOverlap is a bitflag that describes how two spans overlap.
// It is used to determine if a token is within a line; it is rare that a
// token will span multiple lines, but it is possible.
bitflags! {
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct SpanOverlap: u8 {
        /// Neither endpoint of `b` lies inside `a` (see `cmp_range`).
        const NONE = 0b0000;
        /// `a` contains `b.start`.
        const START = 0b0001;
        /// `a` contains `b.end`.
        const END = 0b0010;
        /// `a` contains both endpoints of `b`.
        const BOTH = 0b0011;
    }
}
/// Classifies how range `b` overlaps range `a`: which of `b`'s endpoints
/// fall inside `a`.
#[inline]
fn cmp_range<T: Ord>(a: &Range<T>, b: &Range<T>) -> SpanOverlap {
    let start_inside = a.contains(&b.start);
    let end_inside = a.contains(&b.end);
    match (start_inside, end_inside) {
        (false, false) => SpanOverlap::NONE,
        (true, false) => SpanOverlap::START,
        (false, true) => SpanOverlap::END,
        (true, true) => SpanOverlap::BOTH,
    }
}
/// Maps every lexer token in `src` to a [`Spanned`] carrying its byte span
/// and (line, column) position, producing a [`SourceMap`].
///
/// Fixes: the line-matching loop could spin forever (a) once the line
/// iterator was exhausted — `peek()` returned `None` and nothing broke out —
/// and (b) for a token overlapping a line only at its END (a multi-line
/// token), where neither the advance nor the break branch was taken. It also
/// cloned the whole iterator on every turn just to peek, and bound an unused
/// `size` variable.
///
/// todo(sevki): split this into two functions
#[salsa::tracked]
pub fn to_spans(db: &dyn Db, src: SourceProgram) -> SourceMap {
    // Cumulative byte range of each line.
    // NOTE(review): `calculate_line_lengths` excludes '\n' terminators, so
    // these ranges drift one byte earlier per line — confirm whether line
    // attribution is meant to account for newline characters.
    let line_lengths: Vec<Range<usize>> = calculate_line_lengths(db, src)
        .into_iter()
        .scan(0, |acc, x| {
            let range = *acc..*acc + x;
            *acc += x;
            Some(range)
        })
        .collect();
    // Iterate lines last-to-first as a peekable stack; tokens pop lines until
    // one contains their start offset.
    let mut line_lengths = line_lengths.into_iter().enumerate().rev().peekable();
    let mut spans = vec![];
    let lexer = crate::lexer::Lexer::new(src.text(db), 0);
    // This is a zip-ish merge of two sequences: the cheaply-built line ranges
    // and the expensively-built lexer tokens.
    for token in lexer {
        let mut start: Option<(usize, usize)> = None;
        loop {
            // Copy the peeked entry out so we can advance the iterator below
            // without holding a borrow into it.
            let peeked = line_lengths.peek().map(|(no, range)| (*no, range.clone()));
            let (line_no, span) = match peeked {
                Some(entry) => entry,
                // Ran out of lines: the token cannot be placed; bail out
                // instead of looping forever.
                None => break,
            };
            let overlap = cmp_range(&span, &(token.start..token.end));
            if overlap.contains(SpanOverlap::START) {
                // This line contains the token's start: record line number
                // and the line's starting byte offset.
                start = Some((line_no, span.start));
                break;
            }
            // No start on this line (NONE, or END-only for a token crossing
            // lines): try the next one.
            line_lengths.next();
        }
        if start.is_none() {
            // The token could not be attributed to any line; stop mapping.
            break;
        }
        let start = start.unwrap();
        // Column = number of chars between the line start and the token start.
        let leading_chars = src.text(db).get(start.1..token.start);
        let column = leading_chars.map(|x| x.chars().count()).unwrap_or(0);
        /*
        ```text
        1,1 7
        | |
        # Intro
        8 lorem ipsum dolor sit amet
        13 byte start
        6th column, 2nd line
        ```
        */
        spans.push(Spanned::new(
            db,
            Span::new(db, (token.start, token.end)),
            src,
            Position::new(db, start.0, column),
        ));
    }
    SourceMap::new(db, spans)
}

View file

@ -0,0 +1,12 @@
use super::*;
use proptest::{num::i32, prelude::*};
proptest! {
    // Property: `let <ident> = "<int>"` always lexes to exactly 4 tokens
    // (let keyword, identifier, '=', string literal), regardless of the
    // generated identifier and value.
    #[test]
    fn test_strings(rnd in ("[a-z]+", 1..10)) {
        let input = format!(r#"let {} = "{}""#, rnd.0, rnd.1);
        let lexer = Lexer::new(&input, 0);
        let tokens: Vec<Spanned<Token, Position>> = lexer.collect();
        assert_eq!(tokens.len(), 4);
    }
}

View file

@ -0,0 +1,306 @@
use crate::lexer::{Lexer, TokenStreamDisplay};
use insta::assert_snapshot;
use okstd::prelude::*;
#[cfg(test)]
#[okstd::test]
fn test_empty_lexer() {
    // Whitespace-only input yields no tokens at all.
    let input = " ";
    let lexer = Lexer::new(input, 0);
    // Fix: `map_while(|t| Some(t))` was a no-op adaptor (the closure never
    // returns None), so collect the iterator directly. Same below.
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    "###);
}
#[okstd::test]
fn test_1_plus_1() {
    // Simple binary expression: integer, operator, integer.
    let input = "1 + 1";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Integer(1), 1:1
    - Plus, 1:3
    - Integer(1), 1:5
    "###);
}
#[okstd::test]
fn test_1_plus_1_plus_1() {
    // Chained additions lex left to right.
    let input = "1 + 1 + 1";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Integer(1), 1:1
    - Plus, 1:3
    - Integer(1), 1:5
    - Plus, 1:7
    - Integer(1), 1:9
    "###);
}
#[okstd::test]
fn test_1_plus_1_plus_1_plus_1() {
    // Mixed arithmetic operators: plus, divide, percent.
    let input = "1 + 1 / 1 % 1";
    let lexer = Lexer::new(input, 0);
    // Fix: dropped the no-op `map_while(|t| Some(t))` adaptor. Same below.
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Integer(1), 1:1
    - Plus, 1:3
    - Integer(1), 1:5
    - Divide, 1:7
    - Integer(1), 1:9
    - Percent, 1:11
    - Integer(1), 1:13
    "###);
}
#[okstd::test]
fn test_let_a_equals_1() {
    // `let` binding: keyword, identifier, equals, literal.
    let input = "let a = 1";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Let), 1:3
    - Word(Ident("a")), 1:5
    - Equals, 1:7
    - Integer(1), 1:9
    "###);
}
#[okstd::test]
fn test_let_a_equals_1_plus_1() {
    let input = "let a = 1 + 1";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Let), 1:3
    - Word(Ident("a")), 1:5
    - Equals, 1:7
    - Integer(1), 1:9
    - Plus, 1:11
    - Integer(1), 1:13
    "###);
}
#[okstd::test]
fn test_let_a_equals_1_plus_3_point_14() {
    // Float literals lex as a single token.
    let input = "let a = 1 + 3.14";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Let), 1:3
    - Word(Ident("a")), 1:5
    - Equals, 1:7
    - Integer(1), 1:9
    - Plus, 1:11
    - Float(3.14), 1:16
    "###);
}
#[okstd::test]
fn test_let_a_equals_1_plus_3_point_14_plus_1() {
    // Float in the middle of an integer expression.
    let input = "let a = 1 + 3.14 + 1";
    let lexer = Lexer::new(input, 0);
    // Fix: dropped the no-op `map_while(|t| Some(t))` adaptor. Same below.
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Let), 1:3
    - Word(Ident("a")), 1:5
    - Equals, 1:7
    - Integer(1), 1:9
    - Plus, 1:11
    - Float(3.14), 1:16
    - Plus, 1:18
    - Integer(1), 1:20
    "###);
}
#[okstd::test]
fn test_fn_foo() {
    // Minimal function definition with empty parameter list and body.
    let input = "fn foo() {}";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Fn), 1:2
    - Word(Ident("foo")), 1:6
    - LeftParen, 1:7
    - RightParen, 1:8
    - LeftBrace, 1:10
    - RightBrace, 1:11
    "###);
}
#[okstd::test]
fn test_fn_foo_bar() {
    let input = "fn foo(bar) {}";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Fn), 1:2
    - Word(Ident("foo")), 1:6
    - LeftParen, 1:7
    - Word(Ident("bar")), 1:10
    - RightParen, 1:11
    - LeftBrace, 1:13
    - RightBrace, 1:14
    "###);
}
#[okstd::test]
fn test_fn_foo_bar_baz() {
    // Multi-line body: the lexer emits a NewLine token per line break.
    let input = "fn foo(bar, baz) {

}";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Fn), 1:2
    - Word(Ident("foo")), 1:6
    - LeftParen, 1:7
    - Word(Ident("bar")), 1:10
    - Comma, 1:11
    - Word(Ident("baz")), 1:15
    - RightParen, 1:16
    - LeftBrace, 1:18
    - NewLine, 2:0
    - NewLine, 3:0
    - RightBrace, 3:1
    "###);
}
#[okstd::test]
fn test_fn_foo_bar_baz_qux() {
    let input = "fn foo(bar, baz) {
    qux()
}";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Fn), 1:2
    - Word(Ident("foo")), 1:6
    - LeftParen, 1:7
    - Word(Ident("bar")), 1:10
    - Comma, 1:11
    - Word(Ident("baz")), 1:15
    - RightParen, 1:16
    - LeftBrace, 1:18
    - NewLine, 2:0
    - Word(Ident("qux")), 2:7
    - LeftParen, 2:8
    - RightParen, 2:9
    - NewLine, 3:0
    - RightBrace, 3:1
    "###);
}
#[okstd::test]
fn test_fn_foo_bar_baz_qux_quux() {
    // Nested call with an argument inside a function body.
    let input = "fn foo(bar, baz) {
    qux(quux)
}";
    let lexer = Lexer::new(input, 0);
    // Fix: dropped the no-op `map_while(|t| Some(t))` adaptor. Same below.
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Fn), 1:2
    - Word(Ident("foo")), 1:6
    - LeftParen, 1:7
    - Word(Ident("bar")), 1:10
    - Comma, 1:11
    - Word(Ident("baz")), 1:15
    - RightParen, 1:16
    - LeftBrace, 1:18
    - NewLine, 2:0
    - Word(Ident("qux")), 2:7
    - LeftParen, 2:8
    - Word(Ident("quux")), 2:12
    - RightParen, 2:13
    - NewLine, 3:0
    - RightBrace, 3:1
    "###);
}
#[okstd::test]
fn test_fn_foo_bar_baz_qux_quux_quuz() {
    // Mixed argument kinds: identifier, float, and hex integer
    // (0xdeadbeef lexes to its decimal value 3735928559).
    let input = "fn foo(bar, baz) {
    qux(quux, 3.14,0xdeadbeef)
}";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Fn), 1:2
    - Word(Ident("foo")), 1:6
    - LeftParen, 1:7
    - Word(Ident("bar")), 1:10
    - Comma, 1:11
    - Word(Ident("baz")), 1:15
    - RightParen, 1:16
    - LeftBrace, 1:18
    - NewLine, 2:0
    - Word(Ident("qux")), 2:7
    - LeftParen, 2:8
    - Word(Ident("quux")), 2:12
    - Comma, 2:13
    - Float(3.14), 2:18
    - Comma, 2:19
    - Integer(3735928559), 2:29
    - RightParen, 2:30
    - NewLine, 3:0
    - RightBrace, 3:1
    "###);
}
#[okstd::test]
fn test_func_with_genetics() {
    // Generic parameter plus an effect list in brackets.
    let input = "fn foo<T>(bar: T)[throws, awaits, execs] {
    qux()
}";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Fn), 1:2
    - Word(Ident("foo")), 1:6
    - LessThan, 1:7
    - Word(Ident("T")), 1:8
    - GreaterThan, 1:9
    - LeftParen, 1:10
    - Word(Ident("bar")), 1:13
    - Colon, 1:14
    - Word(Ident("T")), 1:16
    - RightParen, 1:17
    - LeftBracket, 1:18
    - Word(Ident("throws")), 1:24
    - Comma, 1:25
    - Word(Ident("awaits")), 1:32
    - Comma, 1:33
    - Word(Ident("execs")), 1:39
    - RightBracket, 1:40
    - LeftBrace, 1:42
    - NewLine, 2:0
    - Word(Ident("qux")), 2:7
    - LeftParen, 2:8
    - RightParen, 2:9
    - NewLine, 3:0
    - RightBrace, 3:1
    "###);
}
#[okstd::test]
fn test_func_call_with_genetics() {
    // Call expression (no `fn` keyword) with generics and an effect list.
    let input = "foo<T>(bar: T)[vm]";
    let lexer = Lexer::new(input, 0);
    let actual_tokens = lexer.collect::<Vec<_>>();
    assert_snapshot!(TokenStreamDisplay::from(actual_tokens), @r###"
    - Word(Ident("foo")), 1:3
    - LessThan, 1:4
    - Word(Ident("T")), 1:5
    - GreaterThan, 1:6
    - LeftParen, 1:7
    - Word(Ident("bar")), 1:10
    - Colon, 1:11
    - Word(Ident("T")), 1:13
    - RightParen, 1:14
    - LeftBracket, 1:15
    - Word(Ident("vm")), 1:17
    - RightBracket, 1:18
    "###);
}

987
src/lexer/mod.rs Normal file
View file

@ -0,0 +1,987 @@
/*
lexer.rs is a lexer for the src language
*/
use std::{fmt::Display, iter::Iterator, iter::Peekable, str::Chars};
use lalrpop_util::{
lexer::Token as LAToken,
state_machine::{ParserDefinition, TokenTriple},
};
use okstd::prelude::*;
use syn::token;
// Identifier
/// Shell-style variables recognized by the lexer.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum Variable<'input> {
    // $$ is the process ID of the shell
    ProcessID,
    // $! is the process ID of the last background command
    LastBackgroundProcessID,
    // $? is the exit status of the last command executed
    LastCommandExitStatus,
    // $- is the current option flags as specified upon invocation, by the set built-in command, or by the shell invocation environment
    CurrentOptionFlags,
    // $@ is the positional parameters, starting from one
    PositionalParameters,
    // $# is the number of positional parameters in decimal
    PositionalParametersCount,
    // $0 is the name of the shell or shell script
    ShellName,
    // $1...$9 are the positional parameters, starting from zero
    PositionalParameter(usize),
    // ${parameter} is the value of the variable parameter
    Parameter(&'input str),
    // ${parameter:-word} is the value of the variable parameter if it is set; otherwise, the expansion of word is substituted
    ParameterDefault(&'input str, &'input str),
}

impl<'input> Display for Variable<'input> {
    /// Formats the variable back into its shell syntax.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Variable::ProcessID => write!(f, "$$"),
            Variable::LastBackgroundProcessID => write!(f, "$!"),
            Variable::LastCommandExitStatus => write!(f, "$?"),
            Variable::CurrentOptionFlags => write!(f, "$-"),
            Variable::PositionalParameters => write!(f, "$@"),
            Variable::PositionalParametersCount => write!(f, "$#"),
            Variable::ShellName => write!(f, "$0"),
            Variable::PositionalParameter(i) => write!(f, "${}", i),
            // NOTE(review): prints the unbraced `$p` form; the variant's doc
            // comment shows `${parameter}` — confirm which is canonical.
            Variable::Parameter(p) => write!(f, "${}", p),
            // Fix: previously printed "$p:w", which is not the `${p:-w}`
            // defaulted-expansion syntax documented on the variant.
            Variable::ParameterDefault(p, w) => write!(f, "${{{}:-{}}}", p, w),
        }
    }
}
// LexicalError
/// Errors the lexer can produce while tokenizing input.
#[derive(Debug, PartialEq, Clone)]
pub enum LexicalError {
    // Unexpected character
    UnexpectedCharacter(char),
    // Unterminated string
    UnterminatedString,
    // Invalid number format
    InvalidNumberFormat,
    // Invalid variable format
    InvalidVariableFormat,
    // Unexpected end of input
    UnexpectedEndOfInput,
    // Invalid state emission
    InvalidStateEmission(State),
}
/// Convenience alias: lexer operations fail with [`LexicalError`].
type Result<T> = std::result::Result<T, LexicalError>;
/// A value of type `T` together with its `[start, end)` byte span in the
/// source and a position of type `P` (defaults to line/column [`Position`]).
#[derive(Debug, PartialEq, Clone)]
pub struct Spanned<T, P = Position> {
    pub node: T,
    // Starting byte offset (inclusive).
    pub start: usize,
    // Ending byte offset (exclusive; `len()` is `end - start`).
    pub end: usize,
    pub pos: P,
}
impl Display for Spanned<Token<'_>> {
    // Debug-formats the token followed by its start/end byte offsets.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{:?}, start: {} end: {}",
            self.node, self.start, self.end
        )
    }
}
impl<T, P> Spanned<T, P> {
    /// Wraps `node` with its `[start, end)` byte span and position `pos`.
    pub fn new(node: T, start: usize, end: usize, pos: P) -> Self {
        Spanned {
            node,
            start,
            end,
            pos,
        }
    }
    /// Length of the span in bytes.
    pub fn len(&self) -> usize {
        self.end - self.start
    }
    /// True when the span covers no bytes; added so `len` follows the
    /// standard `len`/`is_empty` pairing (clippy: `len_without_is_empty`).
    pub fn is_empty(&self) -> bool {
        self.start == self.end
    }
}
// Position struct
/// A 1-based line/column location plus the size of the item it describes.
#[derive(Debug, PartialEq, Clone, Copy, Default)]
pub struct Position {
    pub line: usize,
    pub col: usize,
    pub size: usize,
}
// new function for Position
impl Position {
    /// Builds a `Position` from its three components.
    pub fn new(line: usize, col: usize, size: usize) -> Self {
        Position { line, col, size }
    }
}
// display trait implementation for Position
impl std::fmt::Display for Position {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
"line: {}, col: {}, size: {}",
self.line, self.col, self.size
)
}
}
// display trait implementation for Token
impl std::fmt::Display for Token<'_> {
    // Falls back to the derived Debug rendering; there is no prettier
    // user-facing form for tokens yet.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?}", self)
    }
}
/// Keywords and identifier-like words produced by the lexer. `Ident`,
/// `FnIdent` and `Any` carry the original source slice.
#[derive(Debug, PartialEq, Clone)]
pub enum Word<'input> {
    Let,
    Const,
    Fn,
    If,
    Else,
    Return,
    Match,
    For,
    While,
    Break,
    Continue,
    True,
    False,
    Null,
    Action,
    Enum,
    Impl,
    Import,
    None,
    Struct,
    Effect,
    When,
    Use,
    From,
    Where,
    Self_,
    Ident(&'input str),
    FnIdent(&'input str),
    Any(&'input str),
}
impl<'input> Word<'input> {
    /// Returns an iterator over the characters of the word's source text:
    /// the keyword spelling, or the carried slice for identifier variants.
    fn chars(&self) -> Chars<'_> {
        let text: &str = match self {
            Word::Let => "let",
            Word::Const => "const",
            Word::Fn => "fn",
            Word::If => "if",
            Word::Else => "else",
            Word::Return => "return",
            Word::Match => "match",
            Word::For => "for",
            Word::While => "while",
            Word::Break => "break",
            Word::Continue => "continue",
            Word::True => "true",
            Word::False => "false",
            Word::Null => "null",
            Word::When => "when",
            Word::Ident(ident) => ident,
            Word::FnIdent(ident) => ident,
            Word::Any(word) => word,
            Word::Action => "action",
            Word::Enum => "enum",
            Word::Impl => "impl",
            Word::Import => "import",
            Word::None => "none",
            Word::Struct => "struct",
            Word::Effect => "effect",
            Word::Use => "use",
            Word::From => "from",
            Word::Where => "where",
            Word::Self_ => "self",
        };
        text.chars()
    }
}
// token types debug
/// All token kinds produced by the lexer; slice-carrying variants borrow
/// from the input.
#[derive(Debug, PartialEq, Clone)]
pub enum Token<'input> {
    // Operators
    Pipe,      // |
    Ampersand, // &
    Semicolon, // ;
    Equals,    // =
    // Redirections
    LessThan,    // <
    GreaterThan, // >
    // Identifiers
    Variable(Variable<'input>), // $a-z, $A-Z, $0-9, $_
    // Literals
    Word(Word<'input>),  // a-z, A-Z, 0-9, _
    String(&'input str), // "..."
    // Comments
    Comment(&'input str), // #
    // Numbers
    Integer(i64), // 0-9
    Float(f64),   // 0-9
    // Special
    Eof,          // EOF
    NewLine,      // \n
    LeftParen,    // (
    RightParen,   // )
    LeftBrace,    // {
    RightBrace,   // }
    LeftBracket,  // [
    RightBracket, // ]
    Comma,        // ,
    Dot,          // .
    Colon,        // :
    Underscore,   // _
    Minus,        // -
    Plus,         // +
    Arrow,        // ->
    FatArrow,     // =>
    Divide,       // /
    Multiply,     // *
    Percent,      // %
    Dollar,       // $
    Exclamation,  // !
    Question,     // ?
    Tilde,        // ~
    At,           // @
    Caret,        // ^
    Shebang,      // #!
}
impl<'input> Token<'input> {
    /// Source text of the token as a character iterator. Tokens with no
    /// stored spelling here (`Variable`, `Integer`, `Float`, `Eof`) yield
    /// an empty iterator, exactly as before.
    fn to_chars(&'input self) -> Chars<'input> {
        let text: &'input str = match self {
            // Delegates to the word's own spelling.
            Token::Word(word) => return word.chars(),
            Token::String(string) => string,
            Token::Comment(comment) => comment,
            Token::Variable(_) | Token::Integer(_) | Token::Float(_) | Token::Eof => "",
            Token::Pipe => "|",
            Token::Ampersand => "&",
            Token::Semicolon => ";",
            Token::Equals => "=",
            Token::LessThan => "<",
            Token::GreaterThan => ">",
            Token::NewLine => "\n",
            Token::LeftParen => "(",
            Token::RightParen => ")",
            Token::LeftBrace => "{",
            Token::RightBrace => "}",
            Token::LeftBracket => "[",
            Token::RightBracket => "]",
            Token::Comma => ",",
            Token::Colon => ":",
            Token::Underscore => "_",
            Token::Minus => "-",
            Token::Plus => "+",
            Token::Arrow => "->",
            Token::FatArrow => "=>",
            Token::Divide => "/",
            Token::Multiply => "*",
            Token::Percent => "%",
            Token::Dollar => "$",
            Token::Exclamation => "!",
            Token::Question => "?",
            Token::Tilde => "~",
            Token::At => "@",
            Token::Caret => "^",
            Token::Dot => ".",
            Token::Shebang => "#!",
        };
        text.chars()
    }
}
impl<'input> Iterator for Token<'input> {
    type Item = char;
    // NOTE(review): `to_chars()` builds a fresh iterator on every call, so
    // a non-empty token yields its first character forever and never
    // advances — confirm whether any caller relies on Token as an Iterator.
    fn next(&mut self) -> Option<Self::Item> {
        self.to_chars().next()
    }
}
/// Hand-written state-machine lexer over a borrowed source string.
pub struct Lexer<'input> {
    input: &'input str,
    // Current byte offset into `input`.
    pos: usize,
    // 1-based line of `pos`.
    line: usize,
    // 1-based column of `pos`.
    col: usize,
    // Which kind of token is currently being scanned.
    state: State,
    // Characters accumulated for the token in progress.
    buffer: String,
    // Lookahead over the remaining input.
    peekable: Peekable<Chars<'input>>,
    // Most recently consumed character, if any.
    last_char: Option<char>,
}
impl<'input> Lexer<'input> {
    /// Creates a lexer over `input` starting at byte offset `pos`;
    /// line/column tracking always begins at 1:1.
    pub fn new(input: &'input str, pos: usize) -> Self {
        Self {
            input,
            pos,
            line: 1,
            col: 1,
            state: State::Program,
            buffer: String::new(),
            // NOTE(review): the char iterator always starts at the beginning
            // of `input` even when `pos` > 0 — confirm callers only pass 0
            // (TripleIterator does).
            peekable: input.chars().peekable(),
            last_char: None,
        }
    }
}
// Which quote character delimits the string literal being scanned.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Quotation {
    Single,
    Double,
}
/// Lexer states; each token kind is scanned by a dedicated state.
#[derive(Debug, PartialEq, Clone, Copy)]
enum State {
    Comment,
    Eof,
    NewLine,
    String(Quotation),
    Op,
    Variable,
    Word,
    Number,
    // Initial state, before any token has been recognized.
    Program,
    Shebang,
    // Neutral state between tokens; triggers whitespace skipping.
    Any,
}
// Sets the lexer's current state. (The trailing `;` is part of the macro's
// invocation grammar: call as `set_state!(self, State::X;)`.)
macro_rules! set_state {
    ($self:expr, $state:expr;) => {{
        $self.state = $state;
    }};
}
// Emits a spanned token and transitions to `$state`.
// `emit!(self, state => ?)` first materializes the token from the pending
// buffer via `emit_buffer`, then re-dispatches to the second arm.
macro_rules! emit {
    ($self:expr, $state:expr => ?) => {{
        let r = $self.emit_buffer().unwrap();
        $self.buffer.clear();
        emit!($self, $state => r)
    }};
    ($self:expr, $state:expr => $token:expr) => {{
        let start = $self.pos;
        // Advance position/line/col over the token's textual form. Numbers
        // carry no source text, so their decimal rendering is replayed.
        match $token {
            Token::Integer (number ) => {
                for c in number.to_string().chars() {
                    debug!("c: {}", c);
                    $self.advance(c);
                }
            }
            Token::Float ( number ) => {
                for c in number.to_string().chars() {
                    $self.advance(c);
                }
            }
            _ => {
                for c in $token.to_chars() {
                    $self.advance(c);
                }
            }
        }
        let end = $self.pos;
        let pos = Position::new(
            $self.line,
            // NOTE(review): this subtraction can underflow (panic) when the
            // buffer is longer than the current column — confirm inputs
            // can't trigger this.
            $self.col - $self.buffer.len() - 1,
            end - start,
        );
        $self.state = $state;
        let token = $token;
        let token = Spanned::new(token, start, end, pos);
        Ok(token)
    }};
}
// Lexer trait implementation
impl<'input> Lexer<'input> {
    /// Returns the full source string this lexer was created over.
    pub fn input(&self) -> &'input str {
        self.input
    }
fn push(&mut self) -> bool {
let c = self.peekable.next().unwrap();
self.buffer.push(c);
let finished = (self.pos as i32) + self.buffer.len() as i32 >= self.input.len() as i32;
finished
}
    // Consumes the next character without buffering it (used for whitespace
    // and the leading `$` of variables). Returns true at end of input.
    // Panics if no character remains.
    fn ignore(&mut self) -> bool {
        let c = self.peekable.next().unwrap();
        self.advance(c)
    }
    // Advances the byte position and line/column bookkeeping over `c`.
    // Returns true when the end of the input has been reached.
    // NOTE(review): advances `pos` by 1 per char while `input.len()` counts
    // bytes — multi-byte UTF-8 input would desynchronize the two; confirm
    // inputs are ASCII.
    fn advance(&mut self, c: char) -> bool {
        // Walking past the end would indicate a lexer bug.
        if self.pos + 1 > self.input.len() {
            unreachable!("pos: {}, input.len: {}", self.pos, self.input.len());
        }
        self.pos += 1;
        self.last_char = Some(c);
        if c == '\n' {
            self.line += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
        let finished = self.pos >= self.input.len();
        finished
    }
fn peek(&mut self) -> Option<char> {
return self.peekable.peek().copied();
}
// emit emit's the current token
fn emit_buffer(&mut self) -> Result<Token<'input>> {
let start = self.pos;
let end = self.pos + self.buffer.len();
match self.state {
// these states cannot emit tokens
State::Program => Err(LexicalError::InvalidStateEmission(State::Program)),
State::Op => Ok(match self.buffer.chars().next().unwrap() {
'(' => Token::LeftParen,
')' => Token::RightParen,
'{' => Token::LeftBrace,
'}' => Token::RightBrace,
'>' => Token::GreaterThan,
'<' => Token::LessThan,
'|' => Token::Pipe,
'&' => Token::Ampersand,
';' => Token::Semicolon,
',' => Token::Comma,
':' => Token::Colon,
'_' => Token::Underscore,
'+' => Token::Plus,
'*' => Token::Multiply,
'[' => Token::LeftBracket,
']' => Token::RightBracket,
'%' => Token::Percent,
'@' => Token::At,
'/' => Token::Divide,
'-' => {
if self.buffer.len() == 1 {
Token::Minus
} else if self.buffer == "->" {
Token::Arrow
} else {
unreachable!("unexpected character: {}", self.buffer)
}
}
'=' => Token::Equals,
_ => unreachable!(
"unexpected character: {} in state: {:?}",
self.buffer, self.state
),
}),
State::Any => Err(LexicalError::InvalidStateEmission(State::Any)),
// these states can emit tokens
State::Comment => {
let comment = self
.input
.get(start..end)
.ok_or(LexicalError::UnexpectedEndOfInput)?;
Ok(Token::Comment(comment))
}
State::Variable => {
let variable = self.buffer.clone();
let identifier = match variable.as_str() {
"$$" => Variable::ProcessID,
"$?" => Variable::LastCommandExitStatus,
"$!" => Variable::LastBackgroundProcessID,
"$-" => Variable::CurrentOptionFlags,
"$0" => Variable::ShellName,
"$#" => Variable::PositionalParametersCount,
_ => {
if variable.starts_with('$') && variable.len() > 1 {
let number = variable[1..]
.parse()
.map_err(|_| LexicalError::InvalidVariableFormat)?;
Variable::PositionalParameter(number)
} else {
let var = self
.input
.get(start..end)
.ok_or(LexicalError::UnexpectedEndOfInput)?;
Variable::Parameter(var)
}
}
};
Ok(Token::Variable(identifier))
}
State::Word => {
let word = self
.input
.get(start..end)
.ok_or(LexicalError::UnexpectedEndOfInput)?;
let word = match word {
"let" => Word::Let,
"const" => Word::Const,
"fn" => Word::Fn,
"if" => Word::If,
"else" => Word::Else,
"return" => Word::Return,
"match" => Word::Match,
"for" => Word::For,
"while" => Word::While,
"break" => Word::Break,
"continue" => Word::Continue,
"true" => Word::True,
"false" => Word::False,
"null" => Word::Null,
"action" => Word::Action,
"enum" => Word::Enum,
"impl" => Word::Impl,
"import" => Word::Import,
"none" => Word::None,
"struct" => Word::Struct,
"effect" => Word::Effect,
"when" => Word::When,
"use" => Word::Use,
"from" => Word::From,
"where" => Word::Where,
"self" => Word::Self_,
_ => {
Word::Ident(word)
// }
}
};
Ok(Token::Word(word))
}
State::String(Quotation) => {
let last_char = self.buffer.chars().last();
let quote = if Quotation == Quotation::Double {
Some('"')
} else {
Some('\'')
};
if last_char != quote {
panic!("expected: {:?}, got: {:?}", quote, last_char);
return Err(LexicalError::UnterminatedString);
}
let string = self
.input
.get(start..end)
.expect("shoulld've done something");
Ok(Token::String(string))
}
State::Number => {
let number = self.buffer.clone();
if number.contains('.') {
let float = number
.parse()
.map_err(|_| LexicalError::InvalidNumberFormat)?;
Ok(Token::Float(float))
} else if number.starts_with("0x") {
let integer = i64::from_str_radix(&number[2..], 16)
.map_err(|_| LexicalError::InvalidNumberFormat)?;
Ok(Token::Integer(integer))
} else {
let integer = number
.parse()
.map_err(|_| LexicalError::InvalidNumberFormat)?;
Ok(Token::Integer(integer))
}
}
State::NewLine => Ok(Token::NewLine),
State::Eof => Ok(Token::Eof),
State::Shebang => Ok(Token::Shebang),
}
}
    // Skips spaces/tabs, then inspects the next character to decide which
    // state should scan the upcoming token. Emits nothing itself.
    fn skip_ws(&mut self) -> Result<()> {
        while let Some(c) = self.peek() {
            match c {
                ' ' => {
                    self.ignore();
                }
                '\t' => {
                    self.ignore();
                }
                '#' => {
                    set_state!(self, State::Comment;);
                    return Ok(());
                }
                '"' => {
                    set_state!(self, State::String(Quotation::Double););
                    return Ok(());
                }
                '\'' => {
                    set_state!(self, State::String(Quotation::Single););
                    return Ok(());
                }
                '$' => {
                    set_state!(self, State::Variable;);
                    return Ok(());
                }
                'a'..='z' | 'A'..='Z' | '_' => {
                    set_state!(self, State::Word;);
                    return Ok(());
                }
                '0'..='9' => {
                    set_state!(self, State::Number;);
                    return Ok(());
                }
                '\n' => {
                    set_state!(self, State::NewLine;);
                    return Ok(());
                }
                '(' | ')' | '{' | '}' | '>' | '<' | '|' | '&' | ';' | ',' | ':' | '+' | '*'
                | '[' | ']' | '%' | '@' | '/' | '-' | '=' | '!' => {
                    set_state!(self, State::Op;);
                    debug!("to state: {:?}", self.state);
                    return Ok(());
                }
                _ => {
                    // Anything unrecognized is a lexical error.
                    return Err(LexicalError::UnexpectedCharacter(c))?;
                }
            }
            if self.pos >= self.input.len() {
                break;
            }
        }
        // Ran out of characters while skipping whitespace.
        if self.pos >= self.input.len() {
            set_state!(self, State::Eof;);
        }
        Ok(())
    }
    // Scans a single operator (or the start of `->`, `=>`, negative
    // numbers, or `!#`), emitting it and choosing the follow-up state.
    fn consume_op(&mut self) -> Result<Spanned<Token<'input>>> {
        if let Some(c) = self.peek() {
            debug!("consume_op: {}", c);
            if self.state != State::Op {
                return Err(LexicalError::InvalidStateEmission(self.state))?;
            }
            match c {
                '(' | ')' | '{' | '}' | '>' | '<' | '|' | '&' | ';' | ',' | ':' | '_' | '+'
                | '/' | '*' | '[' | ']' | '%' | '@' => {
                    let state = if self.push() { State::Eof } else { State::Any };
                    return emit!(self, state => ?);
                }
                '=' => {
                    self.push();
                    if let Some('>') = self.peek() {
                        self.push();
                        return emit!(self, State::Any => ?);
                    } else {
                        let state = if self.pos == self.input.len() {
                            State::Eof
                        } else {
                            State::Any
                        };
                        return emit!(self, state => ?);
                    }
                } // - and ->
                '-' => {
                    self.push();
                    match self.peek() {
                        Some('>') => {
                            self.push();
                            return emit!(self, State::Any => ?);
                        }
                        Some('0'..='9') => {
                            // Negative number literal: hand off to the
                            // number scanner with '-' already buffered.
                            set_state!(self, State::Number;);
                            return self.consume_number();
                        }
                        _ => {
                            let state = if self.push() { State::Eof } else { State::Any };
                            return emit!(self, state => ?);
                        }
                    }
                }
                '/' => {
                    let state = if self.push() { State::Eof } else { State::Any };
                    // NOTE(review): both arms are identical, so this peek is
                    // currently redundant — confirm whether `/dir/file`
                    // handling was meant to differ here.
                    match self.peek() {
                        Some(' ') => {
                            return emit!(self, state => ?);
                        }
                        _ => {
                            return emit!(self, state => ?);
                        }
                    }
                } // / and /directory/file
                '!' => {
                    let state = if self.push() { State::Eof } else { State::Any };
                    if let Some('#') = self.peek() {
                        self.push();
                        return emit!(self, State::Any => ?);
                    } else {
                        return emit!(self, state => ?);
                    }
                }
                _ => {
                    // NOTE(review): the panic makes the emit below dead code;
                    // skip_ws should prevent ever reaching this arm.
                    panic!("unexpected character: '{}'", c);
                    return emit!(self, State::Any => ?);
                }
            }
        }
        // No lookahead left: emit EOF from whatever state we are in.
        emit!(self, self.state=> Token::Eof)
    }
    // comment state
    // Scans a `#...` comment up to (not including) the newline; a `!`
    // immediately after `#` turns it into a shebang token.
    fn consume_comment(&mut self) -> Result<Spanned<Token<'input>>> {
        loop {
            match self.peek() {
                Some('!') => {
                    let state = if self.push() { State::Eof } else { State::Any };
                    set_state!(self, State::Shebang;);
                    return emit!(self, state => ?);
                }
                Some('\n') => {
                    return emit!(self, State::NewLine => ?);
                }
                // if the option is none, break
                None => {
                    return emit!(self, State::Any => ?);
                }
                _ => {
                    if self.push() {
                        return emit!(self, State::Eof => ?);
                    }
                }
            }
        }
    }
    // consume word
    // Scans an identifier or keyword; '-', '.', '/' are accepted so bare
    // paths lex as a single word.
    fn consume_word(&mut self) -> Result<Spanned<Token<'input>>> {
        while let Some(c) = self.peek() {
            match c {
                'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '.' | '/' | '_' => {
                    if self.push() {
                        return emit!(self, State::Eof => ?);
                    }
                }
                _ => {
                    break;
                }
            }
        }
        return emit!(self, State::Any => ?);
    }
    // consume number
    // Scans integer, float, and `0x` hex literals.
    fn consume_number(&mut self) -> Result<Spanned<Token<'input>>> {
        debug!("consume_number");
        while let Some(c) = self.peek() {
            match c {
                '0'..='9' => {
                    if self.push() {
                        debug!("finished");
                        return emit!(self, State::Eof => ?);
                    }
                }
                // . is only allowed once
                '.' => {
                    if self.buffer.contains('.') {
                        break;
                    } else if self.push() {
                        // this is a violation as it is not a number
                        // so panic
                        // (a literal ending in '.' at end of input)
                        return Err(LexicalError::InvalidNumberFormat);
                    }
                }
                // if the first character is a 0, then the next character can be x
                'x' => {
                    if self.buffer.starts_with('0') {
                        if self.push() {
                            debug!("buffer: {}", self.buffer);
                            return emit!(self, State::Number => ?);
                        }
                    } else {
                        break;
                    }
                }
                // also hex numbers, only if the buffer starts with 0x
                'a'..='f' | 'A'..='F' => {
                    if self.buffer.starts_with("0x") {
                        if self.push() {
                            debug!("buffer: {}", self.buffer);
                            return emit!(self, State::Number => ?);
                        }
                    }
                    // handle scientific notation
                    // NOTE(review): only reached for non-hex buffers, and
                    // exponent sign/digits are not scanned afterwards —
                    // confirm scientific notation is really supported.
                    else if self.buffer.contains(".") && c == 'e' {
                        if self.push() {
                            debug!("buffer: {}", self.buffer);
                            return emit!(self, State::Number => ?);
                        }
                    } else {
                        break;
                    }
                }
                _ => {
                    debug!("breaking");
                    return emit!(self, State::Any => ?);
                }
            }
        }
        return emit!(self, State::Eof => ?);
    }
    // Emits a NewLine token; the '\n' itself is consumed when present.
    fn consume_newline(&mut self) -> Result<Spanned<Token<'input>>> {
        match self.peek() {
            Some('\n') => {
                let state = if self.push() { State::Eof } else { State::Any };
                return emit!(self, state => ?);
            }
            _ => {
                return emit!(self, State::Any => Token::NewLine);
            }
        }
    }
fn consume_string_literal(&mut self, quotation: Quotation) -> Result<Spanned<Token<'input>>> {
// loop until the you see the same quotation mark as the one you started with
// or if you see an escape character
self.push();
while let Some(c) = self.peek() {
match c {
'"' => {
if quotation == Quotation::Double {
let state = if self.push() { State::Eof } else { State::Any };
return emit!(self, state => ?);
}
}
'\'' => {
if quotation == Quotation::Single {
let state = if self.push() { State::Eof } else { State::Any };
return emit!(self, state => ?);
}
}
_ => {
if self.push() {
self.state = State::Eof;
break;
}
}
}
}
panic!("unexpected state: {:?}", self.state);
}
    // Scans a `$name` variable; the leading `$` is consumed but not
    // buffered, so `buffer` holds only the name.
    fn consume_variable(&mut self) -> Result<Spanned<Token<'input>>> {
        // ignore $
        self.ignore();
        while let Some(c) = self.peek() {
            match c {
                'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '.' => {
                    if self.push() {
                        return emit!(self, State::Any => ?);
                    }
                }
                _ => {
                    break;
                }
            }
        }
        // NOTE(review): transitions to the Op state after the variable —
        // confirm this is intended rather than State::Any.
        return emit!(self, State::Op => ?);
    }
}
// Iterator Trait implementation for self<
impl<'input> Iterator for Lexer<'input> {
type Item = Spanned<Token<'input>>;
fn next(&mut self) -> Option<Self::Item> {
if self.pos >= self.input.len() && self.state != State::Eof {
self.state = State::Eof;
return None;
} else if self.pos >= self.input.len() {
return None;
}
if self.state == State::Program || self.state == State::Any {
self.skip_ws().unwrap();
}
let res = match self.state {
State::Op => self.consume_op(),
State::Comment => self.consume_comment(),
State::Eof => {
return None;
}
State::NewLine => self.consume_newline(),
State::String(quotation) => self.consume_string_literal(quotation),
State::Variable => self.consume_variable(),
State::Word => self.consume_word(),
State::Number => self.consume_number(),
State::Any | State::Program => unreachable!(),
State::Shebang => todo!(),
};
debug!(
">>> state: {:?}, res: {:?}, pos: {}, line: {}, col: {}",
self.state, res, self.pos, self.line, self.col
);
self.buffer.clear();
match res {
Ok(token) => {
match token.node {
Token::Eof => {
return None;
}
_ => {}
}
return Some(token);
}
_ => {
return None;
}
}
panic!("unexpected state: {:?}", self.state);
}
}
// Newtype so a collected token stream renders one `- token, line:col`
// entry per line (used by the lexer snapshot tests).
struct TokenStreamDisplay<'input>(Vec<Spanned<Token<'input>>>);
impl Display for TokenStreamDisplay<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for token in &self.0 {
            // `writeln!` replaces the original `write!` with an embedded
            // "\n" (clippy: write_with_newline); output is identical.
            writeln!(
                f,
                "- {}, {}:{}",
                token.node, token.pos.line, token.pos.col
            )?;
        }
        Ok(())
    }
}
impl<'input> From<Vec<Spanned<Token<'input>>>> for TokenStreamDisplay<'input> {
    // Convenience conversion for building the display wrapper from a
    // collected token stream.
    fn from(tokens: Vec<Spanned<Token<'input>>>) -> Self {
        TokenStreamDisplay(tokens)
    }
}
mod lexer_prop_tests;
mod lexer_snap_tests;
// Adapts the lexer to the (start, token, end) triples LALRPOP expects.
pub struct TripleIterator<'input>(Lexer<'input>);
impl<'input> TripleIterator<'input> {
    /// Builds a triple iterator over `input`, lexing from offset 0.
    pub fn new(input: &'input str) -> Self {
        TripleIterator(Lexer::new(input, 0))
    }
}
impl<'input> Iterator for TripleIterator<'input> {
    type Item = (usize, Token<'input>, usize);
    // Unwraps each spanned token into the (start, node, end) triple.
    fn next(&mut self) -> Option<Self::Item> {
        let token = self.0.next()?;
        debug!("token: {:?}", token);
        Some((token.start, token.node, token.end))
    }
}

33
src/lib.rs Normal file
View file

@ -0,0 +1,33 @@
pub mod lexer;
pub mod parser;
pub mod compiler;
use compiler::text;
use parser::ast;
use crate::compiler::ir;
// Salsa jar: registers every tracked function, input and interned struct
// this crate contributes to the incremental database.
#[salsa::jar(db = Db)]
pub struct Jar(
    compiler::compile,
    compiler::compile_effect,
    compiler::add_imports,
    text::to_spans,
    text::calculate_line_lengths,
    text::Span,
    text::Spanned,
    text::Position,
    text::SourceMap,
    text::SourceProgram,
    ir::Program,
    ir::Function,
    ir::InternedEffect,
    ir::Symbol,
    ir::EffectDef,
    ir::Import,
    ir::Mangled,
);
// Database trait for this crate; any salsa DB containing `Jar` qualifies.
pub trait Db: salsa::DbWithJar<Jar> {}
// Blanket impl so downstream databases get `Db` for free.
impl<DB> Db for DB where DB: ?Sized + salsa::DbWithJar<Jar> {}

188
src/parser/ast.rs Normal file
View file

@ -0,0 +1,188 @@
use std::fmt::Display;
use proptest::prelude::*;
// Name used for anonymous function definitions.
pub const ANON_FN_NAME: &str = "anonymous";
// An identifier, optionally carrying generic arguments (e.g. `Future<T>`).
#[derive(PartialEq, Debug, Clone)]
pub struct Ident(pub String, pub Option<Vec<Ident>>);
// A string literal.
#[derive(PartialEq, Debug)]
pub struct StringLit(pub String);
// A `let`-style binding of an expression to a name.
#[derive(PartialEq, Debug)]
pub struct Binding(pub Ident, pub Box<Expression>);
// Literal values.
#[derive(PartialEq, Debug)]
pub enum Literal {
    Bool(bool),
    Float(f64),
    Integer(i64),
    String(String),
}
// Language keywords as AST nodes.
#[derive(PartialEq, Debug)]
pub enum Keyword {
    None,
    Some,
    Let,
    Action,
    Saga,
    Fn,
    If,
    Else,
    Match,
    Arrow,
    Struct,
    SelfValue,
    When,
    Effect,
    Impl,
    Use,
    From,
    Where,
    Self_,
}
// A literal or an identifier.
#[derive(PartialEq, Debug)]
pub enum Value {
    Literal(Literal),
    Ident(Ident),
}
// A `{ ... }` sequence of items.
#[derive(PartialEq, Debug)]
pub struct Block<T>(pub Vec<T>);
// A `( ... )` tuple of items.
#[derive(PartialEq, Debug)]
pub struct Tuple<T>(pub Vec<T>);
// A `[ ... ]` array of items.
#[derive(PartialEq, Debug)]
pub struct Array<T>(pub Vec<T>);
// A binary expression `lhs op rhs`.
#[derive(PartialEq, Debug)]
pub struct BinaryOperation {
    pub lhs: Box<Expression>,
    pub op: Operator,
    pub rhs: Box<Expression>,
}
// A function call: callee identifier plus argument expressions.
#[derive(PartialEq, Debug)]
pub struct FnCall(pub Ident, pub Vec<Box<Expression>>);
// Any expression node in the AST.
#[derive(PartialEq, Debug)]
pub enum Expression {
    BinaryExpression(BinaryOperation),
    Bool(bool),
    Integer(i64),
    Float(f64),
    Ident(Ident),
    Binding(Binding),
    FnCall(FnCall),
    String(String),
    FnDef(FnDef),
    ShellCommand(Vec<Ident>, Vec<Box<Expression>>),
    EffectDef(EffectDef),
    StructDef(StructDef),
    UseDef(UseDef),
    Keyword(Keyword),
    ImplDef(ImplDef),
    Branch(Branch),
    // Placeholder produced during parser error recovery.
    Error,
}
// A `name: type` pair.
#[derive(PartialEq, Debug)]
pub struct Field(pub Ident, pub Ident);
// A function argument: either the `self` receiver or a typed field.
#[derive(PartialEq, Debug)]
pub enum FnArg {
    Reciever,
    Field(Field)
}
// A function signature: name, arguments, optional return type, and the
// effects the function may perform.
#[derive(PartialEq, Debug)]
pub struct Prototype {
    pub name: Ident,
    pub args: Vec<FnArg>,
    pub ret: Option<Ident>,
    pub effects: Vec<Ident>,
}
// A function definition: signature, body block, and `when <effect>`
// handler blocks.
#[derive(PartialEq, Debug)]
pub struct FnDef(
    pub Prototype,
    pub Block<Box<Expression>>,
    pub Vec<(Ident, Block<Box<Expression>>)>,
);
// Whitespace kinds.
// NOTE(review): appears unused elsewhere in this file — confirm.
#[derive(PartialEq, Debug)]
pub enum Whitespace {
    Space,
    Tab,
    Newline,
}
// Unary and binary operators.
#[derive(PartialEq, Debug, Clone)]
pub enum Operator {
    Add,
    Sub,
    Mul,
    Div,
    Modulo,
    Increment,
    Decrement,
    Maybe,
    Not,
    Neg,
}
impl Arbitrary for Operator {
    type Parameters = ();
    type Strategy = BoxedStrategy<Self>;
    // Property-testing strategy; only the four arithmetic operators are
    // generated.
    fn arbitrary_with(_args: ()) -> Self::Strategy {
        prop_oneof![
            Just(Operator::Add),
            Just(Operator::Sub),
            Just(Operator::Mul),
            Just(Operator::Div),
        ]
        .boxed()
    }
}
impl Display for Operator {
    /// Renders the operator's source spelling (note `Sub` and `Neg` both
    /// print as `-`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Operator::Add => "+",
            Operator::Sub => "-",
            Operator::Mul => "*",
            Operator::Div => "/",
            Operator::Modulo => "%",
            Operator::Increment => "++",
            Operator::Decrement => "--",
            Operator::Maybe => "?",
            Operator::Not => "!",
            Operator::Neg => "-",
        })
    }
}
// A struct definition: name plus field block.
#[derive(PartialEq, Debug)]
pub struct StructDef(pub Ident, pub Block<Field>);
// Wrapper marking an identifier as a function name.
#[derive(PartialEq, Debug)]
pub struct FnIdent(pub Ident);
// An effect definition: name, super-effects, and operation prototypes.
#[derive(PartialEq, Debug)]
pub struct EffectDef(pub Ident, pub Vec<Ident>, pub Block<Prototype>);
// A `use { items } from module` import.
#[derive(PartialEq, Debug)]
pub struct UseDef(pub Vec<Ident>, pub Ident);
// An `impl Effect for Type { ... }` block.
#[derive(PartialEq, Debug)]
pub struct ImplDef(pub Ident, pub Option<Ident>, pub Block<Box<Expression>>);
// A conditional: scrutinee plus (pattern, block) arms.
#[derive(PartialEq, Debug)]
pub struct Branch(pub Box<Expression>, pub Vec<(Expression, Block<Box<Expression>>)>);
// A whole source module: its top-level expressions.
#[derive(PartialEq, Debug)]
pub struct Module(pub Vec<Box<Expression>>);

33
src/parser/errors.rs Normal file
View file

@ -0,0 +1,33 @@
// Renders parser recovery errors as human-readable text, quoting the
// offending source line with a caret span underneath. Only the
// UnrecognizedToken case is implemented; the other variants still todo!().
pub fn pretty_errors<'input>(src: &'input str, errors: Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token, &str>>) -> String {
    let mut pretty = String::new();
    // NOTE(review): only written, never read (its uses are commented out
    // below) — confirm the surrounding-source echo is still wanted.
    let mut last_end = 0;
    for error in errors {
        match error.error {
            lalrpop_util::ParseError::InvalidToken { location } => todo!(),
            lalrpop_util::ParseError::UnrecognizedEof { location, expected } => todo!(),
            lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
                // find the line and column of the start and end tokens,
                // and print the line with a caret pointing to the error
                let start = token.0;
                let end = token.2;
                // NOTE(review): despite the names these are byte offsets of
                // the enclosing line's start/end, not line numbers.
                let start_line = src[..start].rfind('\n').map_or(0, |i| i + 1);
                let end_line = src[end..].find('\n').map_or(src.len(), |i| end + i);
                let line = &src[start_line..end_line];
                let start_col = start - start_line;
                let end_col = end - start_line;
                // pretty.push_str(&src[last_end..start]);
                pretty.push_str(&format!("error: unexpected token {:?}, expected one of {:?}\n", token.1, expected));
                pretty.push_str(&line);
                pretty.push_str("\n");
                pretty.push_str(&" ".repeat(start_col));
                pretty.push_str(&"^".repeat(end_col - start_col));
                last_end = end;
            },
            lalrpop_util::ParseError::ExtraToken { token } => todo!(),
            lalrpop_util::ParseError::User { error } => todo!(),
        };
    }
    // pretty.push_str(&src[last_end..]);
    pretty
}

8
src/parser/mod.rs Normal file
View file

@ -0,0 +1,8 @@
use lalrpop_util::lalrpop_mod;
mod parser_snap_tests;
mod string;
pub mod ast;
mod errors;
pub mod src;

View file

@ -0,0 +1,111 @@
use crate::lexer::Lexer;
use crate::parser::errors::pretty_errors;
use insta::assert_snapshot;
use okstd::prelude::*;
// NOTE(review): `#[cfg(test)]` gates only this first function; the sibling
// tests below are not gated — confirm the containing module is declared
// under cfg(test).
#[cfg(test)]
#[okstd::test]
fn test_empty_parser() {
    // Whitespace-only input parses to an empty module.
    let input = " ";
    let mut errors = vec![];
    let wrapper = crate::lexer::TripleIterator::new(input);
    let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
    assert!(errors.is_empty());
    assert_snapshot!(format!("{:#?}", t.unwrap()), @r###"
    Module(
        [],
    )
    "###);
}
#[okstd::test]
fn test_fn_call_parser_with_multiple_args_and_strings() {
    // A fn definition whose body binds the result of a call mixing
    // integer and string arguments.
    let input = "fn some()[] {let a = some_fnExpr(1, \"2\", 3)}";
    let mut errors = vec![];
    let wrapper = crate::lexer::TripleIterator::new(input);
    let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
    if !errors.is_empty() {
        panic!("{}", pretty_errors(&input, errors));
    }
    assert_snapshot!(format!("{:#?}", t.unwrap()));
}
#[okstd::test]
fn test_fn_def_parser() {
    // Function with an effect row and a `when` handler block.
    let input = r#"fn call(a:b, b:c) [throws, awaits, execs] {
call(1+1)
let a = 1
} when throws {
raise(1)
}"#;
    let mut errors = vec![];
    let wrapper = crate::lexer::TripleIterator::new(input);
    let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
    assert!(errors.is_empty());
    assert_snapshot!(format!("{:#?}", t.unwrap()));
}
#[okstd::test]
fn test_effect() {
    // Effect declaration with super-effects and operation prototypes.
    let input = r#"effect VM: async + throws + execs {
catch() []
await<T>(f: Future<T>) [] -> T
exec(arg0: string, args: stringvec) []
}"#;
    let mut errors = vec![];
    let wrapper = crate::lexer::TripleIterator::new(input);
    let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
    assert!(errors.is_empty());
    assert_snapshot!(format!("{:#?}", t.unwrap()));
}
#[okstd::test]
fn test_struct_parser() {
    // Plain struct with two typed fields.
    let input = r#"struct VM {
a: string
b: string
}"#;
    let mut errors = vec![];
    let wrapper = crate::lexer::TripleIterator::new(input);
    let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
    assert!(errors.is_empty());
    assert_snapshot!(format!("{:#?}", t.unwrap()));
}
// NOTE(review): name says "enum" but the input exercises use/effect/
// struct/impl — confirm the intended coverage.
#[okstd::test]
fn test_enum_parser() {
    let input = r#"use { exec } from host
effect Make: async + throws + execs + reads + writes {
catch() [throws]
await<T>(f: Future<T>) [async, throws] -> T
exec(arg0: string, args: stringvec) [Make] -> i32
read(name: string) [reads] -> string
write(name: string, value: string) [writes]
}
struct Local {
host: host
}
impl Make for Local {
fn catch(self) [throws] {
}
fn await<T>(f: Future<T>) [async, trhows] -> T {
yield()
}
fn exec(self, arg0: string, args: vec<string>) [Vm] -> i32 {
self.host.read("jobserver")
if self.host.exec(arg0, args) {
raise(1)
}
}
}"#;
    let mut errors: Vec<lalrpop_util::ErrorRecovery<usize, crate::lexer::Token, &str>> = vec![];
    let wrapper = crate::lexer::TripleIterator::new(input);
    let t = crate::parser::src::SourceParser::new().parse(&mut errors, wrapper);
    if !errors.is_empty() {
        panic!("{}", pretty_errors(&input, errors));
    }
    assert_snapshot!(format!("{:#?}", t.unwrap()));
}

View file

@ -0,0 +1,118 @@
---
source: src/parser/parser_snap_tests.rs
expression: "format!(\"{:#?}\", t.unwrap())"
---
Module(
[
EffectDef(
EffectDef(
Ident(
"VM",
None,
),
[
Ident(
"async",
None,
),
Ident(
"throws",
None,
),
Ident(
"execs",
None,
),
],
Block(
[
Prototype {
name: Ident(
"catch",
None,
),
args: [],
ret: None,
effects: [],
},
Prototype {
name: Ident(
"await",
Some(
[
Ident(
"T",
None,
),
],
),
),
args: [
Field(
Field(
Ident(
"f",
None,
),
Ident(
"Future",
Some(
[
Ident(
"T",
None,
),
],
),
),
),
),
],
ret: Some(
Ident(
"T",
None,
),
),
effects: [],
},
Prototype {
name: Ident(
"exec",
None,
),
args: [
Field(
Field(
Ident(
"arg0",
None,
),
Ident(
"string",
None,
),
),
),
Field(
Field(
Ident(
"args",
None,
),
Ident(
"stringvec",
None,
),
),
),
],
ret: None,
effects: [],
},
],
),
),
),
],
)

View file

@ -0,0 +1,384 @@
---
source: src/parser/parser_snap_tests.rs
expression: "format!(\"{:#?}\", t.unwrap())"
---
Module(
[
UseDef(
UseDef(
[
Ident(
"crosmvm",
None,
),
],
Ident(
"std",
None,
),
),
),
EffectDef(
EffectDef(
Ident(
"Vm",
None,
),
[
Ident(
"async",
None,
),
Ident(
"throws",
None,
),
Ident(
"execs",
None,
),
],
Block(
[
Prototype {
name: Ident(
"catch",
None,
),
args: [],
ret: None,
effects: [
Ident(
"throws",
None,
),
],
},
Prototype {
name: Ident(
"await",
Some(
[
Ident(
"T",
None,
),
],
),
),
args: [
Field(
Field(
Ident(
"f",
None,
),
Ident(
"Future",
Some(
[
Ident(
"T",
None,
),
],
),
),
),
),
],
ret: Some(
Ident(
"T",
None,
),
),
effects: [
Ident(
"async",
None,
),
Ident(
"throws",
None,
),
],
},
Prototype {
name: Ident(
"exec",
None,
),
args: [
Field(
Field(
Ident(
"arg0",
None,
),
Ident(
"string",
None,
),
),
),
Field(
Field(
Ident(
"args",
None,
),
Ident(
"stringvec",
None,
),
),
),
],
ret: Some(
Ident(
"i32",
None,
),
),
effects: [
Ident(
"Vm",
None,
),
],
},
],
),
),
),
StructDef(
StructDef(
Ident(
"coopvm",
None,
),
Block(
[],
),
),
),
ImplDef(
ImplDef(
Ident(
"Vm",
None,
),
Some(
Ident(
"coopvm",
None,
),
),
Block(
[
FnDef(
FnDef(
Prototype {
name: Ident(
"catch",
None,
),
args: [
Reciever,
],
ret: None,
effects: [
Ident(
"throws",
None,
),
],
},
Block(
[],
),
[],
),
),
FnDef(
FnDef(
Prototype {
name: Ident(
"await",
Some(
[
Ident(
"T",
None,
),
],
),
),
args: [
Field(
Field(
Ident(
"f",
None,
),
Ident(
"Future",
Some(
[
Ident(
"T",
None,
),
],
),
),
),
),
],
ret: Some(
Ident(
"T",
None,
),
),
effects: [
Ident(
"async",
None,
),
Ident(
"trhows",
None,
),
],
},
Block(
[
FnCall(
FnCall(
Ident(
"yield",
None,
),
[],
),
),
],
),
[],
),
),
FnDef(
FnDef(
Prototype {
name: Ident(
"exec",
None,
),
args: [
Reciever,
Field(
Field(
Ident(
"arg0",
None,
),
Ident(
"string",
None,
),
),
),
Field(
Field(
Ident(
"args",
None,
),
Ident(
"vec",
Some(
[
Ident(
"string",
None,
),
],
),
),
),
),
],
ret: Some(
Ident(
"i32",
None,
),
),
effects: [
Ident(
"Vm",
None,
),
],
},
Block(
[
Branch(
Branch(
FnCall(
FnCall(
Ident(
"self.exec",
None,
),
[
Ident(
Ident(
"arg0",
None,
),
),
Ident(
Ident(
"args",
None,
),
),
],
),
),
[
(
Bool(
true,
),
Block(
[
FnCall(
FnCall(
Ident(
"raise",
None,
),
[],
),
),
],
),
),
],
),
),
],
),
[],
),
),
],
),
),
),
],
)

View file

@ -0,0 +1,492 @@
---
source: src/parser/parser_snap_tests.rs
assertion_line: 110
expression: "format!(\"{:#?}\", t.unwrap())"
---
Module(
[
UseDef(
UseDef(
[
Ident(
"exec",
None,
),
],
Ident(
"host",
None,
),
),
),
EffectDef(
EffectDef(
Ident(
"Make",
None,
),
[
Ident(
"async",
None,
),
Ident(
"throws",
None,
),
Ident(
"execs",
None,
),
Ident(
"reads",
None,
),
Ident(
"writes",
None,
),
],
Block(
[
Prototype {
name: Ident(
"catch",
None,
),
args: [],
ret: None,
effects: [
Ident(
"throws",
None,
),
],
},
Prototype {
name: Ident(
"await",
Some(
[
Ident(
"T",
None,
),
],
),
),
args: [
Field(
Field(
Ident(
"f",
None,
),
Ident(
"Future",
Some(
[
Ident(
"T",
None,
),
],
),
),
),
),
],
ret: Some(
Ident(
"T",
None,
),
),
effects: [
Ident(
"async",
None,
),
Ident(
"throws",
None,
),
],
},
Prototype {
name: Ident(
"exec",
None,
),
args: [
Field(
Field(
Ident(
"arg0",
None,
),
Ident(
"string",
None,
),
),
),
Field(
Field(
Ident(
"args",
None,
),
Ident(
"stringvec",
None,
),
),
),
],
ret: Some(
Ident(
"i32",
None,
),
),
effects: [
Ident(
"Make",
None,
),
],
},
Prototype {
name: Ident(
"read",
None,
),
args: [
Field(
Field(
Ident(
"name",
None,
),
Ident(
"string",
None,
),
),
),
],
ret: Some(
Ident(
"string",
None,
),
),
effects: [
Ident(
"reads",
None,
),
],
},
Prototype {
name: Ident(
"write",
None,
),
args: [
Field(
Field(
Ident(
"name",
None,
),
Ident(
"string",
None,
),
),
),
Field(
Field(
Ident(
"value",
None,
),
Ident(
"string",
None,
),
),
),
],
ret: None,
effects: [
Ident(
"writes",
None,
),
],
},
],
),
),
),
StructDef(
StructDef(
Ident(
"Local",
None,
),
Block(
[
Field(
Ident(
"host",
None,
),
Ident(
"host",
None,
),
),
],
),
),
),
ImplDef(
ImplDef(
Ident(
"Make",
None,
),
Some(
Ident(
"Local",
None,
),
),
Block(
[
FnDef(
FnDef(
Prototype {
name: Ident(
"catch",
None,
),
args: [
Reciever,
],
ret: None,
effects: [
Ident(
"throws",
None,
),
],
},
Block(
[],
),
[],
),
),
FnDef(
FnDef(
Prototype {
name: Ident(
"await",
Some(
[
Ident(
"T",
None,
),
],
),
),
args: [
Field(
Field(
Ident(
"f",
None,
),
Ident(
"Future",
Some(
[
Ident(
"T",
None,
),
],
),
),
),
),
],
ret: Some(
Ident(
"T",
None,
),
),
effects: [
Ident(
"async",
None,
),
Ident(
"trhows",
None,
),
],
},
Block(
[
FnCall(
FnCall(
Ident(
"yield",
None,
),
[],
),
),
],
),
[],
),
),
FnDef(
FnDef(
Prototype {
name: Ident(
"exec",
None,
),
args: [
Reciever,
Field(
Field(
Ident(
"arg0",
None,
),
Ident(
"string",
None,
),
),
),
Field(
Field(
Ident(
"args",
None,
),
Ident(
"vec",
Some(
[
Ident(
"string",
None,
),
],
),
),
),
),
],
ret: Some(
Ident(
"i32",
None,
),
),
effects: [
Ident(
"Vm",
None,
),
],
},
Block(
[
FnCall(
FnCall(
Ident(
"self.host.read",
None,
),
[
String(
"jobserver",
),
],
),
),
Branch(
Branch(
FnCall(
FnCall(
Ident(
"self.host.exec",
None,
),
[
Ident(
Ident(
"arg0",
None,
),
),
Ident(
Ident(
"args",
None,
),
),
],
),
),
[
(
Bool(
true,
),
Block(
[
FnCall(
FnCall(
Ident(
"raise",
None,
),
[
Integer(
1,
),
],
),
),
],
),
),
],
),
),
],
),
[],
),
),
],
),
),
),
],
)

View file

@ -0,0 +1,53 @@
---
source: src/parser/parser_snap_tests.rs
expression: "format!(\"{:#?}\", t.unwrap())"
---
Module(
[
FnDef(
FnDef(
Prototype {
name: Ident(
"some",
None,
),
args: [],
ret: None,
effects: [],
},
Block(
[
Binding(
Binding(
Ident(
"a",
None,
),
FnCall(
FnCall(
Ident(
"some_fnExpr",
None,
),
[
Integer(
1,
),
String(
"2",
),
Integer(
3,
),
],
),
),
),
),
],
),
[],
),
),
],
)

View file

@ -0,0 +1,120 @@
---
source: src/parser/parser_snap_tests.rs
expression: "format!(\"{:#?}\", t.unwrap())"
---
Module(
[
FnDef(
FnDef(
Prototype {
name: Ident(
"call",
None,
),
args: [
Field(
Field(
Ident(
"a",
None,
),
Ident(
"b",
None,
),
),
),
Field(
Field(
Ident(
"b",
None,
),
Ident(
"c",
None,
),
),
),
],
ret: None,
effects: [
Ident(
"throws",
None,
),
Ident(
"awaits",
None,
),
Ident(
"execs",
None,
),
],
},
Block(
[
FnCall(
FnCall(
Ident(
"call",
None,
),
[
BinaryExpression(
BinaryOperation {
lhs: Integer(
1,
),
op: Add,
rhs: Integer(
1,
),
},
),
],
),
),
Binding(
Binding(
Ident(
"a",
None,
),
Integer(
1,
),
),
),
],
),
[
(
Ident(
"throws",
None,
),
Block(
[
FnCall(
FnCall(
Ident(
"raise",
None,
),
[
Integer(
1,
),
],
),
),
],
),
),
],
),
),
],
)

View file

@ -0,0 +1,40 @@
---
source: src/parser/parser_snap_tests.rs
expression: "format!(\"{:#?}\", t.unwrap())"
---
Module(
[
StructDef(
StructDef(
Ident(
"VM",
None,
),
Block(
[
Field(
Ident(
"a",
None,
),
Ident(
"string",
None,
),
),
Field(
Ident(
"b",
None,
),
Ident(
"string",
None,
),
),
],
),
),
),
],
)

362
src/parser/src.lalrpop Normal file
View file

@ -0,0 +1,362 @@
// Grammar for the `src` language. Tokens come from the hand-written lexer in
// crate::lexer; actions build the AST types from super::ast.
use std::str::FromStr;
use crate::parser::string::apply_string_escapes;
use super::ast::*;
use lalrpop_util::{ErrorRecovery, ParseError};
use crate::lexer::{Position, Token, Word, Variable};
use okstd::prelude::*;

#[LALR]
// `errors` collects recoverable parse errors so parsing can continue past a bad
// token (see the `!` alternative of `Source` at the bottom of this file).
grammar<'input, 'err>(errors: &'err mut Vec<ErrorRecovery<usize, Token<'input>, &'static str>>);

// Map the external lexer's tokens onto the terminal names used in the rules below.
extern {
    type Location = usize;

    enum Token<'input> {
        // Operators
        "|" => Token::Pipe, // |
        "&" => Token::Ampersand, // &
        ";" => Token::Semicolon, // ;
        "=" => Token::Equals, // =
        // Redirections
        "<" => Token::LessThan, // <
        ">" => Token::GreaterThan, // >
        // Identifiers
        // "param" => Variable::Parameter(<&'input str>), // var
        // "param_default" => Variable::ParameterDefault(<&'input str>, <&'input str>), // var = value
        // "positional_param" => Variable::PositionalParameter(<usize>), // $var
        // Literals
        "true" => Token::Word(Word::True), // true
        "none" => Token::Word(Word::None), // none
        "false" => Token::Word(Word::False), // false
        "null" => Token::Word(Word::Null), // null
        // Keywords
        "fn" => Token::Word(Word::Fn), // fn
        "if" => Token::Word(Word::If), // if
        "else" => Token::Word(Word::Else), // else
        "match" => Token::Word(Word::Match), // match
        "let" => Token::Word(Word::Let), // let
        "import" => Token::Word(Word::Import), // import
        "action" => Token::Word(Word::Action), // action
        "struct" => Token::Word(Word::Struct), // struct
        "enum" => Token::Word(Word::Enum), // enum
        "effect" => Token::Word(Word::Effect), // trait
        "impl" => Token::Word(Word::Impl), // impl
        "when" => Token::Word(Word::When), // when
        "use" => Token::Word(Word::Use), // use
        "from" => Token::Word(Word::From), // from
        "where" => Token::Word(Word::Where), // where
        "self" => Token::Word(Word::Self_), // self
        "for" => Token::Word(Word::For), // for
        "#!" => Token::Shebang, // #!
        "ident" => Token::Word(Word::Ident(<&'input str>)), // a-z, A-Z, 0-9, _
        "string" => Token::String(<&'input str>), // "..."
        // Comments
        "comment" => Token::Comment(<&'input str>), // #
        // Numbers
        "int" => Token::Integer(<i64>), // 0-9
        "float" => Token::Float(<f64>), // [0-9]*.0-9+
        // Special
        "eof" => Token::Eof, // EOF
        "\n" => Token::NewLine, // \n
        "(" => Token::LeftParen, // (
        ")" => Token::RightParen, // )
        "{" => Token::LeftBrace, // {
        "}" => Token::RightBrace, // }
        "[" => Token::LeftBracket, // [
        "]" => Token::RightBracket, // ]
        "," => Token::Comma, // ,
        ":" => Token::Colon, // :
        "." => Token::Dot, // .
        "-" => Token::Minus, // -
        "+" => Token::Plus, // +
        "/" => Token::Divide, // /
        "*" => Token::Multiply, // *
        "%" => Token::Percent, // %
        "$" => Token::Dollar, // $
        "!" => Token::Exclamation, // !
        "?" => Token::Question, // ?
        "~" => Token::Tilde, // ~
        "@" => Token::At, // @
        "^" => Token::Caret, // ^
        "->" => Token::Arrow, // ->
        "=>" => Token::FatArrow, // =>
    }
}
// Wraps a production with its byte offsets: (start, value, end).
Spanned<T>: (usize, T, usize) = {
    <@L> <T> <@R> => (<>)
};

// `/`-separated sequence with an optional trailing element, e.g. `/usr/bin/env`.
Path<T>: Vec<Ident> = {
    <mut v:("/" <T>)*> <e:T?> => match e {
        None => v,
        Some(e) => {
            v.push(e);
            v
        }
    }
}

// Newline-separated sequence; the final element may omit its newline.
Lines<T>: Vec<T> = {
    <mut v:(<T> "\n")*> <e:T?> => match e {
        None => v,
        Some(e) => {
            v.push(e);
            v
        }
    }
}

// Comma-separated sequence; allows a trailing comma (the last `T?` is optional).
Comma<T>: Vec<T> = {
    <mut v:(<T> ",")*> <e:T?> => match e {
        None=> v,
        Some(e) => {
            v.push(e);
            v
        }
    }
};

// `+`-separated sequence, used for effect sums like `reads + writes`.
Plus<T>: Vec<T> = {
    <mut v:(<T> "+")*> <e:T?> => match e {
        None=> v,
        Some(e) => {
            v.push(e);
            v
        }
    }
};

// Keyword terminals lifted into AST expressions so rules can reference them by name.
None: Expression = "none" => Expression::Keyword(Keyword::None);
When: Expression = "when" => Expression::Keyword(Keyword::When);
Fn: Expression = "fn" => Expression::Keyword(Keyword::Fn);
Let: Expression = "let" => Expression::Keyword(Keyword::Let);
Effect: Expression = "effect" => Expression::Keyword(Keyword::Effect);
Struct: Expression = "struct" => Expression::Keyword(Keyword::Struct);
Impl: Expression = "impl" => Expression::Keyword(Keyword::Impl);
Use: Expression = "use" => Expression::Keyword(Keyword::Use);
From: Expression = "from" => Expression::Keyword(Keyword::From);
Where: Expression = "where" => Expression::Keyword(Keyword::Where);
Self_: Expression = "self" => Expression::Keyword(Keyword::Self_);
Else: Expression = "else" => Expression::Keyword(Keyword::Else);
If: Expression = "if" => Expression::Keyword(Keyword::If);
True: Expression = "true" => Expression::Bool(true);
False: Expression = "false" => Expression::Bool(false);
// True when an `L` is immediately followed by an `R` with no gap between their
// spans (end offset of the left token == start offset of the right token).
// The original action read `match l.2 == r.0` — `l` and `r` are unbound (the
// bindings are `lhs`/`rhs`) and the dangling `match` has no arms, so the action
// could never have compiled as Rust; this is the evidently intended expression.
Consecutive<L, R>: bool = {
    Consecutive<L, R> <lhs:Spanned<L>> <rhs:Spanned<R>> => lhs.2 == rhs.0,
    L => false
};
// A bare identifier with no generic arguments.
Ident: Ident = {
    <i:"ident"> => Ident(i.to_string(), None),
};

// An identifier with angle-bracketed generic arguments, e.g. `vec<string>`.
IdentWithGenerics: Ident = {
    <i:"ident"> "<" <g:Comma<Ident>> ">" => Ident(i.to_string(), Some(g)),
};

// Either of the two identifier forms above.
IdentOrIdentWithGenerics: Ident = {
    <i:Ident> => i,
    <i:IdentWithGenerics> => i,
};

// Generic separator-delimited sequence; the final separator is optional.
Punctuated<T, Token>: Vec<T> = {
    <mut v:(<T> <Token>)*> <e:T?> => match e {
        None => v,
        Some(e) => {
            v.push(e);
            v
        }
    }
};
// Literal or identifier values. String tokens arrive from the lexer with their
// surrounding quotes still attached; the action strips the first and last byte.
Atom: Value = {
    #[precedence(level="0")]
    <i:"int"> => Value::Literal(Literal::Integer(i)),
    <f:"float"> => Value::Literal(Literal::Float(f)),
    <s:"string"> => {
        // Strip the surrounding quote characters; panics only if the lexer
        // handed us a token shorter than a quoted pair, which would be a bug.
        let start = 1;
        let end = s.len() - 1;
        // unwrap_or_else + panic! avoids allocating the message on the
        // success path (the former expect(format!(..)) built it every time).
        Value::Literal(Literal::String(s.get(start..end).unwrap_or_else(|| panic!("malformed string {s}, strings must be quoted")).to_string()))
    },
    #[precedence(level="1")]
    <i:Ident> => Value::Ident(i),
};
// Arithmetic expressions. Precedence annotations give `*`/`/` (level 2) a
// tighter binding than `+`/`-` (level 3); all binary operators are
// left-associative. Terms (level 1) are the operands.
Expression: Box<Expression> = {
    #[precedence(level="1")]
    Term,
    #[precedence(level="2")] #[assoc(side="left")]
    <lhs:Expression> "*" <rhs:Expression> => {
        Box::new(Expression::BinaryExpression(BinaryOperation {
            lhs,
            op: Operator::Mul,
            rhs
        }))
    },
    <lhs:Expression> "/" <rhs:Expression> => {
        Box::new(Expression::BinaryExpression(BinaryOperation {
            lhs,
            op: Operator::Div,
            rhs
        }))
    },
    #[precedence(level="3")] #[assoc(side="left")]
    <lhs:Expression> "+" <rhs:Expression> => {
        Box::new(Expression::BinaryExpression(BinaryOperation {
            lhs,
            op: Operator::Add,
            rhs
        }))
    },
    <lhs:Expression> "-" <rhs:Expression> => {
        Box::new(Expression::BinaryExpression(BinaryOperation {
            lhs,
            op: Operator::Sub,
            rhs
        }))
    },
}
// A `name: type` pair, used both for struct fields and function parameters.
Field: Field = {
    <name:Ident> ":" <ty:IdentOrIdentWithGenerics> => Field(name, ty)
}

// `<T, U, ...>` generic parameter list.
TypeParameters: Vec<Ident> =
    "<" <Comma<Ident>> ">";

// A function argument: either the `self` receiver or a typed field.
// NOTE(review): `Reciever` [sic] is the variant name declared in super::ast,
// so the spelling cannot be corrected here alone.
FnArg: FnArg = {
    <self_:Self_> => FnArg::Reciever,
    <field:Field> => FnArg::Field(field),
}

// Function signature: `name(args) [effects] -> ret?`. The effect list is
// mandatory (may be empty: `[]`); the return type is optional.
Prototype: Prototype = {
    <name:IdentOrIdentWithGenerics> "("<args:Comma<FnArg>> ")" "[" <effects:Comma<Ident>> "]" <ret:("->" Ident)?> => {
        let ret = match ret {
            None => None,
            Some(r) => Some(r.1),
        };
        Prototype{name, args, ret, effects}
    }
}
// A string-literal expression. The lexer token still carries its surrounding
// quotes; strip the first and last byte, as `Atom` does.
String: Box<Expression> = {
    <s:"string"> => {
        let start = 1;
        let end = s.len() - 1;
        // unwrap_or_else + panic! avoids allocating the message on the
        // success path (the former expect(format!(..)) built it every time).
        Box::new(Expression::String(s.get(start..end).unwrap_or_else(|| panic!("malformed string {s}, strings must be quoted")).to_string()))
    },
}
// Function call `name(args)` with an optional `[effect]` annotation.
// NOTE(review): the annotation `<i:...>` is parsed but never used — the
// FnCall AST node only records name and args, so `[effect]` is discarded.
// Confirm whether that is intentional.
FnCall: Box<Expression> = {
    <name:IdentOrIdentWithGenerics> "(" <args:Comma<Expression>> ")" <i:("[" <Ident> "]")?> => Box::new(Expression::FnCall(FnCall(name, args)))
}

// The operand of an arithmetic expression: literal, identifier, call,
// boolean keyword, or a parenthesised sub-expression.
Term: Box<Expression> = {
    <s:String> => <>,
    <val:"int"> => {
        Box::new(Expression::Integer(val))
    },
    <i:Ident> => {
        Box::new(Expression::Ident(i))
    },
    <f:FnCall> => <>,
    <true_:True> => Box::new(<>),
    <false_:False> => Box::new(<>),
    "(" <Expression> ")",
}

// `{ ... }` containing newline-separated items, with an optional leading newline.
Block<T>: Block<T> = {
    "{" ("\n"?) <lines:Lines<T>> "}" => Block(lines)
}

// `when <effect> { ... }` handler attached to a function definition.
WhenBlock: (Ident,Block<Box<Expression>>) = {
    When <i:Ident> <lines:Block<Statement>> => (i, lines)
}

// `fn proto { body } (when effect { ... })*`
FnDef: Box<Expression> = {
    Fn <proto:Prototype> <block:Block<Statement>> <handlers:(WhenBlock)*> => Box::new(Expression::FnDef(FnDef(proto, block, handlers))),
}

// `effect Name: A + B { prototypes... }`
EffectDef: Box<Expression> = {
    Effect <i:Ident> ":" <effects:Plus<Ident>> <block:Block<Prototype>> => Box::new(Expression::EffectDef(EffectDef(i, effects,block))),
}

// One word of a shebang command line: a quoted string or an identifier.
// NOTE(review): unlike `String`, the quotes are NOT stripped here — confirm
// whether shebang components are meant to keep them.
ShellComponent: Box<Expression> = {
    <s:"string"> => Box::new(Expression::String((s).to_string())),
    <i:Ident> => Box::new(Expression::Ident(i)),
}

// `#!/path/to/interp arg0 arg1 ...`
ShebangLine: Box<Expression> = {
    "#!" <path:("/" Ident)*> <components:(ShellComponent)*> => Box::new(Expression::ShellCommand(path.into_iter().map(|i| i.1.into()).collect(), components)),
}
// `struct Name { field: type, ... }`
StructDef: Box<Expression> = {
    Struct <i:Ident> <fields:Block<Field>> => Box::new(Expression::StructDef(StructDef(i, fields))),
}

// `if cond { ... } (else { ... })?` desugared into a Branch whose arms are
// keyed by Bool(true)/Bool(false).
IfDef: Box<Expression> = {
    If <cond:Expression> <if_:Block<Statement>> => {
        let branch = Branch (
            cond,
            vec![
                (Expression::Bool(true), if_),
            ]
        );
        Box::new(Expression::Branch(branch))
    },
    If <cond:Expression> <if_:Block<Statement>> Else <else_:Block<Statement>> => {
        let branch = Branch (
            cond,
            vec![
                (Expression::Bool(true), if_),
                (Expression::Bool(false), else_),
            ]
        );
        Box::new(Expression::Branch(branch))
    },
}

// A statement inside a function body: a call, a `let` binding, or an `if`.
Statement: Box<Expression> = {
    #[precedence(level="0")]
    <FnCall> => <>,
    #[precedence(level="1")]
    Let <name:Ident> "=" <value:Expression> => Box::new(Expression::Binding(Binding(name, value))),
    <IfDef> => <>,
}

// `use { a, b } from module`
UseDef: Box<Expression> = {
    Use "{" <imports:Comma<Ident>> "}" From <i:Ident> => Box::new(Expression::UseDef(UseDef(imports, i))),
}

// `impl Effect (for Type)? { fn ... }` — the `for` target is optional.
ImplDef: Box<Expression> = {
    Impl <i:Ident> <t:("for" Ident)?> <lines:Block<FnDef>> => Box::new(Expression::ImplDef(ImplDef(i, t.map(|t| t.1), lines))),
}

// Any item allowed at module scope.
TopLevel: Box<Expression> = {
    <FnDef> => <>,
    <EffectDef> => <>,
    <StructDef> => <>,
    <UseDef> => <>,
    <ImplDef> => <>,
}

// Entry point. On an unrecoverable token, record the error in `errors` and
// return an empty module instead of aborting the parse.
pub Source: Module = {
    <expr:("\n"* TopLevel)*> => Module(expr.into_iter().map(|e| e.1).collect()),
    ! => {
        errors.push(<>);
        Module(vec![])
    }
};

12968
src/parser/src.rs Normal file

File diff suppressed because it is too large Load diff

46
src/parser/string.rs Normal file
View file

@ -0,0 +1,46 @@
use std::str::Chars;
use stringzilla::{sz, StringZilla};
use syn::spanned::Spanned as _;
use crate::lexer::{self, Spanned};
/// Errors produced while scanning string literals.
#[derive(Debug, PartialEq)]
pub enum ParseError {
    UnexpectedChar(char),
    UnterminatedString,
}

impl std::fmt::Display for ParseError {
    /// Renders the human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match *self {
            Self::UnexpectedChar(c) => write!(f, "unexpected character: '{}'", c),
            Self::UnterminatedString => f.write_str("unterminated string"),
        }
    }
}

impl std::error::Error for ParseError {}
/// Quotes and escapes `content` as a Rust string literal: wraps it in `"` and
/// backslash-escapes embedded quotes, backslashes and control characters.
///
/// Uses the standard library's `Debug` formatting for `str`, which performs
/// exactly this quoting/escaping, instead of round-tripping through
/// `syn::LitStr` / proc-macro2 token machinery (which allocated a span and a
/// token tree just to print one literal).
///
/// NOTE(review): for plain ASCII (all existing tests) the output is identical
/// to the old `syn`-based version; exotic code points may be rendered with
/// `\u{...}` escapes where proc-macro2 chose a different spelling — confirm no
/// caller depends on that.
pub fn apply_string_escapes(content: &str) -> std::borrow::Cow<str> {
    format!("{:?}", content).into()
}
#[cfg(test)]
mod tests {
    use super::*;

    // Plain input: output is simply wrapped in quotes.
    #[test]
    fn test_apply_string_escapes() {
        assert_eq!(apply_string_escapes(r#"hello"#), "\"hello\"");
    }

    // An embedded double quote gains a backslash escape.
    #[test]
    fn test_apply_string_escapes_with_escaped_quote() {
        assert_eq!(apply_string_escapes(r#"hello" world"#), r#""hello\" world""#);
    }

    // A literal backslash-quote pair becomes `\\` + `\"` in the output.
    #[test]
    fn test_apply_string_escapes_with_escaped_backslash() {
        assert_eq!(apply_string_escapes(r#"hello\" world"#), r#""hello\\\" world""#);
    }
}
}