From 6f6613419f1511c5637c9f69b3caa5ae838270b9 Mon Sep 17 00:00:00 2001 From: Cody Date: Wed, 7 Jun 2023 03:28:40 -0500 Subject: Moving over from a VM interpreter to natively compiled w/ LLVM --- .gitignore | 1 + Cargo.lock | 240 +++++++++--- Cargo.toml | 7 +- README.md | 4 +- crates/sloth/Cargo.toml | 14 - crates/sloth/src/compiler/mod.rs | 75 ---- crates/sloth/src/compiler/symbol.rs | 79 ---- crates/sloth/src/lexer.rs | 559 ---------------------------- crates/sloth/src/main.rs | 76 ---- crates/sloth/src/parser/ast.rs | 115 ------ crates/sloth/src/parser/expr.rs | 261 ------------- crates/sloth/src/parser/mod.rs | 57 --- crates/sloth/src/parser/stmt.rs | 646 --------------------------------- crates/sloth_asm/Cargo.toml | 8 - crates/sloth_asm/src/lib.rs | 1 - crates/sloth_bytecode/Cargo.toml | 9 - crates/sloth_bytecode/src/lib.rs | 76 ---- crates/sloth_vm/Cargo.toml | 12 - crates/sloth_vm/src/lib.rs | 155 -------- crates/sloth_vm/src/native.rs | 19 - crates/sloth_vm/src/sloth_std/file.rs | 83 ----- crates/sloth_vm/src/sloth_std/misc.rs | 39 -- crates/sloth_vm/src/sloth_std/mod.rs | 43 --- crates/sloth_vm/src/sloth_std/rand.rs | 48 --- crates/sloth_vm/src/sloth_std/stdio.rs | 91 ----- crates/sloth_vm/src/sloth_std/term.rs | 41 --- crates/sloth_vm/src/sloth_std/time.rs | 29 -- crates/sloth_vm/src/value.rs | 53 --- crates/sloth_vm/src/vm.rs | 610 ------------------------------- flake.nix | 10 + sloth/Cargo.toml | 11 + sloth/src/compiler/mod.rs | 131 +++++++ sloth/src/lexer.rs | 559 ++++++++++++++++++++++++++++ sloth/src/main.rs | 43 +++ sloth/src/parser/ast.rs | 115 ++++++ sloth/src/parser/expr.rs | 261 +++++++++++++ sloth/src/parser/mod.rs | 57 +++ sloth/src/parser/stmt.rs | 646 +++++++++++++++++++++++++++++++++ test.c | 11 + test.sloth | 7 + 40 files changed, 2041 insertions(+), 3261 deletions(-) delete mode 100644 crates/sloth/Cargo.toml delete mode 100644 crates/sloth/src/compiler/mod.rs delete mode 100644 crates/sloth/src/compiler/symbol.rs delete mode 100644 crates/sloth/src/lexer.rs delete mode 100644 crates/sloth/src/main.rs delete mode 100644 crates/sloth/src/parser/ast.rs delete mode 100644 crates/sloth/src/parser/expr.rs delete mode 100644 crates/sloth/src/parser/mod.rs delete mode 100644 crates/sloth/src/parser/stmt.rs delete mode 100644 crates/sloth_asm/Cargo.toml delete mode 100644 crates/sloth_asm/src/lib.rs delete mode 100644 crates/sloth_bytecode/Cargo.toml delete mode 100644 crates/sloth_bytecode/src/lib.rs delete mode 100644 crates/sloth_vm/Cargo.toml delete mode 100644 crates/sloth_vm/src/lib.rs delete mode 100644 crates/sloth_vm/src/native.rs delete mode 100644 crates/sloth_vm/src/sloth_std/file.rs delete mode 100644 crates/sloth_vm/src/sloth_std/misc.rs delete mode 100644 crates/sloth_vm/src/sloth_std/mod.rs delete mode 100644 crates/sloth_vm/src/sloth_std/rand.rs delete mode 100644 crates/sloth_vm/src/sloth_std/stdio.rs delete mode 100644 crates/sloth_vm/src/sloth_std/term.rs delete mode 100644 crates/sloth_vm/src/sloth_std/time.rs delete mode 100644 crates/sloth_vm/src/value.rs delete mode 100644 crates/sloth_vm/src/vm.rs create mode 100644 sloth/Cargo.toml create mode 100644 sloth/src/compiler/mod.rs create mode 100644 sloth/src/lexer.rs create mode 100644 sloth/src/main.rs create mode 100644 sloth/src/parser/ast.rs create mode 100644 sloth/src/parser/expr.rs create mode 100644 sloth/src/parser/mod.rs create mode 100644 sloth/src/parser/stmt.rs create mode 100644 test.c create mode 100644 test.sloth diff --git a/.gitignore b/.gitignore index b1b16ef..982e9ba 100644 --- 
a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ node_modules/ # Added by cargo /target +/*.o diff --git a/Cargo.lock b/Cargo.lock index 908146c..7f5d485 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,10 +3,31 @@ version = 3 [[package]] -name = "byteorder" -version = "1.4.3" +name = "aho-corasick" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +dependencies = [ + "memchr", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" [[package]] name = "cfg-if" @@ -21,14 +42,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" [[package]] -name = "getrandom" -version = "0.2.9" +name = "inkwell" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" +checksum = "3f4fcb4a4fa0b8f7b4178e24e6317d6f8b95ab500d8e6e1bd4283b6860e369c1" dependencies = [ - "cfg-if", + "either", + "inkwell_internals", "libc", - "wasi", + "llvm-sys", + "once_cell", + "parking_lot", +] + +[[package]] +name = "inkwell_internals" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b185e7d068d6820411502efa14d8fbf010750485399402156b72dd2a548ef8e9" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -40,23 +75,75 @@ dependencies = [ "either", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" -version = "0.2.142" +version = "0.2.146" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" + +[[package]] +name = "llvm-sys" +version = "150.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" +checksum = "58b2ce8adf5b4b7f4652994f522ea2639ad388f6ab6b85b229750decf2782d8a" +dependencies = [ + "cc", + "lazy_static", + "libc", + "regex", + "semver", +] + +[[package]] +name = "lock_api" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" 
+checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] -name = "ppv-lite86" -version = "0.2.17" +name = "parking_lot" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] [[package]] name = "proc-macro2" @@ -77,65 +164,57 @@ dependencies = [ ] [[package]] -name = "rand" -version = "0.8.5" +name = "redox_syscall" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "libc", - "rand_chacha", - "rand_core", + "bitflags", ] [[package]] -name = "rand_chacha" -version = "0.3.1" +name = "regex" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" dependencies = [ - "ppv-lite86", - "rand_core", + "aho-corasick", + "memchr", + "regex-syntax", ] [[package]] -name = "rand_core" -version = "0.6.4" +name = "regex-syntax" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" [[package]] -name = "sloth" -version = "0.1.0" -dependencies = [ - "itertools", - "libc", - "sloth_bytecode", - "sloth_vm", - "thiserror", -] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] -name = "sloth_asm" -version = "0.1.0" +name = "semver" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] -name = "sloth_bytecode" +name = "sloth" version = "0.1.0" dependencies = [ - "byteorder", + "inkwell", + "itertools", + "thiserror", ] [[package]] -name = "sloth_vm" -version = "0.1.0" -dependencies = [ - "once_cell", - "rand", - "sloth_bytecode", -] +name = "smallvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "syn" @@ -175,7 +254,58 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" [[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", 
+ "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" diff --git a/Cargo.toml b/Cargo.toml index 118519e..cd103f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,5 @@ [workspace] -members = [ - "crates/sloth", - "crates/sloth_asm", - "crates/sloth_bytecode", - "crates/sloth_vm", -] +members = [ "sloth" ] [workspace.package] license = "MIT OR Apache-2.0" diff --git a/README.md b/README.md index b7793f3..1f154d2 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ Sloth is an interpreted high level language written in Rust. ## Build -To build sloth is easy, just run `cargo build` and you will have your own version of sloth! +In order to build sloth you will need a valid install of LLVM 15.0.1, you can download LLVM from your package manager or you can download and build LLVM from source from their [downloads page](https://releases.llvm.org/). + +After acquiring LLVM just run `cargo build` and you will have your own version of the sloth compiler! ## Disclaimer Sloth is in very early development is NOT meant to be used for actual projects yet. Feel free to contribute to the project via Pull Request and open issues if you can. Thank you for using sloth! 
diff --git a/crates/sloth/Cargo.toml b/crates/sloth/Cargo.toml deleted file mode 100644 index 8f49802..0000000 --- a/crates/sloth/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "sloth" - -license.workspace = true -version.workspace = true -edition.workspace = true - -[dependencies] -sloth_vm = { path = "../sloth_vm" } -sloth_bytecode = { path = "../sloth_bytecode" } - -itertools = "0.10.5" -libc = "0.2.142" -thiserror = "1.0.40" diff --git a/crates/sloth/src/compiler/mod.rs b/crates/sloth/src/compiler/mod.rs deleted file mode 100644 index 873f1a2..0000000 --- a/crates/sloth/src/compiler/mod.rs +++ /dev/null @@ -1,75 +0,0 @@ -#![allow(unused)] - -pub mod symbol; - -use std::collections::HashMap; - -use sloth_bytecode::Opcode; - -use self::symbol::{Function, Symbol, SymbolTable, SymbolTableStack, SymbolType}; -use crate::parser::ast::{BinaryOp, Expr, Literal, Stmt, UnaryOp}; - -// Modules: -// Symbols (Functions, Constants) -// -// Functions: -// Symbols (Functions, Variables, Constants) - -pub enum CompilerMode { - Module, - Function, -} - -pub struct Compiler { - symbols: SymbolTableStack, - mode: CompilerMode, -} - -pub struct CompileOrder { - code: Vec, -} - -impl Compiler { - fn new() -> Self { - Self { - symbols: SymbolTableStack::default(), - mode: CompilerMode::Module, - } - } - - fn compile(&mut self, code: Vec) { - let mut queue = Vec::::new(); - - for stmt in code { - match stmt { - Stmt::DefineFunction { - ident, - args, - body, - return_type, - } => { - self.symbols.push_symbol(ident, Symbol { - typ: SymbolType::Function(Function { - arity: args.len() as u8, - returns_value: return_type.is_some(), - }), - }); - - todo!() - } - - _ => panic!("Failed to compile module due to unexpected statement"), - } - } - } - - fn compile_function(&mut self, code: Vec) -> Function { - unimplemented!() - } -} - -pub fn generate_symbols() -> SymbolTable { - let mut table = SymbolTable::default(); - // - todo!() -} diff --git a/crates/sloth/src/compiler/symbol.rs b/crates/sloth/src/compiler/symbol.rs deleted file mode 100644 index b10918f..0000000 --- a/crates/sloth/src/compiler/symbol.rs +++ /dev/null @@ -1,79 +0,0 @@ -use std::collections::HashMap; - -pub struct SymbolTableStack { - inner: Vec, -} - -impl SymbolTableStack { - pub fn push_scope(&mut self) { - self.inner.push(SymbolTable::default()); - } - - pub fn pop_scope(&mut self) -> bool { - if self.inner.len() == 1 { - return false; - } - - self.inner.pop(); - true - } - - pub fn get_symbol(&self, identifier: &str) -> Option<&Symbol> { - for table in self.inner.iter().rev() { - if let Some(symbol) = table.get(identifier) { - return Some(symbol); - } - } - - None - } - - pub fn push_symbol(&mut self, identifier: impl Into, symbol: Symbol) { - let table = self - .inner - .last_mut() - .expect("Symbol table stack should always have at least 1 table"); - table.insert(identifier.into(), symbol); - } -} - -impl Default for SymbolTableStack { - fn default() -> Self { - Self { - inner: vec![SymbolTable::default()], - } - } -} - -// x 0x00 -// - x 0x01 -// - y 0x02 -// y 0x01 - -pub type SymbolTable = HashMap; - -pub struct Symbol { - pub typ: SymbolType, -} - -pub enum SymbolType { - Function(Function), - Variable(Variable), - Constant(Constant), -} - -pub struct Function { - pub arity: u8, - pub returns_value: bool, - // TODO: Types -} - -pub struct Variable { - pub idx: u16, - // TODO: Types -} - -pub struct Constant { - pub idx: u16, - // TODO: Types -} diff --git a/crates/sloth/src/lexer.rs b/crates/sloth/src/lexer.rs deleted file 
mode 100644 index 0afaf1c..0000000 --- a/crates/sloth/src/lexer.rs +++ /dev/null @@ -1,559 +0,0 @@ -#![allow(dead_code)] - -//! TODO: Lexing Regex Literals - -use std::str::Chars; - -use thiserror::Error; - -#[derive(Debug, Clone, PartialEq, Error)] -pub enum LexerError { - #[error("Unexpected token")] - UnexpectedToken, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum TokenType { - // Meta - DocComment, - Comment, - - // Brackets - OpeningParen, // ( - ClosingParen, // ) - OpeningBracket, // [ - ClosingBracket, // ] - OpeningBrace, // { - ClosingBrace, // } - - // Operators - Plus, // + - PlusPlus, // ++ - Minus, // - - Star, // * - StarStar, // ** - Slash, // / - Perc, // % - Tilde, // ~ - - PlusEq, // += - PlusPlusEq, // ++= - MinusEq, // -= - StarEq, // *= - StarStarEq, // **= - SlashEq, // /= - PercEq, // %= - TildeEq, // ~= - - Amp, // & - AmpAmp, // && - Pipe, // | - PipePipe, // || - Caret, // ^ - - Eq, // = - EqEq, // == - Bang, // ! - BangBang, // !! - BangEq, // != - - Lt, // < - LtLt, // << - LtEq, // <= - LtLtEq, // <<= - Gt, // > - GtGt, // >> - GtEq, // >= - GtGtEq, // >>= - - Comma, - - Question, // ? - QuestionDot, // ?. - QuestionQuestion, // ?? - Dot, // . - DotDot, // .. - - Colon, // : - ColonColon, // :: - SemiColon, // ; - - Arrow, // -> - FatArrow, // => - - // Keywords - Val, - Var, - - Fn, - Return, - - If, - Else, - - While, - For, - In, - - Loop, - Break, - Continue, - - As, - - // Literals - Integer(i128), - Float(f64), - Boolean(bool), - Character(char), - String(String), - Regex(String), - - Identifier(String), - - // Utility - Error(LexerError), -} - -#[derive(Debug, Default, Clone, Copy)] -pub struct Location { - index: usize, - pub row: u32, - pub col: u32, -} - -impl Location { - fn advance(&mut self, len: usize, newline: bool) { - if newline { - self.row += 1; - self.col = 0; - } else { - self.col += 1; - } - self.index += len; - } -} - -#[derive(Debug)] -pub struct Token<'a> { - pub tt: TokenType, - pub lexeme: &'a str, - - start: Location, - end: Location, -} - -pub struct Lexer<'a> { - source: &'a [u8], - window: [char; 3], - chars: Chars<'a>, - - start: Location, - current: Location, - - // Keep track if the lexer has encountered an error to stop lexing asap - errored: bool, -} - -impl<'a> Lexer<'a> { - pub(crate) fn new(source: &'a str) -> Self { - let mut chars = source.chars(); - let window = [ - chars.next().unwrap_or('\0'), - chars.next().unwrap_or('\0'), - chars.next().unwrap_or('\0'), - ]; - - Self { - source: source.as_bytes(), - window, - chars, - start: Default::default(), - current: Default::default(), - errored: false, - } - } -} - -impl<'a> Lexer<'a> { - fn pos(&self) -> usize { - self.current.index - } - - fn peek(&self) -> char { - self.window[0] - } - - fn eof(&self) -> bool { - self.peek() == '\0' - } - - fn advance(&mut self) -> char { - let current = self.window[0]; - self.window = [ - self.window[1], - self.window[2], - self.chars.next().unwrap_or('\0'), - ]; - self.current.advance(current.len_utf8(), current == '\n'); - current - } - - fn advance_with(&mut self, with: TokenType) -> TokenType { - self.advance(); - with - } - - fn advance_by(&mut self, amount: usize) { - for _ in 0..amount { - self.advance(); - } - } - - fn advance_by_with(&mut self, amount: usize, with: TokenType) -> TokenType { - self.advance_by(amount); - with - } - - fn advance_while(&mut self, predicate: impl Fn([char; 3]) -> bool) { - while !self.eof() && predicate(self.window) { - self.advance(); - } - } -} - -impl<'a> Lexer<'a> { - fn lex_number(&mut 
self) -> TokenType { - let mut value = self.advance().to_string(); - - while self.peek().is_ascii_digit() { - value.push(self.advance()); - } - - if self.peek() == '.' { - value.push(self.advance()); - - while self.peek().is_ascii_digit() { - value.push(self.advance()); - } - - TokenType::Float(value.parse::().expect("Expected float")) - } else { - TokenType::Integer(value.parse::().expect("Expected integer")) - } - } - - fn lex_string(&mut self) -> TokenType { - let mut value = String::new(); - - self.advance(); - loop { - match self.window { - ['\\', '"', ..] => { - self.advance_by(2); - value.push('"'); - } - ['\\', 't', ..] => { - self.advance_by(2); - value.push('\t'); - } - ['\\', 'n', ..] => { - self.advance_by(2); - value.push('\n'); - } - ['"', ..] => { - self.advance(); - break; - } - _ => { - value.push(self.advance()); - continue; - } - } - } - - TokenType::String(value) - } -} - -impl<'a> Iterator for Lexer<'a> { - type Item = Token<'a>; - - fn next(&mut self) -> Option { - // Skipping whitespace - self.advance_while(|it| it[0].is_whitespace()); - self.start = self.current; - - // If were at the end of the file or an error has occurred return nothing - if self.eof() || self.errored { - return None; - } - - // Figuring out the token type - let tt = match self.window { - ['#', '#', ..] => { - self.advance_while(|it| it[0] != '\n'); - // TODO: TokenType::DocComment - return self.next(); - } - - ['#', ..] => { - self.advance_while(|it| it[0] != '\n'); - // TODO: okenType::Comment - return self.next(); - } - - // Blocks - ['(', ..] => self.advance_with(TokenType::OpeningParen), - [')', ..] => self.advance_with(TokenType::ClosingParen), - ['[', ..] => self.advance_with(TokenType::OpeningBracket), - [']', ..] => self.advance_with(TokenType::ClosingBracket), - ['{', ..] => self.advance_with(TokenType::OpeningBrace), - ['}', ..] => self.advance_with(TokenType::ClosingBrace), - - // Operators - ['-', '>', ..] => self.advance_by_with(2, TokenType::Arrow), - ['=', '>', ..] => self.advance_by_with(2, TokenType::FatArrow), - - ['+', '+', '='] => self.advance_by_with(3, TokenType::PlusPlusEq), - ['*', '*', '='] => self.advance_by_with(3, TokenType::StarStarEq), - ['+', '+', ..] => self.advance_by_with(2, TokenType::PlusPlus), - ['*', '*', ..] => self.advance_by_with(2, TokenType::StarStar), - - ['+', '=', ..] => self.advance_by_with(2, TokenType::PlusEq), - ['-', '=', ..] => self.advance_by_with(2, TokenType::MinusEq), - ['*', '=', ..] => self.advance_by_with(2, TokenType::StarEq), - ['/', '=', ..] => self.advance_by_with(2, TokenType::SlashEq), - ['%', '=', ..] => self.advance_by_with(2, TokenType::PercEq), - ['~', '=', ..] => self.advance_by_with(2, TokenType::TildeEq), - - ['+', ..] => self.advance_with(TokenType::Plus), - ['-', ..] => self.advance_with(TokenType::Minus), - ['*', ..] => self.advance_with(TokenType::Star), - ['/', ..] => self.advance_with(TokenType::Slash), // TODO: Check for regex literals - ['%', ..] => self.advance_with(TokenType::Perc), - ['~', ..] => self.advance_with(TokenType::Tilde), - - ['&', '&', ..] => self.advance_by_with(2, TokenType::AmpAmp), - ['&', ..] => self.advance_with(TokenType::Amp), - - ['|', '|', ..] => self.advance_by_with(2, TokenType::PipePipe), - ['|', ..] => self.advance_with(TokenType::Pipe), - - ['^', ..] => self.advance_by_with(2, TokenType::Caret), - - ['=', '=', ..] => self.advance_by_with(2, TokenType::EqEq), - ['!', '=', ..] => self.advance_by_with(2, TokenType::BangEq), - ['!', '!', ..] 
=> self.advance_by_with(2, TokenType::BangBang), - ['=', ..] => self.advance_with(TokenType::Eq), - ['!', ..] => self.advance_with(TokenType::Bang), - - ['<', '<', '='] => self.advance_by_with(3, TokenType::LtLtEq), - ['<', '<', ..] => self.advance_by_with(2, TokenType::LtLt), - ['<', '=', ..] => self.advance_by_with(2, TokenType::LtEq), - ['<', ..] => self.advance_with(TokenType::Lt), - - ['>', '>', '='] => self.advance_by_with(3, TokenType::GtGtEq), - ['>', '>', ..] => self.advance_by_with(2, TokenType::GtGt), - ['>', '=', ..] => self.advance_by_with(2, TokenType::GtEq), - ['>', ..] => self.advance_with(TokenType::Gt), - - [',', ..] => self.advance_with(TokenType::Comma), - - ['.', '.', ..] => self.advance_by_with(2, TokenType::DotDot), - ['.', ..] => self.advance_with(TokenType::Dot), - ['?', '?', ..] => self.advance_by_with(2, TokenType::QuestionQuestion), - ['?', '.', ..] => self.advance_by_with(2, TokenType::QuestionDot), - ['?', ..] => self.advance_with(TokenType::Question), - - [';', ..] => self.advance_with(TokenType::SemiColon), - [':', ':', ..] => self.advance_by_with(2, TokenType::ColonColon), - [':', ..] => self.advance_with(TokenType::Colon), - - // Literals - ['\'', c, '\''] => self.advance_by_with(3, TokenType::Character(c)), - ['0'..='9', ..] => self.lex_number(), - ['"', ..] => self.lex_string(), - - ['a'..='z' | 'A'..='Z' | '_' | '$', ..] => { - let mut value = String::new(); - while matches!(self.peek(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$') { - value.push(self.advance()); - } - - match value.as_str() { - "val" => TokenType::Val, - "var" => TokenType::Var, - "fn" => TokenType::Fn, - "return" => TokenType::Return, - "if" => TokenType::If, - "else" => TokenType::Else, - "while" => TokenType::While, - "for" => TokenType::For, - "in" => TokenType::In, - "loop" => TokenType::Loop, - "break" => TokenType::Break, - "continue" => TokenType::Continue, - "as" => TokenType::As, - "true" => TokenType::Boolean(true), - "false" => TokenType::Boolean(false), - _ => TokenType::Identifier(value), - } - } - - _ => { - self.errored = true; - TokenType::Error(LexerError::UnexpectedToken) - } - }; - - let lexeme = unsafe { - // At this point it is already known that the string is valid UTF-8, might - // aswell not check again - std::str::from_utf8_unchecked(&self.source[self.start.index..self.pos()]) - }; - - let token = Token { - tt, - lexeme, - start: self.start, - end: self.current, - }; - - Some(token) - } -} - -#[cfg(test)] -mod tests { - use itertools::Itertools; - - use super::{Lexer, TokenType}; - use crate::lexer::LexerError; - - #[test] - fn lex_operators() { - let source = "+ ++ - * ** / % ~ += ++= -= *= **= /= %= ~= & && | || ^ = == ! !! != < << \ - <<= <= > >> >>= >= , ? ?. ?? . .. 
: :: ; -> =>"; - let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); - - assert_eq!(&tokens, &[ - TokenType::Plus, - TokenType::PlusPlus, - TokenType::Minus, - TokenType::Star, - TokenType::StarStar, - TokenType::Slash, - TokenType::Perc, - TokenType::Tilde, - TokenType::PlusEq, - TokenType::PlusPlusEq, - TokenType::MinusEq, - TokenType::StarEq, - TokenType::StarStarEq, - TokenType::SlashEq, - TokenType::PercEq, - TokenType::TildeEq, - TokenType::Amp, - TokenType::AmpAmp, - TokenType::Pipe, - TokenType::PipePipe, - TokenType::Caret, - TokenType::Eq, - TokenType::EqEq, - TokenType::Bang, - TokenType::BangBang, - TokenType::BangEq, - TokenType::Lt, - TokenType::LtLt, - TokenType::LtLtEq, - TokenType::LtEq, - TokenType::Gt, - TokenType::GtGt, - TokenType::GtGtEq, - TokenType::GtEq, - TokenType::Comma, - TokenType::Question, - TokenType::QuestionDot, - TokenType::QuestionQuestion, - TokenType::Dot, - TokenType::DotDot, - TokenType::Colon, - TokenType::ColonColon, - TokenType::SemiColon, - TokenType::Arrow, - TokenType::FatArrow, - ]); - } - - #[test] - fn lex_keywords() { - let source = "val var fn if else while for in loop break continue as true false"; - let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); - - assert_eq!(&tokens, &[ - TokenType::Val, - TokenType::Var, - TokenType::Fn, - TokenType::If, - TokenType::Else, - TokenType::While, - TokenType::For, - TokenType::In, - TokenType::Loop, - TokenType::Break, - TokenType::Continue, - TokenType::As, - TokenType::Boolean(true), - TokenType::Boolean(false), - ]); - } - - #[test] - fn lex_literals_a() { - let source = "foo bar _foo __bar $0 $$1 \"foo\" \"bar\" \"baz\" \"\\\"\" \"\\n\" \"\\t\" \ - 'a' 'b' '\"' 93 3252 238 -382 -832 83 -25 52.9 83.7 12.4 35.2 3.3"; - let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); - - assert_eq!(&tokens, &[ - TokenType::Identifier("foo".to_owned()), - TokenType::Identifier("bar".to_owned()), - TokenType::Identifier("_foo".to_owned()), - TokenType::Identifier("__bar".to_owned()), - TokenType::Identifier("$0".to_owned()), - TokenType::Identifier("$$1".to_owned()), - TokenType::String("foo".to_owned()), - TokenType::String("bar".to_owned()), - TokenType::String("baz".to_owned()), - TokenType::String("\"".to_owned()), - TokenType::String("\n".to_owned()), - TokenType::String("\t".to_owned()), - TokenType::Character('a'), - TokenType::Character('b'), - TokenType::Character('"'), - TokenType::Integer(93), - TokenType::Integer(3252), - TokenType::Integer(238), - TokenType::Minus, - TokenType::Integer(382), - TokenType::Minus, - TokenType::Integer(832), - TokenType::Integer(83), - TokenType::Minus, - TokenType::Integer(25), - TokenType::Float(52.9), - TokenType::Float(83.7), - TokenType::Float(12.4), - TokenType::Float(35.2), - TokenType::Float(3.3), - ]); - } - - #[test] - fn lex_errors() { - let source = "`"; - let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); - - assert_eq!(&tokens, &[TokenType::Error(LexerError::UnexpectedToken)]); - } -} diff --git a/crates/sloth/src/main.rs b/crates/sloth/src/main.rs deleted file mode 100644 index b770684..0000000 --- a/crates/sloth/src/main.rs +++ /dev/null @@ -1,76 +0,0 @@ -#![warn( - clippy::wildcard_imports, - clippy::string_add, - clippy::string_add_assign, - clippy::manual_ok_or, - unused_lifetimes -)] - -pub mod compiler; -pub mod lexer; -pub mod parser; - -use std::collections::HashMap; - -// use std::{env, fs}; -use itertools::Itertools; -use lexer::Lexer; -use parser::AstParser; -use sloth_vm::value::Function; -use 
sloth_vm::{ObjectMap, VM}; - -fn main() { - // let args = env::args().collect_vec(); - // - // if args.len() < 2 { - // println!("Sloth programming language interpreter\n"); - // println!("Usage: sloth "); - // return; - // } - // - // let source_path = &args[1]; - // let Ok(source) = fs::read_to_string(source_path) else { - // println!("Error while reading '{source_path}'"); - // return; - // }; - // let source = " 3 + 7 ;"; - // let source = r#" - - // fn hello() -> int { - // return 3 + 7; - // } - - // hello(); - // hello(); - // hello(); - // hello(); - // hello(); - // hello(); - - // "#; - let source = r#" - - fn hello() -> int { - var x = 5; - x = 7; - return x; - } - - hello(); - - "#; - - let tokens = Lexer::new(source).collect_vec(); - let ast = AstParser::new(tokens).parse(); - let mut object_map = ObjectMap::default(); - // let code = Compiler::compile(&mut object_map, HashMap::default(), - // ast.clone()); - - // println!("{ast:?}\n\n"); - // println!("{:?}\n\n", code.constants); - // println!("{:?}\n\n", code.code); - // - // let mut vm = VM::new(object_map, Function::root(code)); - // vm.run(); - // println!("{:?}", vm.stack.peek()); -} diff --git a/crates/sloth/src/parser/ast.rs b/crates/sloth/src/parser/ast.rs deleted file mode 100644 index 543ea3a..0000000 --- a/crates/sloth/src/parser/ast.rs +++ /dev/null @@ -1,115 +0,0 @@ -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum BinaryOp { - Add, - Con, - Sub, - Mul, - Pow, - Div, - Mod, - - BWSftRight, - BWSftLeft, - BWAnd, - BWOr, - BWXor, - - Lt, - Gt, - LtEq, - GtEq, - EqEq, - NotEq, - LogAnd, - LogOr, - Range, -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum UnaryOp { - Not, - Neg, - - BWComp, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum Literal { - Integer(i128), - Float(f64), - Bool(bool), - Char(char), - String(String), - Regex(String), - List(Vec), -} - -#[derive(Debug, Clone, PartialEq)] -pub enum Expr { - Grouping(Box), - BinaryOp { - op: BinaryOp, - lhs: Box, - rhs: Box, - }, - UnaryOp { - op: UnaryOp, - value: Box, - }, - Call { - ident: Box, - args: Vec, - }, - Variable(String), - Literal(Literal), - Lambda, // TODO: Lambda -} - -#[derive(PartialEq, Clone, Debug)] -pub struct FuncArgs { - pub name: String, - pub typ: Option, -} - -#[derive(PartialEq, Clone, Debug)] -pub enum Stmt { - ExprStmt(Expr), - DefineFunction { - ident: String, - args: Vec, - body: Vec, - return_type: Option, - }, - DefineVariable { - name: String, - value: Expr, - typ: Option, - }, - DefineValue { - name: String, - value: Expr, - typ: Option, - }, - AssignVariable { - name: String, - value: Expr, - }, - If { - expr: Expr, - body: Vec, - else_if: Vec<(Expr, Stmt)>, - els: Option>, - }, - For { - name: String, - iter: Expr, - body: Vec, - }, - While { - condition: Expr, - body: Vec, - }, - Return { - value: Expr, - }, -} diff --git a/crates/sloth/src/parser/expr.rs b/crates/sloth/src/parser/expr.rs deleted file mode 100644 index 9e81f7f..0000000 --- a/crates/sloth/src/parser/expr.rs +++ /dev/null @@ -1,261 +0,0 @@ -use super::ast::{BinaryOp, Expr, Literal, UnaryOp}; -use super::AstParser; -use crate::lexer::TokenType; - -/// Implementation containing parsers internal components related to expressions -impl<'a> AstParser<'a> { - // FIXME: Should probably avoid cloning token types - - pub fn expression(&mut self) -> Expr { - self.logical_or() - } - - fn unary(&mut self) -> Expr { - if !self.eof() - && matches!( - self.peek().tt, - TokenType::Bang | TokenType::Plus | TokenType::Minus - ) - { - let operator = match 
self.advance().unwrap().tt.clone() { - TokenType::Bang => UnaryOp::Not, - TokenType::Tilde => UnaryOp::BWComp, - TokenType::Minus => UnaryOp::Neg, - _ => panic!(), - }; - - let rhs = self.unary(); - return Expr::UnaryOp { - op: (operator), - value: (Box::new(rhs)), - }; - } - - self.call() - } - - fn call(&mut self) -> Expr { - let mut expr = self.primary(); - - if self.advance_if_eq(&TokenType::OpeningParen) { - let mut arguments = Vec::::new(); - - if self.peek().tt != TokenType::ClosingParen { - loop { - arguments.push(self.expression()); - if !self.advance_if_eq(&TokenType::Comma) { - break; - } - } - } - - self.consume( - TokenType::ClosingParen, - "Expected ')' to close off function call", - ); - - // let Expr::Variable(_ident) = expr else { panic!("uh oh spaghettio"); }; - - expr = Expr::Call { - ident: (Box::new(expr)), - args: (arguments), - } - } - - expr - } - - fn primary(&mut self) -> Expr { - match self.advance().unwrap().tt.clone() { - TokenType::Integer(literal) => Expr::Literal(Literal::Integer(literal)), - TokenType::Float(literal) => Expr::Literal(Literal::Float(literal)), - TokenType::Boolean(literal) => Expr::Literal(Literal::Bool(literal)), - TokenType::Character(literal) => Expr::Literal(Literal::Char(literal)), - TokenType::String(literal) => Expr::Literal(Literal::String(literal)), - TokenType::Regex(literal) => Expr::Literal(Literal::Regex(literal)), - TokenType::Identifier(ident) => Expr::Variable(ident), - TokenType::OpeningParen => { - let expr = self.expression(); - self.consume(TokenType::ClosingParen, "Must end expression with ')'"); - Expr::Grouping(Box::new(expr)) - } - TokenType::OpeningBracket => { - let mut expr: Vec = Vec::new(); - - while !self.eof() && self.peek().tt != TokenType::ClosingBracket { - let exp = self.expression(); - expr.push(exp); - - self.advance_if_eq(&TokenType::Comma); - } - self.consume(TokenType::ClosingBracket, "Expected ']' at end of list"); - Expr::Literal(Literal::List(expr)) - } - _ => unimplemented!("{:?}", self.peek()), - } - } -} - -// Macro to generate repetitive binary expressions. Things like addition, -// multiplication, exc. -macro_rules! binary_expr { - ($name:ident, $parent:ident, $pattern:pat) => { - fn $name(&mut self) -> Expr { - let mut expr = self.$parent(); - - while !self.eof() && matches!(self.peek().tt, $pattern) { - let operator = match self.advance().unwrap().tt.clone() { - TokenType::Plus => BinaryOp::Add, - TokenType::PlusPlus => BinaryOp::Con, - TokenType::Minus => BinaryOp::Sub, - TokenType::Star => BinaryOp::Mul, - TokenType::StarStar => BinaryOp::Pow, - TokenType::Slash => BinaryOp::Div, - TokenType::Perc => BinaryOp::Mod, - TokenType::DotDot => BinaryOp::Range, - - TokenType::LtLt => BinaryOp::BWSftRight, - TokenType::GtGt => BinaryOp::BWSftLeft, - TokenType::Amp => BinaryOp::BWAnd, - TokenType::Pipe => BinaryOp::BWOr, - TokenType::Caret => BinaryOp::BWXor, - - TokenType::Lt => BinaryOp::Lt, - TokenType::Gt => BinaryOp::Gt, - TokenType::LtEq => BinaryOp::LtEq, - TokenType::GtEq => BinaryOp::GtEq, - TokenType::EqEq => BinaryOp::EqEq, - TokenType::BangEq => BinaryOp::NotEq, - TokenType::AmpAmp => BinaryOp::LogAnd, - TokenType::PipePipe => BinaryOp::LogOr, - _ => panic!("uh oh spagghetio"), - }; - - let rhs = self.$parent(); - expr = Expr::BinaryOp { - op: (operator), - lhs: (Box::new(expr)), - rhs: (Box::new(rhs)), - } - } - - expr - } - }; -} - -#[rustfmt::skip] -#[allow(unused_parens)] -impl<'a> AstParser<'a> { - // Binary expressions in order of precedence from lowest to highest. 
- binary_expr!(logical_or , logical_and , (TokenType::PipePipe)); - binary_expr!(logical_and , range , (TokenType::AmpAmp)); - binary_expr!(range , equality , (TokenType::DotDot)); - binary_expr!(equality , comparison , (TokenType::BangEq | TokenType::EqEq)); - binary_expr!(comparison , bitwise_shifting, (TokenType::Lt | TokenType::Gt | TokenType::LtEq | TokenType::GtEq)); - binary_expr!(bitwise_shifting, additive , (TokenType::LtLt | TokenType::GtGt)); - binary_expr!(additive , multiplicative , (TokenType::Plus | TokenType::Minus)); - binary_expr!(multiplicative , unary , (TokenType::Star | TokenType::Slash | TokenType::Perc)); -} - -#[cfg(test)] -mod tests { - use itertools::Itertools; - - use super::{AstParser, BinaryOp, Expr, Literal}; - use crate::lexer::Lexer; - use crate::parser::ast::UnaryOp; - - #[test] - fn basic_expression_a() { - let lexer = Lexer::new("3 + 5 * 4"); - let tokens = lexer.collect_vec(); - - let expected_ast = Expr::BinaryOp { - op: BinaryOp::Add, - lhs: Box::new(Expr::Literal(Literal::Integer(3))), - rhs: Box::new(Expr::BinaryOp { - op: BinaryOp::Mul, - lhs: Box::new(Expr::Literal(Literal::Integer(5))), - rhs: Box::new(Expr::Literal(Literal::Integer(4))), - }), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.expression(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - - #[test] - fn basic_expression_b() { - let lexer = Lexer::new("17 - (-5 + 5) / 6"); - let tokens = lexer.collect_vec(); - - let expected_ast = Expr::BinaryOp { - op: BinaryOp::Sub, - lhs: Box::new(Expr::Literal(Literal::Integer(17))), - rhs: Box::new(Expr::BinaryOp { - op: BinaryOp::Div, - lhs: Box::new(Expr::Grouping(Box::new(Expr::BinaryOp { - op: BinaryOp::Add, - lhs: Box::new(Expr::UnaryOp { - op: UnaryOp::Neg, - value: Box::new(Expr::Literal(Literal::Integer(5))), - }), - rhs: Box::new(Expr::Literal(Literal::Integer(5))), - }))), - rhs: Box::new(Expr::Literal(Literal::Integer(6))), - }), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.expression(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - #[test] - fn basic_expression_c() { - let lexer = Lexer::new("[1, 2, 3]"); - let tokens = lexer.collect_vec(); - - let expected_ast = Expr::Literal(Literal::List(vec![ - Expr::Literal(Literal::Integer(1)), - Expr::Literal(Literal::Integer(2)), - Expr::Literal(Literal::Integer(3)), - ])); - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.expression(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - #[test] - fn basic_expression_d() { - let lexer = Lexer::new("1 .. 
17"); - let tokens = lexer.collect_vec(); - - let expected_ast = Expr::BinaryOp { - op: (BinaryOp::Range), - lhs: (Box::new(Expr::Literal(Literal::Integer(1)))), - rhs: (Box::new(Expr::Literal(Literal::Integer(17)))), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.expression(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } -} diff --git a/crates/sloth/src/parser/mod.rs b/crates/sloth/src/parser/mod.rs deleted file mode 100644 index 9d77acc..0000000 --- a/crates/sloth/src/parser/mod.rs +++ /dev/null @@ -1,57 +0,0 @@ -pub mod ast; -pub mod expr; -pub mod stmt; - -use crate::lexer::{Token, TokenType}; -#[derive(Debug)] -pub struct AstParser<'a> { - tokens: Vec>, - index: usize, -} - -/// Implementation containing utilities used by the parsers internal components -impl<'a> AstParser<'a> { - pub fn new(tokens: Vec>) -> Self { - Self { tokens, index: 0 } - } - pub fn peek(&self) -> &Token { - &self.tokens[self.index] - } - - pub fn advance(&mut self) -> Option<&Token> { - if self.eof() { - return None; - } - - self.index += 1; - Some(&self.tokens[self.index - 1]) - } - - pub fn advance_if(&mut self, next: impl FnOnce(&Token) -> bool) -> bool { - if self.eof() { - return false; - } - - if next(self.peek()) { - self.advance(); - return true; - } - - false - } - - pub fn advance_if_eq(&mut self, next: &TokenType) -> bool { - self.advance_if(|it| it.tt == *next) - } - - pub fn consume(&mut self, next: TokenType, error: &str) { - if std::mem::discriminant(&self.peek().tt) != std::mem::discriminant(&next) { - panic!("{error} at index {:?}", self.index); - } - self.advance(); - } - - pub fn eof(&self) -> bool { - self.index >= self.tokens.len() - } -} diff --git a/crates/sloth/src/parser/stmt.rs b/crates/sloth/src/parser/stmt.rs deleted file mode 100644 index 1a961b1..0000000 --- a/crates/sloth/src/parser/stmt.rs +++ /dev/null @@ -1,646 +0,0 @@ -use super::ast::{Expr, FuncArgs, Stmt}; -use super::AstParser; -use crate::lexer::TokenType; - -impl<'a> AstParser<'a> { - pub fn parse(&mut self) -> Vec { - let mut statements = Vec::new(); - - while !self.eof() { - statements.push(self.statement()); - } - - statements - } - - fn statement(&mut self) -> Stmt { - if self.advance_if_eq(&TokenType::Var) { - return self.var_statement(); - } - - if self.advance_if_eq(&TokenType::Val) { - return self.val_statement(); - } - - if self.advance_if_eq(&TokenType::If) { - return self.if_statement(); - } - - if self.advance_if_eq(&TokenType::For) { - return self.for_statement(); - } - - if self.advance_if_eq(&TokenType::While) { - return self.while_statement(); - } - - if self.advance_if_eq(&TokenType::Fn) { - return self.function_statement(); - } - - if self.advance_if_eq(&TokenType::Return) { - return self.return_statement(); - } - - self.mut_statement() - - // If we couldn't parse a statement return an expression statement - // self.expression_statement() - } - - fn mut_statement(&mut self) -> Stmt { - let TokenType::Identifier(ident) = self.peek().tt.clone() else { - panic!("Identifier error {:?}", self.peek()); - }; - - self.advance(); - let next = self.advance().unwrap().tt.clone(); - if next == TokenType::Eq { - let value = self.expression(); - self.consume(TokenType::SemiColon, "No semi colon for me i guess"); - return Stmt::AssignVariable { - name: (ident), - value: (value), - }; - } else if next == TokenType::OpeningParen { - let mut arguments = Vec::::new(); - - if 
self.peek().tt != TokenType::ClosingParen { - loop { - arguments.push(self.expression()); - if !self.advance_if_eq(&TokenType::Comma) { - break; - } - } - } - - self.consume( - TokenType::ClosingParen, - "Expected ')' to close off function call", - ); - - self.consume(TokenType::SemiColon, "No semi colon for me i guess"); - return Stmt::ExprStmt(Expr::Call { - ident: Box::new(Expr::Variable(ident)), - args: (arguments), - }); - } - self.expression_statement() - } - - fn var_statement(&mut self) -> Stmt { - let TokenType::Identifier(ident) = self.peek().tt.clone() else { - panic!("Identifier expected after 'var', not {:?}", self.peek()); - }; - - self.advance(); - - let mut typ: Option = None; - if self.peek().tt.clone() == TokenType::Colon { - self.consume(TokenType::Colon, "How did you even get this error?"); - let TokenType::Identifier(name) = self.peek().tt.clone() else { - panic!("Type expected after identifier, not {:?}", self.peek()); - }; - self.advance(); - typ = Some(name); - } - - self.consume(TokenType::Eq, "Expected '=' after identifier at "); - - let value = self.expression(); - - self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); - - Stmt::DefineVariable { - name: (ident), - value: (value), - typ: (typ), - } - } - - fn val_statement(&mut self) -> Stmt { - let TokenType::Identifier(ident) = self.peek().tt.clone() else { - panic!("Identifier expected after 'val'"); - }; - - self.advance(); // Advancing from the identifier - - let mut typ: Option = None; - if self.peek().tt.clone() == TokenType::Colon { - self.consume(TokenType::Colon, "How did you even get this error?"); - let TokenType::Identifier(name) = self.peek().tt.clone() else { - panic!("Type expected after identifier, not {:?}", self.peek()); - }; - self.advance(); - typ = Some(name); - } - - self.consume(TokenType::Eq, "Expected '=' after identifier"); - - let value = self.expression(); - - self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); - - Stmt::DefineValue { - name: (ident), - value: (value), - typ: (typ), - } - } - - fn if_statement(&mut self) -> Stmt { - let condition = self.expression(); - - self.consume( - TokenType::OpeningBrace, - "Expected '{' at beggining of block", - ); - let mut body = Vec::new(); - while !self.eof() && self.peek().tt != TokenType::ClosingBrace { - body.push(self.statement()); - } - self.advance(); - Stmt::If { - expr: (condition), - body: (body), - else_if: (Vec::new()), - els: (None), - } // TODO: implement else if and else - } - - fn for_statement(&mut self) -> Stmt { - let binding = self.expression(); - let Expr::Variable(binding) = binding else { - panic!("Left side of for statement must be identifier"); - }; - - self.consume( - TokenType::In, - "Expected 'in' in between identifier and range", - ); - - // let range_start = self.expression(); - // self.consume( - // TokenType::DotDot, - // "Expected '..' 
denoting min and max of range", - // ); - // let range_end = self.expression(); - - let expr = self.expression(); - - self.consume(TokenType::OpeningBrace, "Expected '{' after iterator"); - - let mut body = Vec::new(); - while !self.eof() && self.peek().tt != TokenType::ClosingBrace { - body.push(self.statement()); - } - self.advance(); - - Stmt::For { - name: (binding), - iter: (expr), - body: (body), - } - } // TODO: Fix this garbage - - fn while_statement(&mut self) -> Stmt { - let condition = self.expression(); - - self.consume( - TokenType::OpeningBrace, - "Expected '{' at beggining of block", - ); - let mut body = Vec::new(); - while !self.eof() && self.peek().tt != TokenType::ClosingBrace { - println!("{:?}", self.peek().tt); - body.push(self.statement()); - } - self.consume( - TokenType::ClosingBrace, - "Expected '}' after block on while loop", - ); - - self.advance(); - Stmt::While { condition, body } - } - - fn expression_statement(&mut self) -> Stmt { - let expr = self.expression(); - - // FIXME: Move assignment handling - // if self.advance_if_eq(&TokenType::Eq) { - // if let Expr::Literal(_ident) = &expr { - // let value = self.expression(); - - // self.consume( - // TokenType::SemiColon, - // "Expected ';' at end of - // statement", - // ); // return Stmt::DefineVariable { - // // name: (ident.clone()), - // // value: (value), - // // typ: (None), - // // }; - // return Stmt::ExprStmt(expr); - // } - // } - - self.consume( - TokenType::SemiColon, - "Expected ';' at end of expr statement", - ); - Stmt::ExprStmt(expr) - } - - fn function_statement(&mut self) -> Stmt { - let TokenType::Identifier(ident) = self.advance().unwrap().tt.clone() else { - panic!("Identifier expected after 'fn'"); - }; - - self.consume(TokenType::OpeningParen, "Expected '(' after identifier"); - let mut args: Vec = Vec::new(); - while !self.eof() && self.peek().tt != TokenType::ClosingParen { - let TokenType::Identifier(name) = self.advance().unwrap().tt.clone() else { - panic!("parameter expected after '('"); - }; - - let mut typ: Option = None; - - if self.peek().tt.clone() == TokenType::Colon { - self.consume(TokenType::Colon, "How did you even get this error?"); - let TokenType::Identifier(name) = self.peek().tt.clone() else { - panic!("Type expected after ':', not {:?}", self.peek()); - }; - self.advance(); - typ = Some(name); - } - - self.advance_if_eq(&TokenType::Comma); - - let arg = FuncArgs { - name: (name), - typ: (typ), - }; - args.push(arg); - } - self.advance(); - let mut typ: Option = None; - if self.peek().tt.clone() == TokenType::Arrow { - self.advance(); - let TokenType::Identifier(name) = self.peek().tt.clone() else { - panic!("Type expected after ':', not {:?}", self.peek()); - }; - typ = Some(name); - self.advance(); - } - self.consume(TokenType::OpeningBrace, "Expected '{' after parameters"); - let mut body = Vec::new(); - while !self.eof() && self.peek().tt != TokenType::ClosingBrace { - body.push(self.statement()); - } - self.consume(TokenType::ClosingBrace, "Expected '}' after body"); - - Stmt::DefineFunction { - ident: (ident), - args: (args), - body: (body), - return_type: (typ), - } - } - - fn return_statement(&mut self) -> Stmt { - let expr = self.expression(); - self.consume(TokenType::SemiColon, "Expected ';' after return statement"); - Stmt::Return { value: (expr) } - } -} - -#[cfg(test)] -mod tests { - use itertools::Itertools; - - use super::{AstParser, Stmt}; - use crate::lexer::Lexer; - use crate::parser::ast::{BinaryOp, Expr, FuncArgs, Literal, UnaryOp}; - - #[test] 
- fn basic_statement_a() { - let lexer = Lexer::new("var test_a: int = 5 + 3;"); - let tokens = lexer.collect_vec(); - - let expected_ast = Stmt::DefineVariable { - name: ("test_a".to_string()), - value: (Expr::BinaryOp { - op: (BinaryOp::Add), - lhs: (Box::new(Expr::Literal(Literal::Integer(5)))), - rhs: (Box::new(Expr::Literal(Literal::Integer(3)))), - }), - typ: Some("int".to_string()), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.statement(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - - #[test] - fn basic_statement_b() { - let lexer = Lexer::new("val test_b = \"Hello World\";"); - let tokens = lexer.collect_vec(); - - let expected_ast = Stmt::DefineValue { - name: ("test_b".to_string()), - value: (Expr::Literal(Literal::String("Hello World".to_string()))), - typ: (None), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.statement(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - - #[test] - fn basic_statement_c() { - let lexer = Lexer::new( - "\ - fn test_c (a, b, c) {\nreturn (a + b * c);\n}", - ); - let tokens = lexer.collect_vec(); - println!("{tokens:?}"); - - let expected_ast = Stmt::DefineFunction { - ident: ("test_c".to_string()), - args: (vec![ - FuncArgs { - name: ("a".to_string()), - typ: None, - }, - FuncArgs { - name: ("b".to_string()), - typ: None, - }, - FuncArgs { - name: ("c".to_string()), - typ: None, - }, - ]), - body: (vec![Stmt::Return { - value: (Expr::Grouping(Box::new(Expr::BinaryOp { - op: BinaryOp::Add, - lhs: Box::new(Expr::Variable("a".to_string())), - rhs: Box::new(Expr::BinaryOp { - op: BinaryOp::Mul, - lhs: Box::new(Expr::Variable("b".to_string())), - rhs: Box::new(Expr::Variable("c".to_string())), - }), - }))), - }]), - return_type: (None), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.statement(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - #[test] - fn basic_statement_d() { - let lexer = Lexer::new( - "\ - while true {\nprint(\"Hello World\");\nprintln(5 + 7/-3);\n}", - ); - let tokens = lexer.collect_vec(); - println!("{tokens:?}"); - - let expected_ast = Stmt::While { - condition: (Expr::Literal(Literal::Bool(true))), - body: (vec![ - Stmt::ExprStmt(Expr::Call { - ident: Box::new(Expr::Variable("print".to_string())), - args: (vec![Expr::Literal(Literal::String("Hello World".to_string()))]), - }), - Stmt::ExprStmt(Expr::Call { - ident: Box::new(Expr::Variable("println".to_string())), - args: (vec![Expr::BinaryOp { - op: (BinaryOp::Add), - lhs: (Box::new(Expr::Literal(Literal::Integer(5)))), - rhs: (Box::new(Expr::BinaryOp { - op: (BinaryOp::Div), - lhs: (Box::new(Expr::Literal(Literal::Integer(7)))), - rhs: (Box::new(Expr::UnaryOp { - op: (UnaryOp::Neg), - value: (Box::new(Expr::Literal(Literal::Integer(3)))), - })), - })), - }]), - }), - ]), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.statement(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - #[test] - fn basic_statement_e() { - let lexer = Lexer::new( - "\ - if a+5 > 10 {\nprint(a);\n}\nif a+5 < 10 {\nprintln(10);\n}\nif a+5 
== 10 \ - {\nprint(toString(10));\na = true;\n}", - ); - let tokens = lexer.collect_vec(); - // println!("{tokens:?}"); - - let expected_ast = vec![ - Stmt::If { - expr: (Expr::BinaryOp { - op: (BinaryOp::Gt), - lhs: (Box::new(Expr::BinaryOp { - op: (BinaryOp::Add), - lhs: (Box::new(Expr::Variable("a".to_string()))), - rhs: (Box::new(Expr::Literal(Literal::Integer(5)))), - })), - rhs: (Box::new(Expr::Literal(Literal::Integer(10)))), - }), - body: (vec![Stmt::ExprStmt(Expr::Call { - ident: (Box::new(Expr::Variable("print".to_string()))), - args: (vec![Expr::Variable("a".to_string())]), - })]), - else_if: (Vec::new()), - els: (None), - }, - Stmt::If { - expr: (Expr::BinaryOp { - op: (BinaryOp::Lt), - lhs: (Box::new(Expr::BinaryOp { - op: (BinaryOp::Add), - lhs: (Box::new(Expr::Variable("a".to_string()))), - rhs: (Box::new(Expr::Literal(Literal::Integer(5)))), - })), - rhs: (Box::new(Expr::Literal(Literal::Integer(10)))), - }), - body: (vec![Stmt::ExprStmt(Expr::Call { - ident: (Box::new(Expr::Variable("println".to_string()))), - args: (vec![Expr::Literal(Literal::Integer(10))]), - })]), - else_if: (Vec::new()), - els: (None), - }, - Stmt::If { - expr: (Expr::BinaryOp { - op: (BinaryOp::EqEq), - lhs: (Box::new(Expr::BinaryOp { - op: (BinaryOp::Add), - lhs: (Box::new(Expr::Variable("a".to_string()))), - rhs: (Box::new(Expr::Literal(Literal::Integer(5)))), - })), - rhs: (Box::new(Expr::Literal(Literal::Integer(10)))), - }), - body: (vec![ - Stmt::ExprStmt(Expr::Call { - ident: (Box::new(Expr::Variable("print".to_string()))), - // ident: (Box::new(Expr::Literal(Literal::String("print".to_string())))), - args: (vec![Expr::Call { - ident: (Box::new(Expr::Variable("toString".to_string()))), - // ident: Box::new(Expr::Literal(Literal::String("toString". - // to_string()))), - args: vec![Expr::Literal(Literal::Integer(10))], - }]), - }), - Stmt::AssignVariable { - name: ("a".to_string()), - value: (Expr::Literal(Literal::Bool(true))), - }, - ]), - - else_if: (Vec::new()), - els: (None), - }, - ]; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.parse(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - - #[test] - fn basic_statement_f() { - let lexer = Lexer::new("test_a = 5 + 3;"); - let tokens = lexer.collect_vec(); - - let expected_ast = Stmt::AssignVariable { - name: ("test_a".to_string()), - value: (Expr::BinaryOp { - op: (BinaryOp::Add), - lhs: (Box::new(Expr::Literal(Literal::Integer(5)))), - rhs: (Box::new(Expr::Literal(Literal::Integer(3)))), - }), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.statement(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - #[test] - fn basic_statement_g() { - let lexer = Lexer::new( - "\ - fn times_two(x: int) -> int {\nval y: int = x*2;\nreturn y;\n}", - ); - let tokens = lexer.collect_vec(); - - let expected_ast = Stmt::DefineFunction { - ident: ("times_two".to_string()), - args: (vec![FuncArgs { - name: ("x".to_string()), - typ: (Some("int".to_string())), - }]), - body: (vec![ - Stmt::DefineValue { - name: "y".to_string(), - value: (Expr::BinaryOp { - op: (BinaryOp::Mul), - lhs: (Box::new(Expr::Variable("x".to_string()))), - rhs: (Box::new(Expr::Literal(Literal::Integer(2)))), - }), - typ: Some("int".to_string()), - }, - Stmt::Return { - value: (Expr::Variable("y".to_string())), - }, - 
]), - - return_type: Some("int".to_string()), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.statement(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - - #[test] - fn basic_statement_h() { - let lexer = Lexer::new("for i in 1 .. 3 {\nfor j in [1, 2, 3] {\nprint(j*i);}}"); - let tokens = lexer.collect_vec(); - - let expected_ast = Stmt::For { - name: ("i".to_string()), - iter: (Expr::BinaryOp { - op: (BinaryOp::Range), - lhs: (Box::new(Expr::Literal(Literal::Integer(1)))), - rhs: (Box::new(Expr::Literal(Literal::Integer(3)))), - }), - body: (vec![Stmt::For { - name: ("j".to_string()), - iter: (Expr::Literal(Literal::List(vec![ - Expr::Literal(Literal::Integer(1)), - Expr::Literal(Literal::Integer(2)), - Expr::Literal(Literal::Integer(3)), - ]))), - body: (vec![Stmt::ExprStmt(Expr::Call { - ident: Box::new(Expr::Variable("print".to_string())), - args: (vec![Expr::BinaryOp { - op: (BinaryOp::Mul), - lhs: (Box::new(Expr::Variable("j".to_string()))), - rhs: (Box::new(Expr::Variable("i".to_string()))), - }]), - })]), - }]), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.statement(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } -} diff --git a/crates/sloth_asm/Cargo.toml b/crates/sloth_asm/Cargo.toml deleted file mode 100644 index b3ae934..0000000 --- a/crates/sloth_asm/Cargo.toml +++ /dev/null @@ -1,8 +0,0 @@ -[package] -name = "sloth_asm" - -license.workspace = true -version.workspace = true -edition.workspace = true - -[dependencies] diff --git a/crates/sloth_asm/src/lib.rs b/crates/sloth_asm/src/lib.rs deleted file mode 100644 index 8b13789..0000000 --- a/crates/sloth_asm/src/lib.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/crates/sloth_bytecode/Cargo.toml b/crates/sloth_bytecode/Cargo.toml deleted file mode 100644 index 0de211f..0000000 --- a/crates/sloth_bytecode/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "sloth_bytecode" - -license.workspace = true -version.workspace = true -edition.workspace = true - -[dependencies] -byteorder = "1.4.3" diff --git a/crates/sloth_bytecode/src/lib.rs b/crates/sloth_bytecode/src/lib.rs deleted file mode 100644 index c8152f6..0000000 --- a/crates/sloth_bytecode/src/lib.rs +++ /dev/null @@ -1,76 +0,0 @@ -#![allow(dead_code)] -#![warn( - clippy::wildcard_imports, - clippy::string_add, - clippy::string_add_assign, - clippy::manual_ok_or, - unused_lifetimes -)] - -pub enum Error { - UnknownOpcode(u8), - InvalidArguments, - Eof, -} - -macro_rules! opcodes { - ( $( $code:literal $name:ident $docs:literal ),* ) => { - #[repr(u8)] - #[derive(Debug, Clone, Copy, Eq, PartialEq)] - pub enum Opcode { - $( - #[doc = $docs] - $name = $code - ),* - } - - impl Opcode { - pub fn into_u8(self) -> u8 { - self as u8 - } - - pub fn from_u8(value: u8) -> Opcode { - match value { - $( $code => Self:: $name , )* - _ => panic!("Invalid opcode"), - } - } - } - }; -} - -opcodes! 
{ - 0x00 Constant "Push a constant value onto the stack", - 0x01 Load "Load a value from a variable", - 0x02 Push "Push a value to a variable", - - 0x10 Dup "Duplicate a value on the stack", - 0x11 Pop "Pop a value from the stack", - - 0x12 GetGlobal "Get a global value", - 0x13 SetGlobal "Set a global value", - 0x14 GetLocal "Get a local value", - 0x15 SetLocal "Set a local value", - 0x16 Box "Box a value on the stack", - - 0x20 Add "Add the last 2 values on the stack", - 0x21 Sub "Subtract the last 2 values on the stack", - 0x22 Mul "Multiply the last 2 values on the stack", - 0x23 Div "Divide the last 2 values on the stack", - 0x24 Mod "Modulo the last 2 values on the stack", - - 0x30 Eq "Check if the last 2 values on the stack are equal", - 0x31 Ne "Check if the last 2 values on the stack are not equal", - - 0x40 Jump "Jump to a specific point in the program", - 0x41 JumpIf "Jump to a specific point in the program if true is on the stack", - - 0x50 Call "Call function on stack", - 0x51 CallNative "Call native function", - 0x52 Return "Return from function on stack", - - 0xE0 Halt "Halt the program", - - 0xF0 VMReturn "[DEBUG] Pop value from stack and return it fromthe program", - 0xF1 VMPrint "[DEBUG] Print value to console" -} diff --git a/crates/sloth_vm/Cargo.toml b/crates/sloth_vm/Cargo.toml deleted file mode 100644 index f484402..0000000 --- a/crates/sloth_vm/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "sloth_vm" - -license.workspace = true -version.workspace = true -edition.workspace = true - -[dependencies] -sloth_bytecode = { path = "../sloth_bytecode" } - -once_cell = "1.17.1" -rand = "0.8.5" diff --git a/crates/sloth_vm/src/lib.rs b/crates/sloth_vm/src/lib.rs deleted file mode 100644 index 9cf552b..0000000 --- a/crates/sloth_vm/src/lib.rs +++ /dev/null @@ -1,155 +0,0 @@ -#![allow(dead_code)] -#![warn( - clippy::wildcard_imports, - clippy::string_add, - clippy::string_add_assign, - clippy::manual_ok_or, - unused_lifetimes -)] - -pub mod native; -pub mod sloth_std; -pub mod value; -pub mod vm; - -use std::ops::{Index, IndexMut}; - -use value::{Object, ObjectType}; - -use crate::value::Primitive; -pub use crate::vm::VM; - -#[derive(Default)] -pub struct Chunk { - pub constants: Vec, - pub code: Vec, -} - -const STACK_SIZE: usize = 1024; - -#[derive(Debug)] -pub struct Stack { - stack: [Primitive; STACK_SIZE], - top: usize, -} - -impl Default for Stack { - fn default() -> Self { - Self { - top: Default::default(), - stack: [Primitive::Empty; STACK_SIZE], - } - } -} - -impl Stack { - #[inline(always)] - pub fn push(&mut self, value: Primitive) { - if self.top >= STACK_SIZE { - panic!("Stack overflow"); - } - - self.stack[self.top] = value; - self.top += 1; - } - - #[inline(always)] - pub fn pop(&mut self) -> Primitive { - if self.top == 0 { - panic!("Stack underflow"); - } - - self.top -= 1; - self.stack[self.top] - } - - #[inline(always)] - pub fn pop2(&mut self) -> (Primitive, Primitive) { - (self.pop(), self.pop()) - } - - #[inline(always)] - pub fn peek(&self) -> Primitive { - self.stack[self.top - 1] - } - - #[inline(always)] - pub fn peek_nth(&self, nth: usize) -> Primitive { - self.stack[self.top - 1 - nth] - } -} - -impl Index for Stack { - type Output = Primitive; - - fn index(&self, index: usize) -> &Self::Output { - &self.stack[index] - } -} - -impl IndexMut for Stack { - fn index_mut(&mut self, index: usize) -> &mut Self::Output { - &mut self.stack[index] - } -} - -pub struct ObjectMap { - free: usize, - heap: Vec, -} - -impl Default for ObjectMap { - 
fn default() -> Self { - Self::with_capacity(32) - } -} - -impl From> for ObjectMap { - fn from(heap: Vec) -> Self { - let mut free = heap.len(); - for (idx, obj) in heap.iter().enumerate() { - if let ObjectType::Free { .. } = obj.typ { - free = idx; - break; - } - } - - Self { free, heap } - } -} - -impl ObjectMap { - pub fn with_capacity(capacity: usize) -> Self { - let mut heap = Vec::with_capacity(capacity); - for i in 0..capacity { - heap.push(Object::new(ObjectType::Free { next: i + 1 })); - } - - Self { free: 0, heap } - } - - pub fn allocate(&mut self, object: Object) -> usize { - let current = self.free; - if current >= self.heap.len() { - self.heap - .push(Object::new(ObjectType::Free { next: current + 1 })) - } - - let ObjectType::Free { next } = self.heap[current].typ else { - panic!("Allocation failed: Expected free location wasn't free"); - }; - - self.heap[current] = object; - self.free = next; - - current - } - - pub fn get(&self, idx: usize) -> Option<&Object> { - self.heap.get(idx) - } - - pub fn get_mut(&mut self, idx: usize) -> Option<&mut Object> { - self.heap.get_mut(idx) - } -} diff --git a/crates/sloth_vm/src/native.rs b/crates/sloth_vm/src/native.rs deleted file mode 100644 index fbd2626..0000000 --- a/crates/sloth_vm/src/native.rs +++ /dev/null @@ -1,19 +0,0 @@ -use crate::value::Primitive; -use crate::VM; - -pub type NativeFunctionResult = Result; -pub type NativeFunctionInput = fn(&mut VM, &[Primitive]) -> NativeFunctionResult; - -pub enum Error { - InvalidArgument, - Unknown(String), -} - -#[allow(clippy::type_complexity)] -pub struct NativeFunction { - pub name: &'static str, - pub function: NativeFunctionInput, - pub arity: u8, - pub returns_value: bool, - pub doc: Option<&'static str>, -} diff --git a/crates/sloth_vm/src/sloth_std/file.rs b/crates/sloth_vm/src/sloth_std/file.rs deleted file mode 100644 index b0b476a..0000000 --- a/crates/sloth_vm/src/sloth_std/file.rs +++ /dev/null @@ -1,83 +0,0 @@ -use std::fs; - -use crate::native::{self, NativeFunction, NativeFunctionResult}; -use crate::value::{Object, ObjectType, Primitive}; -use crate::VM; - -fn file_read(vm: &mut VM, args: &[Primitive]) -> NativeFunctionResult { - let Some(Primitive::Object(ptr)) = args.get(0).cloned() else { - return Err(native::Error::InvalidArgument); - }; - - let object = vm - .objects() - .get(ptr as usize) - .ok_or(native::Error::InvalidArgument)?; - - let ObjectType::String(str) = &object.typ else { - return Err(native::Error::InvalidArgument); - }; - - let contents = fs::read_to_string(str).expect("IO Error: Failed to read file!"); - - let object = Object::new(ObjectType::String(contents)); - let ptr = vm.objects_mut().allocate(object); - - Ok(Primitive::Object(ptr as u32)) -} - -pub const FILE_READ: NativeFunction = NativeFunction { - name: "file$read", - function: file_read, - arity: 1, - returns_value: true, - doc: Some( - "NativeFunction file$read: \n\targs: path (str)\n\tdesc: Returns the contents of a file \ - at \n\tExample: `var todo = file$read('/home/sloth/todo.txt'); # Assuming the \ - contents of todo.txt are 'Take a nap' then todo = 'Take a nap'`", - ), -}; - -fn file_write(vm: &mut VM, args: &[Primitive]) -> NativeFunctionResult { - let Some(Primitive::Object(path_ptr)) = args.get(0).cloned() else { - return Err(native::Error::InvalidArgument); - }; - - let path_object = vm - .objects() - .get(path_ptr as usize) - .ok_or(native::Error::InvalidArgument)?; - - let ObjectType::String(path) = &path_object.typ else { - return Err(native::Error::InvalidArgument); - }; 
- - let Some(Primitive::Object(content_ptr)) = args.get(1).cloned() else { - return Err(native::Error::InvalidArgument); - }; - - let content_object = vm - .objects() - .get(content_ptr as usize) - .ok_or(native::Error::InvalidArgument)?; - - let ObjectType::String(content) = &content_object.typ else { - return Err(native::Error::InvalidArgument); - }; - - let _ = fs::write(path, content); - - Ok(Primitive::Empty) -} - -pub const FILE_WRITE: NativeFunction = NativeFunction { - name: "file$write", - function: file_write, - arity: 2, - returns_value: false, - doc: Some( - "NativeFunction file$write: \n\targs: path (str), content (str)\n\tdesc: Writes \ - to file at \n\tExample: `file$write('/home/sloth/todo.txt', 'Take a nap'); # \ - todo.txt now contains the string 'Take a nap'`", - ), -}; diff --git a/crates/sloth_vm/src/sloth_std/misc.rs b/crates/sloth_vm/src/sloth_std/misc.rs deleted file mode 100644 index ca08d1d..0000000 --- a/crates/sloth_vm/src/sloth_std/misc.rs +++ /dev/null @@ -1,39 +0,0 @@ -use crate::native::{self, NativeFunction, NativeFunctionResult}; -use crate::value::{Object, ObjectType, Primitive}; -use crate::VM; - -fn get_doc(vm: &mut VM, args: &[Primitive]) -> NativeFunctionResult { - let Some(Primitive::Object(ptr)) = args.get(0).cloned() else { - return Err(native::Error::InvalidArgument); - }; - - let object = vm - .objects() - .get(ptr as usize) - .ok_or(native::Error::InvalidArgument)?; - - let ObjectType::NativeFunction(fnc) = &object.typ else { - return Err(native::Error::InvalidArgument); - }; - - let docs = fnc - .doc - .expect("Oopsie Poopsie the stringy no worky") - .to_string(); - let object = Object::new(ObjectType::String(docs)); - let ptr = vm.objects_mut().allocate(object); - - Ok(Primitive::Object(ptr as u32)) -} - -pub const DOCS: NativeFunction = NativeFunction { - name: "docs", - function: get_doc, - arity: 1, - returns_value: true, - doc: Some( - "NativeFunction docs: \n\targs: name (str)\n\tdesc: Returns documentaiton on a function \ - with name \n\tExample: `var doc = docs('wait'); # Returns the documentation of the \ - 'wait' function to doc`", - ), -}; diff --git a/crates/sloth_vm/src/sloth_std/mod.rs b/crates/sloth_vm/src/sloth_std/mod.rs deleted file mode 100644 index ff761a6..0000000 --- a/crates/sloth_vm/src/sloth_std/mod.rs +++ /dev/null @@ -1,43 +0,0 @@ -use std::collections::HashMap; - -use once_cell::sync::Lazy; - -use crate::native::NativeFunction; - -pub mod file; -pub mod misc; -pub mod rand; -pub mod stdio; -pub mod term; -pub mod time; - -pub static NATIVE_LIBRARY: Lazy> = Lazy::new(|| { - let mut map = HashMap::new(); - - // rand - map.insert("rand$gen", rand::GEN_FUNCTION); - map.insert("rand$gen_range", rand::GEN_RANGE_FUNCTION); - - // stdio - map.insert("write", stdio::WRITE_FUNCTION); - map.insert("writeln", stdio::WRITELN_FUNCTION); - map.insert("read", stdio::READ_FUNCTION); - - // term - map.insert("term$clear", term::TERM_CLEAR); - map.insert("term$setpos", term::TERM_SETPOS); - - // filesystem - // TODO: Make the files commands work by making a global file variable with - // certain permissions created by 'file.open' instead of just reading the file. 
- map.insert("file$read", file::FILE_READ); - map.insert("file$write", file::FILE_WRITE); - - // time - map.insert("wait", time::WAIT); - - // doc - map.insert("docs", misc::DOCS); - - map -}); diff --git a/crates/sloth_vm/src/sloth_std/rand.rs b/crates/sloth_vm/src/sloth_std/rand.rs deleted file mode 100644 index 870cca1..0000000 --- a/crates/sloth_vm/src/sloth_std/rand.rs +++ /dev/null @@ -1,48 +0,0 @@ -use rand::Rng; - -use crate::native::{self, NativeFunction, NativeFunctionResult}; -use crate::value::Primitive; -use crate::value::Primitive::{Float, Integer}; -use crate::VM; - -fn gen(_vm: &mut VM, _args: &[Primitive]) -> NativeFunctionResult { - let value = rand::thread_rng().gen_range(0.0..1.0); - - Ok(Float(value)) -} - -pub const GEN_FUNCTION: NativeFunction = NativeFunction { - name: "rand$gen", - function: gen, - arity: 0, - returns_value: true, - doc: Some( - "NativeFunction rand$gen:\n\tdesc: Returns a random number in the range `0.0 .. \ - 1.0`\n\tExample: `var num = rand$gen(); # num could be any number from 0.0 to 1.0`", - ), -}; - -fn gen_range(_vm: &mut VM, args: &[Primitive]) -> NativeFunctionResult { - let min = args.get(0).cloned(); - let max = args.get(1).cloned(); - - let (Some(Integer(min)), Some(Integer(max))) = (min, max) else { - return Err(native::Error::InvalidArgument); - }; - - let value = rand::thread_rng().gen_range(min..max); - - Ok(Integer(value)) -} - -pub const GEN_RANGE_FUNCTION: NativeFunction = NativeFunction { - name: "rand$gen_range", - function: gen_range, - arity: 2, - returns_value: true, - doc: Some( - "NativeFunction rand$gen_range: \n\targs: min (int), max (int)\n\tdesc: Returns a random \ - numnber in the range .. \n\tExample: `var num = rand$gen_range(20, 76); # num \ - could be any number from 20 to 76`", - ), -}; diff --git a/crates/sloth_vm/src/sloth_std/stdio.rs b/crates/sloth_vm/src/sloth_std/stdio.rs deleted file mode 100644 index f56b604..0000000 --- a/crates/sloth_vm/src/sloth_std/stdio.rs +++ /dev/null @@ -1,91 +0,0 @@ -use std::io::{stdin, BufRead}; - -use crate::native::{self, NativeFunction, NativeFunctionResult}; -use crate::value::{Object, ObjectType, Primitive}; -use crate::VM; - -fn write(vm: &mut VM, args: &[Primitive]) -> NativeFunctionResult { - let Some(Primitive::Object(ptr)) = args.get(0).cloned() else { - return Err(native::Error::InvalidArgument); - }; - - let object = vm - .objects() - .get(ptr as usize) - .ok_or(native::Error::InvalidArgument)?; - - let ObjectType::String(str) = &object.typ else { - return Err(native::Error::InvalidArgument); - }; - - print!("{str}"); - - Ok(Primitive::Empty) -} - -pub const WRITE_FUNCTION: NativeFunction = NativeFunction { - name: "write", - function: write, - arity: 1, - returns_value: false, - doc: Some( - "NativeFunction write: \n\targs: string (str)\n\tdesc: Writes to the \ - terminal.\n\tExample: `write(\"I'm sleepy...\"); # Output: I'm sleepy...`", - ), -}; - -fn writeln(vm: &mut VM, args: &[Primitive]) -> NativeFunctionResult { - let Some(Primitive::Object(ptr)) = args.get(0).cloned() else { - return Err(native::Error::InvalidArgument); - }; - - let object = vm - .objects() - .get(ptr as usize) - .ok_or(native::Error::InvalidArgument)?; - - let ObjectType::String(str) = &object.typ else { - return Err(native::Error::InvalidArgument); - }; - - println!("{str}"); - - Ok(Primitive::Empty) -} - -pub const WRITELN_FUNCTION: NativeFunction = NativeFunction { - name: "writeln", - function: writeln, - arity: 1, - returns_value: false, - doc: Some( - "NativeFunction writeln: 
\n\targs: string (str)\n\tdesc: Writes to the terminal \ - and starts a new line.\n\tExample: `writeln(\"I'm sleepy...\"); # Output: I'm \ - sleepy...\n # This is a new line`", - ), -}; - -fn read(vm: &mut VM, _args: &[Primitive]) -> NativeFunctionResult { - let mut line = String::new(); - stdin() - .lock() - .read_line(&mut line) - .map_err(|it| native::Error::Unknown(it.to_string()))?; - - let object = Object::new(ObjectType::String(line)); - let ptr = vm.objects_mut().allocate(object); - - Ok(Primitive::Object(ptr as u32)) -} - -pub const READ_FUNCTION: NativeFunction = NativeFunction { - name: "read", - function: read, - arity: 0, - returns_value: true, - doc: Some( - "NativeFunction read:\n\tdesc: Reads input from the terminal and returns what was \ - read.\n\tExample: `var input = read(); # Hello World input = 'Hello \ - World'`", - ), -}; diff --git a/crates/sloth_vm/src/sloth_std/term.rs b/crates/sloth_vm/src/sloth_std/term.rs deleted file mode 100644 index f61321c..0000000 --- a/crates/sloth_vm/src/sloth_std/term.rs +++ /dev/null @@ -1,41 +0,0 @@ -use crate::native::{self, NativeFunction, NativeFunctionResult}; -use crate::value::Primitive; -use crate::value::Primitive::Integer; -use crate::VM; - -pub const TERM_CLEAR: NativeFunction = NativeFunction { - name: "term$clear", - function: |_vm, _args| { - print!("\x1b[2J\x1b[H"); - Ok(Primitive::Empty) - }, - arity: 0, - returns_value: false, - doc: Some( - "NativeFunction term$clear: \n\tdesc: Clears the terminal\n\tExample: `term$clear(); # \ - Clears the terminal`", - ), -}; - -fn term_setpos(_vm: &mut VM, args: &[Primitive]) -> NativeFunctionResult { - let x = args.get(0).cloned(); - let y = args.get(1).cloned(); - - let (Some(Integer(x)), Some(Integer(y))) = (x, y) else { - return Err(native::Error::InvalidArgument); - }; - print!("\x1b[{x};{y}H"); - Ok(Primitive::Empty) -} - -pub const TERM_SETPOS: NativeFunction = NativeFunction { - name: "term$setpos", - function: term_setpos, - arity: 2, - returns_value: false, - doc: Some( - "NativeFunction term$setpos: \n\targs: x (int), y (int)\n\tdesc: Sets the cursors \ - position to (, )\n\tExample: `term$setpos(5, 17); # Sets the position of the \ - cursor to (5, 17)`", - ), -}; diff --git a/crates/sloth_vm/src/sloth_std/time.rs b/crates/sloth_vm/src/sloth_std/time.rs deleted file mode 100644 index b27e0b5..0000000 --- a/crates/sloth_vm/src/sloth_std/time.rs +++ /dev/null @@ -1,29 +0,0 @@ -use std::{thread, time}; - -use crate::native::{self, NativeFunction, NativeFunctionResult}; -use crate::value::Primitive; -use crate::value::Primitive::Integer; -use crate::VM; - -fn wait(_vm: &mut VM, args: &[Primitive]) -> NativeFunctionResult { - let sec = args.get(0).cloned(); - - let Some(Integer(sec)) = sec else { - return Err(native::Error::InvalidArgument); - }; - - thread::sleep(time::Duration::from_secs(sec.try_into().unwrap())); - - Ok(Primitive::Empty) -} - -pub const WAIT: NativeFunction = NativeFunction { - name: "wait", - function: wait, - arity: 1, - returns_value: false, - doc: Some( - "NativeFunction wait: \n\targs: sec (int)\n\tdesc: Waits for seconds.\n\tExample: \ - `wait(10); # Waits 10 seconds`", - ), -}; diff --git a/crates/sloth_vm/src/value.rs b/crates/sloth_vm/src/value.rs deleted file mode 100644 index 4450b5a..0000000 --- a/crates/sloth_vm/src/value.rs +++ /dev/null @@ -1,53 +0,0 @@ -use crate::native::NativeFunction; -use crate::Chunk; - -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Primitive { - Integer(i128), - Float(f64), - Bool(bool), - /// Pointer to an 
object living on heap - Object(u32), - Empty, -} - -pub struct Object { - /// If the object has been marked by the VM or not - pub(crate) marked: bool, - pub(crate) typ: ObjectType, -} - -impl Object { - pub fn new(typ: ObjectType) -> Self { - Self { marked: false, typ } - } -} - -pub enum ObjectType { - Box(Primitive), - String(String), - List(Vec), - - Function(Function), - NativeFunction(NativeFunction), - - Free { next: usize }, -} - -pub struct Function { - pub name: Option, - pub chunk: Chunk, - pub arity: u8, - pub returns_value: bool, -} - -impl Function { - pub fn root(chunk: Chunk) -> Self { - Self { - name: None, - chunk, - arity: 0, - returns_value: false, - } - } -} diff --git a/crates/sloth_vm/src/vm.rs b/crates/sloth_vm/src/vm.rs deleted file mode 100644 index 3600719..0000000 --- a/crates/sloth_vm/src/vm.rs +++ /dev/null @@ -1,610 +0,0 @@ -use std::mem::MaybeUninit; - -use sloth_bytecode::Opcode; - -use crate::value::{Function, Object, ObjectType, Primitive}; -use crate::{native, vm, ObjectMap, Stack}; - -#[derive(Clone, Copy)] -pub struct CallFrame { - pointer: usize, - stack_offset: usize, - function: *const Function, // TODO: Safety -} - -impl CallFrame { - fn new(stack_offset: usize, function: &Function) -> Self { - Self { - pointer: 0, - stack_offset, - function: function as *const _, - } - } - - #[inline] - fn function(&self) -> &Function { - unsafe { &*self.function } - } -} - -const CALL_STACK_SIZE: usize = 1024; - -pub struct CallStack { - top: usize, - frames: [MaybeUninit; CALL_STACK_SIZE], -} - -impl Default for CallStack { - fn default() -> Self { - Self { - top: 0, - frames: [MaybeUninit::uninit(); CALL_STACK_SIZE], - } - } -} - -impl CallStack { - fn push(&mut self, frame: CallFrame) { - self.frames[self.top].write(frame); - self.top += 1; - } - - fn pop(&mut self) { - self.top -= 1; - } - - fn peek(&self) -> &CallFrame { - unsafe { self.frames[self.top - 1].assume_init_ref() } - } - - fn peek_mut(&mut self) -> &mut CallFrame { - unsafe { self.frames[self.top - 1].assume_init_mut() } - } -} - -// TODO: Fix visibility -pub struct VM { - pub stack: Stack, - call_stack: CallStack, - pub objects: ObjectMap, -} - -impl Default for VM { - fn default() -> Self { - Self::init(ObjectMap::default()) - } -} - -impl VM { - pub fn init(objects: ObjectMap) -> Self { - Self { - stack: Stack::default(), - call_stack: CallStack::default(), - objects, - } - } - - pub fn new(objects: ObjectMap, mut root: Function) -> Self { - let mut this = Self::init(objects); - - // Allocating the root function - root.chunk.code.push(Opcode::Halt as u8); - this.call_stack.push(CallFrame::new(0, &root)); - this.objects - .allocate(Object::new(ObjectType::Function(root))); - - this - } - - pub fn step(&mut self) -> bool { - use Primitive::*; - - let opcode = self.read_u8(); - - match Opcode::from_u8(opcode) { - Opcode::Constant => { - let idx = self.read_u16() as usize; - let value = self.call_stack.peek().function().chunk.constants[idx]; - - self.stack.push(value); - } - - Opcode::Dup => { - let value = self.stack.pop(); - self.stack.push(value); - self.stack.push(value); - } - Opcode::Pop => { - self.stack.pop(); - } - Opcode::GetLocal => { - let idx = self.read_u16() as usize; - let value = self.stack[self.call_stack.peek().stack_offset + idx]; - - self.stack.push(value); - } - Opcode::SetLocal => { - let idx = self.read_u16() as usize; - let value = self.stack.pop(); - - self.stack[self.call_stack.peek().stack_offset + idx] = value; - } - Opcode::Box => { - // FIXME: TODO: MEGA CURSED - let 
pos = self.read_u16() as usize; - let value = self.stack.pop(); - - let object = vm::Object::new(ObjectType::Box(value)); - - self.objects.heap[pos] = object; - self.stack.push(Object(pos as u32)); - } - - Opcode::Add => { - let value = match self.stack.pop2() { - (Integer(lhs), Integer(rhs)) => Integer(lhs + rhs), - (Float(lhs), Float(rhs)) => Float(lhs + rhs), - _ => panic!(), - }; - - self.stack.push(value); - } - Opcode::Sub => { - let value = match self.stack.pop2() { - (Integer(lhs), Integer(rhs)) => Integer(lhs - rhs), - (Float(lhs), Float(rhs)) => Float(lhs - rhs), - _ => panic!(), - }; - - self.stack.push(value); - } - Opcode::Mul => { - let value = match self.stack.pop2() { - (Integer(lhs), Integer(rhs)) => Integer(lhs * rhs), - (Float(lhs), Float(rhs)) => Float(lhs * rhs), - _ => panic!(), - }; - - self.stack.push(value); - } - Opcode::Div => { - let value = match self.stack.pop2() { - (Integer(_), Integer(0)) => panic!("Divide by 0"), - (Integer(lhs), Integer(rhs)) => Integer(lhs / rhs), - (Float(lhs), Float(rhs)) => Float(lhs / rhs), - _ => panic!(), - }; - - self.stack.push(value); - } - Opcode::Mod => { - let value = match self.stack.pop2() { - (Integer(lhs), Integer(rhs)) => Integer(lhs % rhs), - (Float(lhs), Float(rhs)) => Float(lhs % rhs), - _ => panic!(), - }; - - self.stack.push(value); - } - - Opcode::Eq => { - let value = match self.stack.pop2() { - (Integer(lhs), Integer(rhs)) => Bool(lhs == rhs), - (Float(lhs), Float(rhs)) => Bool(lhs == rhs), - (Bool(lhs), Bool(rhs)) => Bool(lhs == rhs), - (Object(lhs), Object(rhs)) => Bool(lhs == rhs), - (Empty, Empty) => Bool(true), - _ => Bool(false), - }; - - self.stack.push(value); - } - Opcode::Ne => { - let value = match self.stack.pop2() { - (Integer(lhs), Integer(rhs)) => Bool(lhs != rhs), - (Float(lhs), Float(rhs)) => Bool(lhs != rhs), - (Bool(lhs), Bool(rhs)) => Bool(lhs != rhs), - (Object(lhs), Object(rhs)) => Bool(lhs != rhs), - (Empty, Empty) => Bool(false), - _ => Bool(false), - }; - - self.stack.push(value); - } - - Opcode::Jump => { - let to = self.read_u16(); - self.call_stack.peek_mut().pointer = to as usize; - } - Opcode::JumpIf => { - let to = self.read_u16(); - let value = self.stack.pop(); - - if let Bool(true) = value { - self.call_stack.peek_mut().pointer = to as usize; - } - } - - Opcode::Call => { - let Primitive::Object(ptr) = self.stack.pop() else { - panic!("Last element on stack was not an object"); - }; - - self.call(ptr as usize); - } - - Opcode::Return => { - self.call_return(); - } - - Opcode::Halt => return false, - - opcode => unimplemented!("Opcode {:?} unimplemented", opcode), - } - - true - } - - pub fn run(&mut self) { - while self.step() {} - } - - pub fn call(&mut self, ptr: usize) { - let Some(obj) = self.objects.get(ptr) else { - panic!("Pointer referenced nothing"); - }; - - match &obj.typ { - ObjectType::Function(function) => { - // Add a callstack entry for the function - let offset = self.stack.top - (function.arity as usize); - self.call_stack.push(CallFrame::new(offset, function)); - } - ObjectType::NativeFunction(function) => { - let mut args = Vec::with_capacity(function.arity as usize); - for _ in 0..function.arity { - args.push(self.stack.pop()); - } - - let name = function.name; - let returns_value = function.returns_value; - - let internal = function.function; - let result = internal(self, &args); - - match result { - Ok(value) => { - if returns_value { - self.stack.push(value); - } - } - Err(error) => match error { - native::Error::InvalidArgument => { - panic!("Invalid 
argument provided to '{name}'"); - } - native::Error::Unknown(msg) => { - panic!("Native function '{name}' failed due to '{msg}'"); - } - }, - } - } - _ => panic!("Object was not a function"), - } - } - - fn call_return(&mut self) { - let function = self.call_stack.peek().function(); - let stack_offset = self.call_stack.peek().stack_offset; - - let return_value = if function.returns_value { - Some(self.stack.pop()) - } else { - None - }; - - self.stack.top = stack_offset; - - if let Some(return_value) = return_value { - self.stack.push(return_value); - } - - self.call_stack.pop(); - } - - fn unwind(&mut self) { - unimplemented!("Implement unwinding for error handling"); - } - - #[inline(always)] - fn read_u8(&mut self) -> u8 { - let frame = self.call_stack.peek_mut(); - let function = frame.function(); - let byte = function.chunk.code[frame.pointer]; - frame.pointer += 1; - byte - } - - #[inline(always)] - fn read_u16(&mut self) -> u16 { - let frame = self.call_stack.peek_mut(); - let chunk = &frame.function().chunk; - - let bytes = (chunk.code[frame.pointer], chunk.code[frame.pointer + 1]); - - frame.pointer += 2; - - ((bytes.0 as u16) << 8) + (bytes.1 as u16) - } - - #[inline(always)] - pub fn objects(&self) -> &ObjectMap { - &self.objects - } - - #[inline(always)] - pub fn objects_mut(&mut self) -> &mut ObjectMap { - &mut self.objects - } -} - -#[cfg(test)] -mod tests { - use crate::value::{Function, Object, ObjectType, Primitive}; - use crate::{sloth_std, Chunk, ObjectMap, VM}; - - #[test] - fn arithmetic_ops() { - // Addition - let mut vm = VM::new( - ObjectMap::default(), - Function::root(Chunk { - constants: vec![Primitive::Integer(7)], - code: vec![ - 0x00, 0, 0, // Load constant from 0 - 0x10, // Duplicate - 0x20, // Add - 0xE0, - ], - }), - ); - - vm.run(); - assert_eq!(vm.stack.peek(), Primitive::Integer(14)); - - let mut vm = VM::new( - ObjectMap::default(), - Function::root(Chunk { - constants: vec![Primitive::Integer(2), Primitive::Integer(11)], - code: vec![ - 0x00, 0, 0, // Load constant from 0 - 0x00, 0, 1, // Load constant from 1 - 0x20, // Add - 0xE0, - ], - }), - ); - - vm.run(); - assert_eq!(vm.stack.peek(), Primitive::Integer(13)); - } - - #[test] - fn basic_function() { - let mut vm = VM::new( - ObjectMap::from(vec![Object::new(ObjectType::Function(Function { - name: Some("add".to_string()), - chunk: Chunk { - constants: vec![], - code: vec![0x14, 0, 0, 0x14, 0, 1, 0x20, 0x52], - }, - arity: 2, - returns_value: true, - }))]), - Function::root(Chunk { - constants: vec![ - Primitive::Integer(6), - Primitive::Integer(3), - Primitive::Object(0), - Primitive::Object(1), - Primitive::Object(2), - ], - code: vec![ - 0x00, 0, 0, // Load first function parameter from 0 - 0x00, 0, 1, // Load second function parameter from 1 - 0x00, 0, 2, // Load function constant from 2 - 0x50, // Call function - ], - }), - ); - - vm.run(); - - assert_eq!(vm.stack.peek(), Primitive::Integer(9)); - } - - #[test] - fn native_function() { - let mut vm = VM::new( - ObjectMap::from(vec![ - Object::new(ObjectType::NativeFunction(sloth_std::rand::GEN_FUNCTION)), - Object::new(ObjectType::NativeFunction( - sloth_std::rand::GEN_RANGE_FUNCTION, - )), - ]), - Function::root(Chunk { - constants: vec![ - Primitive::Object(0), - Primitive::Object(1), - Primitive::Integer(5), - Primitive::Integer(10), - ], - code: vec![ - // First part - 0x00, 0, 0, // - 0x50, // - 0xE0, // - // Second part - 0x00, 0, 3, // - 0x00, 0, 2, // - 0x00, 0, 1, // - 0x50, // - ], - }), - ); - - vm.run(); - - assert!({ - let 
Primitive::Float(i) = vm.stack.peek() else { panic!(); }; - (0.0..=1.0).contains(&i) - }); - - vm.run(); - - assert!({ - let Primitive::Integer(i) = vm.stack.peek() else { panic!(); }; - (5..10).contains(&i) - }); - } - - #[test] - fn fibonacci() { - #[rustfmt::skip] - let mut vm = VM::new( - ObjectMap::default(), - Function::root(Chunk { - constants: vec![ - Primitive::Integer(0), - Primitive::Integer(1), - Primitive::Integer(10), - ], - code: vec![ - // Load variables - 0x00, 0, 0, // 0 Index - 0x00, 0, 0, // 3 Me - 0x00, 0, 0, // 6 Parent - 0x00, 0, 1, // 9 Grandparent - - // Load parent and grandparent, sum them and put the value in me - 0x14, 0, 2, // 12 - 0x14, 0, 3, // 15 - 0x20, // 16 - 0x15, 0, 1, // 19 - - // Set grandparent to parent - 0x14, 0, 2, // 22 - 0x15, 0, 3, // 25 - - // Set parent to me - 0x14, 0, 1, // 28 - 0x15, 0, 2, // 31 - - // Increment Index by 1 - 0x00, 0, 1, // 34 - 0x14, 0, 0, // 37 Index - 0x20, // 40 - 0x15, 0, 0, // 41 Index - - // Load me - 0x14, 0, 1, // 44 - 0xE0, // 47 - 0x11, // 48 - - // Repeat until Index is 9 - 0x00, 0, 2, // 49 - 0x14, 0, 0, // 52 Index - 0x31, // 55 - 0x41, 0, 12, // 56 - ], - }), - ); - - let mut values = Vec::new(); - for _ in 0..10 { - vm.run(); - values.push(vm.stack.peek()); - } - - assert_eq!(&values, &[ - Primitive::Integer(1), - Primitive::Integer(1), - Primitive::Integer(2), - Primitive::Integer(3), - Primitive::Integer(5), - Primitive::Integer(8), - Primitive::Integer(13), - Primitive::Integer(21), - Primitive::Integer(34), - Primitive::Integer(55), - ]); - } - - #[test] - fn fibonacci_recursive() { - #[rustfmt::skip] - let mut vm = VM::new( - ObjectMap::from(vec![Object::new(ObjectType::Function(Function { - name: Some("fib".to_owned()), - chunk: Chunk { - constants: vec![ - Primitive::Object(0), - Primitive::Integer(0), - Primitive::Integer(1), - Primitive::Integer(2), - ], - code: vec![ - 0x14, 0, 0, // 0 - 0x00, 0, 1, // 3 - 0x31, // 6 - 0x41, 0, 14, // 7 - 0x00, 0, 1, // 10 - 0x52, // 13 - - 0x14, 0, 0, // 14 - 0x00, 0, 2, // 17 - 0x31, // 20 - 0x41, 0, 28, // 21 - 0x00, 0, 2, // 24 - 0x52, // 27 - - // fib(n - 1) - 0x00, 0, 2, // 28 - 0x14, 0, 0, // 31 - 0x21, // 34 - 0x00, 0, 0, // 35 - 0x50, // 38 - - // fib(n - 2) - 0x00, 0, 3, // 39 - 0x14, 0, 0, // 42 - 0x21, // 45 - 0x00, 0, 0, // 46 - 0x50, // 49 - - // add & return - 0x20, // 50 - 0x52, // 51 - ], - }, - arity: 1, - returns_value: true, - }))]), - Function::root(Chunk { - constants: vec![ - Primitive::Object(0), - Primitive::Integer(10), - ], - code: vec![ - // Load n and the function and call it - 0x00, 0, 1, // 0 - 0x00, 0, 0, // 3 - 0x50, // 6 - ], - }), - ); - - vm.run(); - - assert_eq!(Primitive::Integer(55), vm.stack.peek()); - } -} diff --git a/flake.nix b/flake.nix index 4223f11..64485bd 100644 --- a/flake.nix +++ b/flake.nix @@ -56,6 +56,16 @@ cargo-watch cargo-deny cargo-release + + pkg-config + + # Packages required for LLVM + llvmPackages_15.libllvm + libffi + libxml2 + + # C compiler for debugging + clang ]; }; } diff --git a/sloth/Cargo.toml b/sloth/Cargo.toml new file mode 100644 index 0000000..4fabdb7 --- /dev/null +++ b/sloth/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "sloth" + +license.workspace = true +version.workspace = true +edition.workspace = true + +[dependencies] +inkwell = { version = "0.2.0", features = ["llvm15-0"] } +itertools = "0.10.5" +thiserror = "1.0.40" diff --git a/sloth/src/compiler/mod.rs b/sloth/src/compiler/mod.rs new file mode 100644 index 0000000..87c0618 --- /dev/null +++ b/sloth/src/compiler/mod.rs @@ -0,0 
+1,131 @@ +#![allow(unused)] + +use std::collections::HashMap; +use std::path::Path; +use std::vec; + +use inkwell::builder::Builder; +use inkwell::context::Context; +use inkwell::module::Module; +use inkwell::targets::{ + CodeModel, FileType, InitializationConfig, RelocMode, Target, TargetMachine, +}; +use inkwell::values::IntValue; +use inkwell::OptimizationLevel; + +use crate::parser::ast::{BinaryOp, Expr, FuncArgs, Literal, Stmt, UnaryOp}; + +pub struct Compiler<'ctx> { + context: &'ctx Context, + builder: Builder<'ctx>, + module: Module<'ctx>, +} + +impl<'ctx> Compiler<'ctx> { + pub fn new(context: &'ctx Context) -> Self { + let builder = context.create_builder(); + let module = context.create_module("sloth"); + + Self { + context, + builder, + module, + } + } + + pub fn compile(&self, src: Vec) { + for stmt in src { + match stmt { + Stmt::DefineFunction { + ident, + args, + body, + return_type, + } => { + self.compile_function(&ident, &args, return_type.is_some(), body); + } + _ => panic!("You may only define a function top level"), + } + } + + Target::initialize_native(&InitializationConfig::default()).unwrap(); + + let triple = TargetMachine::get_default_triple(); + let target = Target::from_triple(&triple).unwrap(); + let machine = target + .create_target_machine( + &triple, + "x86-64", + "", + OptimizationLevel::None, + RelocMode::Default, + CodeModel::Default, + ) + .unwrap(); + + self.module.set_triple(&triple); + machine + .write_to_file(&self.module, FileType::Object, Path::new("output.o")) + .unwrap(); + } + + fn compile_function(&self, identifier: &str, args: &[FuncArgs], returns: bool, src: Vec) { + let void_type = self.context.void_type(); + let i64_type = self.context.i64_type(); + + let function_type = if returns { + i64_type.fn_type(&vec![i64_type.into(); args.len()], false) + } else { + void_type.fn_type(&vec![i64_type.into(); args.len()], false) + }; + let function = self.module.add_function(identifier, function_type, None); + + let basic_block = self.context.append_basic_block(function, "body"); + + self.builder.position_at_end(basic_block); + + let mut arg_values = HashMap::::new(); + for (i, arg) in args.iter().enumerate() { + arg_values.insert( + arg.name.clone(), + function.get_nth_param(i as u32).unwrap().into_int_value(), + ); + } + + for stmt in src { + match stmt { + Stmt::Return { value } => match value { + Expr::BinaryOp { op, lhs, rhs } => { + let lhs = match *lhs { + Expr::Variable(a) => arg_values[&a], + _ => unimplemented!(), + }; + + let rhs = match *rhs { + Expr::Variable(a) => arg_values[&a], + _ => unimplemented!(), + }; + + let res = match op { + BinaryOp::Add => self.builder.build_int_add(lhs, rhs, "addop"), + BinaryOp::Sub => self.builder.build_int_sub(lhs, rhs, "subop"), + _ => unimplemented!(), + }; + + self.builder.build_return(Some(&res)); + return; + } + Expr::Variable(name) => { + let var = arg_values[&name]; + self.builder.build_return(Some(&var)); + return; + } + _ => unimplemented!(), + }, + _ => unimplemented!(), + } + } + + self.builder.build_return(None); + } +} diff --git a/sloth/src/lexer.rs b/sloth/src/lexer.rs new file mode 100644 index 0000000..0afaf1c --- /dev/null +++ b/sloth/src/lexer.rs @@ -0,0 +1,559 @@ +#![allow(dead_code)] + +//! 
TODO: Lexing Regex Literals + +use std::str::Chars; + +use thiserror::Error; + +#[derive(Debug, Clone, PartialEq, Error)] +pub enum LexerError { + #[error("Unexpected token")] + UnexpectedToken, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum TokenType { + // Meta + DocComment, + Comment, + + // Brackets + OpeningParen, // ( + ClosingParen, // ) + OpeningBracket, // [ + ClosingBracket, // ] + OpeningBrace, // { + ClosingBrace, // } + + // Operators + Plus, // + + PlusPlus, // ++ + Minus, // - + Star, // * + StarStar, // ** + Slash, // / + Perc, // % + Tilde, // ~ + + PlusEq, // += + PlusPlusEq, // ++= + MinusEq, // -= + StarEq, // *= + StarStarEq, // **= + SlashEq, // /= + PercEq, // %= + TildeEq, // ~= + + Amp, // & + AmpAmp, // && + Pipe, // | + PipePipe, // || + Caret, // ^ + + Eq, // = + EqEq, // == + Bang, // ! + BangBang, // !! + BangEq, // != + + Lt, // < + LtLt, // << + LtEq, // <= + LtLtEq, // <<= + Gt, // > + GtGt, // >> + GtEq, // >= + GtGtEq, // >>= + + Comma, + + Question, // ? + QuestionDot, // ?. + QuestionQuestion, // ?? + Dot, // . + DotDot, // .. + + Colon, // : + ColonColon, // :: + SemiColon, // ; + + Arrow, // -> + FatArrow, // => + + // Keywords + Val, + Var, + + Fn, + Return, + + If, + Else, + + While, + For, + In, + + Loop, + Break, + Continue, + + As, + + // Literals + Integer(i128), + Float(f64), + Boolean(bool), + Character(char), + String(String), + Regex(String), + + Identifier(String), + + // Utility + Error(LexerError), +} + +#[derive(Debug, Default, Clone, Copy)] +pub struct Location { + index: usize, + pub row: u32, + pub col: u32, +} + +impl Location { + fn advance(&mut self, len: usize, newline: bool) { + if newline { + self.row += 1; + self.col = 0; + } else { + self.col += 1; + } + self.index += len; + } +} + +#[derive(Debug)] +pub struct Token<'a> { + pub tt: TokenType, + pub lexeme: &'a str, + + start: Location, + end: Location, +} + +pub struct Lexer<'a> { + source: &'a [u8], + window: [char; 3], + chars: Chars<'a>, + + start: Location, + current: Location, + + // Keep track if the lexer has encountered an error to stop lexing asap + errored: bool, +} + +impl<'a> Lexer<'a> { + pub(crate) fn new(source: &'a str) -> Self { + let mut chars = source.chars(); + let window = [ + chars.next().unwrap_or('\0'), + chars.next().unwrap_or('\0'), + chars.next().unwrap_or('\0'), + ]; + + Self { + source: source.as_bytes(), + window, + chars, + start: Default::default(), + current: Default::default(), + errored: false, + } + } +} + +impl<'a> Lexer<'a> { + fn pos(&self) -> usize { + self.current.index + } + + fn peek(&self) -> char { + self.window[0] + } + + fn eof(&self) -> bool { + self.peek() == '\0' + } + + fn advance(&mut self) -> char { + let current = self.window[0]; + self.window = [ + self.window[1], + self.window[2], + self.chars.next().unwrap_or('\0'), + ]; + self.current.advance(current.len_utf8(), current == '\n'); + current + } + + fn advance_with(&mut self, with: TokenType) -> TokenType { + self.advance(); + with + } + + fn advance_by(&mut self, amount: usize) { + for _ in 0..amount { + self.advance(); + } + } + + fn advance_by_with(&mut self, amount: usize, with: TokenType) -> TokenType { + self.advance_by(amount); + with + } + + fn advance_while(&mut self, predicate: impl Fn([char; 3]) -> bool) { + while !self.eof() && predicate(self.window) { + self.advance(); + } + } +} + +impl<'a> Lexer<'a> { + fn lex_number(&mut self) -> TokenType { + let mut value = self.advance().to_string(); + + while self.peek().is_ascii_digit() { + 
value.push(self.advance()); + } + + if self.peek() == '.' { + value.push(self.advance()); + + while self.peek().is_ascii_digit() { + value.push(self.advance()); + } + + TokenType::Float(value.parse::<f64>().expect("Expected float")) + } else { + TokenType::Integer(value.parse::<i128>().expect("Expected integer")) + } + } + + fn lex_string(&mut self) -> TokenType { + let mut value = String::new(); + + self.advance(); + loop { + match self.window { + ['\\', '"', ..] => { + self.advance_by(2); + value.push('"'); + } + ['\\', 't', ..] => { + self.advance_by(2); + value.push('\t'); + } + ['\\', 'n', ..] => { + self.advance_by(2); + value.push('\n'); + } + ['"', ..] => { + self.advance(); + break; + } + _ => { + value.push(self.advance()); + continue; + } + } + } + + TokenType::String(value) + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Token<'a>; + + fn next(&mut self) -> Option<Self::Item> { + // Skipping whitespace + self.advance_while(|it| it[0].is_whitespace()); + self.start = self.current; + + // If we're at the end of the file or an error has occurred return nothing + if self.eof() || self.errored { + return None; + } + + // Figuring out the token type + let tt = match self.window { + ['#', '#', ..] => { + self.advance_while(|it| it[0] != '\n'); + // TODO: TokenType::DocComment + return self.next(); + } + + ['#', ..] => { + self.advance_while(|it| it[0] != '\n'); + // TODO: TokenType::Comment + return self.next(); + } + + // Blocks + ['(', ..] => self.advance_with(TokenType::OpeningParen), + [')', ..] => self.advance_with(TokenType::ClosingParen), + ['[', ..] => self.advance_with(TokenType::OpeningBracket), + [']', ..] => self.advance_with(TokenType::ClosingBracket), + ['{', ..] => self.advance_with(TokenType::OpeningBrace), + ['}', ..] => self.advance_with(TokenType::ClosingBrace), + + // Operators + ['-', '>', ..] => self.advance_by_with(2, TokenType::Arrow), + ['=', '>', ..] => self.advance_by_with(2, TokenType::FatArrow), + + ['+', '+', '='] => self.advance_by_with(3, TokenType::PlusPlusEq), + ['*', '*', '='] => self.advance_by_with(3, TokenType::StarStarEq), + ['+', '+', ..] => self.advance_by_with(2, TokenType::PlusPlus), + ['*', '*', ..] => self.advance_by_with(2, TokenType::StarStar), + + ['+', '=', ..] => self.advance_by_with(2, TokenType::PlusEq), + ['-', '=', ..] => self.advance_by_with(2, TokenType::MinusEq), + ['*', '=', ..] => self.advance_by_with(2, TokenType::StarEq), + ['/', '=', ..] => self.advance_by_with(2, TokenType::SlashEq), + ['%', '=', ..] => self.advance_by_with(2, TokenType::PercEq), + ['~', '=', ..] => self.advance_by_with(2, TokenType::TildeEq), + + ['+', ..] => self.advance_with(TokenType::Plus), + ['-', ..] => self.advance_with(TokenType::Minus), + ['*', ..] => self.advance_with(TokenType::Star), + ['/', ..] => self.advance_with(TokenType::Slash), // TODO: Check for regex literals + ['%', ..] => self.advance_with(TokenType::Perc), + ['~', ..] => self.advance_with(TokenType::Tilde), + + ['&', '&', ..] => self.advance_by_with(2, TokenType::AmpAmp), + ['&', ..] => self.advance_with(TokenType::Amp), + + ['|', '|', ..] => self.advance_by_with(2, TokenType::PipePipe), + ['|', ..] => self.advance_with(TokenType::Pipe), + + ['^', ..] => self.advance_with(TokenType::Caret), + + ['=', '=', ..] => self.advance_by_with(2, TokenType::EqEq), + ['!', '=', ..] => self.advance_by_with(2, TokenType::BangEq), + ['!', '!', ..] => self.advance_by_with(2, TokenType::BangBang), + ['=', ..] => self.advance_with(TokenType::Eq), + ['!', ..] 
=> self.advance_with(TokenType::Bang), + + ['<', '<', '='] => self.advance_by_with(3, TokenType::LtLtEq), + ['<', '<', ..] => self.advance_by_with(2, TokenType::LtLt), + ['<', '=', ..] => self.advance_by_with(2, TokenType::LtEq), + ['<', ..] => self.advance_with(TokenType::Lt), + + ['>', '>', '='] => self.advance_by_with(3, TokenType::GtGtEq), + ['>', '>', ..] => self.advance_by_with(2, TokenType::GtGt), + ['>', '=', ..] => self.advance_by_with(2, TokenType::GtEq), + ['>', ..] => self.advance_with(TokenType::Gt), + + [',', ..] => self.advance_with(TokenType::Comma), + + ['.', '.', ..] => self.advance_by_with(2, TokenType::DotDot), + ['.', ..] => self.advance_with(TokenType::Dot), + ['?', '?', ..] => self.advance_by_with(2, TokenType::QuestionQuestion), + ['?', '.', ..] => self.advance_by_with(2, TokenType::QuestionDot), + ['?', ..] => self.advance_with(TokenType::Question), + + [';', ..] => self.advance_with(TokenType::SemiColon), + [':', ':', ..] => self.advance_by_with(2, TokenType::ColonColon), + [':', ..] => self.advance_with(TokenType::Colon), + + // Literals + ['\'', c, '\''] => self.advance_by_with(3, TokenType::Character(c)), + ['0'..='9', ..] => self.lex_number(), + ['"', ..] => self.lex_string(), + + ['a'..='z' | 'A'..='Z' | '_' | '$', ..] => { + let mut value = String::new(); + while matches!(self.peek(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$') { + value.push(self.advance()); + } + + match value.as_str() { + "val" => TokenType::Val, + "var" => TokenType::Var, + "fn" => TokenType::Fn, + "return" => TokenType::Return, + "if" => TokenType::If, + "else" => TokenType::Else, + "while" => TokenType::While, + "for" => TokenType::For, + "in" => TokenType::In, + "loop" => TokenType::Loop, + "break" => TokenType::Break, + "continue" => TokenType::Continue, + "as" => TokenType::As, + "true" => TokenType::Boolean(true), + "false" => TokenType::Boolean(false), + _ => TokenType::Identifier(value), + } + } + + _ => { + self.errored = true; + TokenType::Error(LexerError::UnexpectedToken) + } + }; + + let lexeme = unsafe { + // At this point it is already known that the string is valid UTF-8, might + // aswell not check again + std::str::from_utf8_unchecked(&self.source[self.start.index..self.pos()]) + }; + + let token = Token { + tt, + lexeme, + start: self.start, + end: self.current, + }; + + Some(token) + } +} + +#[cfg(test)] +mod tests { + use itertools::Itertools; + + use super::{Lexer, TokenType}; + use crate::lexer::LexerError; + + #[test] + fn lex_operators() { + let source = "+ ++ - * ** / % ~ += ++= -= *= **= /= %= ~= & && | || ^ = == ! !! != < << \ + <<= <= > >> >>= >= , ? ?. ?? . .. 
: :: ; -> =>"; + let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); + + assert_eq!(&tokens, &[ + TokenType::Plus, + TokenType::PlusPlus, + TokenType::Minus, + TokenType::Star, + TokenType::StarStar, + TokenType::Slash, + TokenType::Perc, + TokenType::Tilde, + TokenType::PlusEq, + TokenType::PlusPlusEq, + TokenType::MinusEq, + TokenType::StarEq, + TokenType::StarStarEq, + TokenType::SlashEq, + TokenType::PercEq, + TokenType::TildeEq, + TokenType::Amp, + TokenType::AmpAmp, + TokenType::Pipe, + TokenType::PipePipe, + TokenType::Caret, + TokenType::Eq, + TokenType::EqEq, + TokenType::Bang, + TokenType::BangBang, + TokenType::BangEq, + TokenType::Lt, + TokenType::LtLt, + TokenType::LtLtEq, + TokenType::LtEq, + TokenType::Gt, + TokenType::GtGt, + TokenType::GtGtEq, + TokenType::GtEq, + TokenType::Comma, + TokenType::Question, + TokenType::QuestionDot, + TokenType::QuestionQuestion, + TokenType::Dot, + TokenType::DotDot, + TokenType::Colon, + TokenType::ColonColon, + TokenType::SemiColon, + TokenType::Arrow, + TokenType::FatArrow, + ]); + } + + #[test] + fn lex_keywords() { + let source = "val var fn if else while for in loop break continue as true false"; + let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); + + assert_eq!(&tokens, &[ + TokenType::Val, + TokenType::Var, + TokenType::Fn, + TokenType::If, + TokenType::Else, + TokenType::While, + TokenType::For, + TokenType::In, + TokenType::Loop, + TokenType::Break, + TokenType::Continue, + TokenType::As, + TokenType::Boolean(true), + TokenType::Boolean(false), + ]); + } + + #[test] + fn lex_literals_a() { + let source = "foo bar _foo __bar $0 $$1 \"foo\" \"bar\" \"baz\" \"\\\"\" \"\\n\" \"\\t\" \ + 'a' 'b' '\"' 93 3252 238 -382 -832 83 -25 52.9 83.7 12.4 35.2 3.3"; + let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); + + assert_eq!(&tokens, &[ + TokenType::Identifier("foo".to_owned()), + TokenType::Identifier("bar".to_owned()), + TokenType::Identifier("_foo".to_owned()), + TokenType::Identifier("__bar".to_owned()), + TokenType::Identifier("$0".to_owned()), + TokenType::Identifier("$$1".to_owned()), + TokenType::String("foo".to_owned()), + TokenType::String("bar".to_owned()), + TokenType::String("baz".to_owned()), + TokenType::String("\"".to_owned()), + TokenType::String("\n".to_owned()), + TokenType::String("\t".to_owned()), + TokenType::Character('a'), + TokenType::Character('b'), + TokenType::Character('"'), + TokenType::Integer(93), + TokenType::Integer(3252), + TokenType::Integer(238), + TokenType::Minus, + TokenType::Integer(382), + TokenType::Minus, + TokenType::Integer(832), + TokenType::Integer(83), + TokenType::Minus, + TokenType::Integer(25), + TokenType::Float(52.9), + TokenType::Float(83.7), + TokenType::Float(12.4), + TokenType::Float(35.2), + TokenType::Float(3.3), + ]); + } + + #[test] + fn lex_errors() { + let source = "`"; + let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); + + assert_eq!(&tokens, &[TokenType::Error(LexerError::UnexpectedToken)]); + } +} diff --git a/sloth/src/main.rs b/sloth/src/main.rs new file mode 100644 index 0000000..a611156 --- /dev/null +++ b/sloth/src/main.rs @@ -0,0 +1,43 @@ +#![warn( + clippy::wildcard_imports, + clippy::string_add, + clippy::string_add_assign, + clippy::manual_ok_or, + unused_lifetimes +)] + +pub mod compiler; +pub mod lexer; +pub mod parser; + +use std::{env, fs}; + +use compiler::Compiler; +use inkwell::context::Context; +use itertools::Itertools; +use lexer::Lexer; +use parser::AstParser; + +fn main() { + let args = 
env::args().collect_vec(); + + if args.len() < 2 { + println!("Sloth programming language interpreter\n"); + println!("Usage: sloth "); + return; + } + + let source_path = &args[1]; + let Ok(source) = fs::read_to_string(source_path) else { + println!("Error while reading '{source_path}'"); + return; + }; + + let tokens = Lexer::new(&source).collect_vec(); + let ast = AstParser::new(tokens).parse(); + + let context = Context::create(); + let compiler = Compiler::new(&context); + + compiler.compile(ast); +} diff --git a/sloth/src/parser/ast.rs b/sloth/src/parser/ast.rs new file mode 100644 index 0000000..543ea3a --- /dev/null +++ b/sloth/src/parser/ast.rs @@ -0,0 +1,115 @@ +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum BinaryOp { + Add, + Con, + Sub, + Mul, + Pow, + Div, + Mod, + + BWSftRight, + BWSftLeft, + BWAnd, + BWOr, + BWXor, + + Lt, + Gt, + LtEq, + GtEq, + EqEq, + NotEq, + LogAnd, + LogOr, + Range, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum UnaryOp { + Not, + Neg, + + BWComp, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + Integer(i128), + Float(f64), + Bool(bool), + Char(char), + String(String), + Regex(String), + List(Vec<Expr>), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Expr { + Grouping(Box<Expr>), + BinaryOp { + op: BinaryOp, + lhs: Box<Expr>, + rhs: Box<Expr>, + }, + UnaryOp { + op: UnaryOp, + value: Box<Expr>, + }, + Call { + ident: Box<Expr>, + args: Vec<Expr>, + }, + Variable(String), + Literal(Literal), + Lambda, // TODO: Lambda +} + +#[derive(PartialEq, Clone, Debug)] +pub struct FuncArgs { + pub name: String, + pub typ: Option<String>, +} + +#[derive(PartialEq, Clone, Debug)] +pub enum Stmt { + ExprStmt(Expr), + DefineFunction { + ident: String, + args: Vec<FuncArgs>, + body: Vec<Stmt>, + return_type: Option<String>, + }, + DefineVariable { + name: String, + value: Expr, + typ: Option<String>, + }, + DefineValue { + name: String, + value: Expr, + typ: Option<String>, + }, + AssignVariable { + name: String, + value: Expr, + }, + If { + expr: Expr, + body: Vec<Stmt>, + else_if: Vec<(Expr, Stmt)>, + els: Option<Vec<Stmt>>, + }, + For { + name: String, + iter: Expr, + body: Vec<Stmt>, + }, + While { + condition: Expr, + body: Vec<Stmt>, + }, + Return { + value: Expr, + }, +} diff --git a/sloth/src/parser/expr.rs b/sloth/src/parser/expr.rs new file mode 100644 index 0000000..9e81f7f --- /dev/null +++ b/sloth/src/parser/expr.rs @@ -0,0 +1,261 @@ +use super::ast::{BinaryOp, Expr, Literal, UnaryOp}; +use super::AstParser; +use crate::lexer::TokenType; + +/// Implementation containing the parser's internal components related to expressions +impl<'a> AstParser<'a> { + // FIXME: Should probably avoid cloning token types + + pub fn expression(&mut self) -> Expr { + self.logical_or() + } + + fn unary(&mut self) -> Expr { + if !self.eof() + && matches!( + self.peek().tt, + TokenType::Bang | TokenType::Plus | TokenType::Minus + ) + { + let operator = match self.advance().unwrap().tt.clone() { + TokenType::Bang => UnaryOp::Not, + TokenType::Tilde => UnaryOp::BWComp, + TokenType::Minus => UnaryOp::Neg, + _ => panic!(), + }; + + let rhs = self.unary(); + return Expr::UnaryOp { + op: (operator), + value: (Box::new(rhs)), + }; + } + + self.call() + } + + fn call(&mut self) -> Expr { + let mut expr = self.primary(); + + if self.advance_if_eq(&TokenType::OpeningParen) { + let mut arguments = Vec::<Expr>::new(); + + if self.peek().tt != TokenType::ClosingParen { + loop { + arguments.push(self.expression()); + if !self.advance_if_eq(&TokenType::Comma) { + break; + } + } + } + + self.consume( + TokenType::ClosingParen, + "Expected ')' to close off function call", + ); + + // let 
Expr::Variable(_ident) = expr else { panic!("uh oh spaghettio"); }; + + expr = Expr::Call { + ident: (Box::new(expr)), + args: (arguments), + } + } + + expr + } + + fn primary(&mut self) -> Expr { + match self.advance().unwrap().tt.clone() { + TokenType::Integer(literal) => Expr::Literal(Literal::Integer(literal)), + TokenType::Float(literal) => Expr::Literal(Literal::Float(literal)), + TokenType::Boolean(literal) => Expr::Literal(Literal::Bool(literal)), + TokenType::Character(literal) => Expr::Literal(Literal::Char(literal)), + TokenType::String(literal) => Expr::Literal(Literal::String(literal)), + TokenType::Regex(literal) => Expr::Literal(Literal::Regex(literal)), + TokenType::Identifier(ident) => Expr::Variable(ident), + TokenType::OpeningParen => { + let expr = self.expression(); + self.consume(TokenType::ClosingParen, "Must end expression with ')'"); + Expr::Grouping(Box::new(expr)) + } + TokenType::OpeningBracket => { + let mut expr: Vec = Vec::new(); + + while !self.eof() && self.peek().tt != TokenType::ClosingBracket { + let exp = self.expression(); + expr.push(exp); + + self.advance_if_eq(&TokenType::Comma); + } + self.consume(TokenType::ClosingBracket, "Expected ']' at end of list"); + Expr::Literal(Literal::List(expr)) + } + _ => unimplemented!("{:?}", self.peek()), + } + } +} + +// Macro to generate repetitive binary expressions. Things like addition, +// multiplication, exc. +macro_rules! binary_expr { + ($name:ident, $parent:ident, $pattern:pat) => { + fn $name(&mut self) -> Expr { + let mut expr = self.$parent(); + + while !self.eof() && matches!(self.peek().tt, $pattern) { + let operator = match self.advance().unwrap().tt.clone() { + TokenType::Plus => BinaryOp::Add, + TokenType::PlusPlus => BinaryOp::Con, + TokenType::Minus => BinaryOp::Sub, + TokenType::Star => BinaryOp::Mul, + TokenType::StarStar => BinaryOp::Pow, + TokenType::Slash => BinaryOp::Div, + TokenType::Perc => BinaryOp::Mod, + TokenType::DotDot => BinaryOp::Range, + + TokenType::LtLt => BinaryOp::BWSftRight, + TokenType::GtGt => BinaryOp::BWSftLeft, + TokenType::Amp => BinaryOp::BWAnd, + TokenType::Pipe => BinaryOp::BWOr, + TokenType::Caret => BinaryOp::BWXor, + + TokenType::Lt => BinaryOp::Lt, + TokenType::Gt => BinaryOp::Gt, + TokenType::LtEq => BinaryOp::LtEq, + TokenType::GtEq => BinaryOp::GtEq, + TokenType::EqEq => BinaryOp::EqEq, + TokenType::BangEq => BinaryOp::NotEq, + TokenType::AmpAmp => BinaryOp::LogAnd, + TokenType::PipePipe => BinaryOp::LogOr, + _ => panic!("uh oh spagghetio"), + }; + + let rhs = self.$parent(); + expr = Expr::BinaryOp { + op: (operator), + lhs: (Box::new(expr)), + rhs: (Box::new(rhs)), + } + } + + expr + } + }; +} + +#[rustfmt::skip] +#[allow(unused_parens)] +impl<'a> AstParser<'a> { + // Binary expressions in order of precedence from lowest to highest. 
+ binary_expr!(logical_or , logical_and , (TokenType::PipePipe)); + binary_expr!(logical_and , range , (TokenType::AmpAmp)); + binary_expr!(range , equality , (TokenType::DotDot)); + binary_expr!(equality , comparison , (TokenType::BangEq | TokenType::EqEq)); + binary_expr!(comparison , bitwise_shifting, (TokenType::Lt | TokenType::Gt | TokenType::LtEq | TokenType::GtEq)); + binary_expr!(bitwise_shifting, additive , (TokenType::LtLt | TokenType::GtGt)); + binary_expr!(additive , multiplicative , (TokenType::Plus | TokenType::Minus)); + binary_expr!(multiplicative , unary , (TokenType::Star | TokenType::Slash | TokenType::Perc)); +} + +#[cfg(test)] +mod tests { + use itertools::Itertools; + + use super::{AstParser, BinaryOp, Expr, Literal}; + use crate::lexer::Lexer; + use crate::parser::ast::UnaryOp; + + #[test] + fn basic_expression_a() { + let lexer = Lexer::new("3 + 5 * 4"); + let tokens = lexer.collect_vec(); + + let expected_ast = Expr::BinaryOp { + op: BinaryOp::Add, + lhs: Box::new(Expr::Literal(Literal::Integer(3))), + rhs: Box::new(Expr::BinaryOp { + op: BinaryOp::Mul, + lhs: Box::new(Expr::Literal(Literal::Integer(5))), + rhs: Box::new(Expr::Literal(Literal::Integer(4))), + }), + }; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.expression(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } + + #[test] + fn basic_expression_b() { + let lexer = Lexer::new("17 - (-5 + 5) / 6"); + let tokens = lexer.collect_vec(); + + let expected_ast = Expr::BinaryOp { + op: BinaryOp::Sub, + lhs: Box::new(Expr::Literal(Literal::Integer(17))), + rhs: Box::new(Expr::BinaryOp { + op: BinaryOp::Div, + lhs: Box::new(Expr::Grouping(Box::new(Expr::BinaryOp { + op: BinaryOp::Add, + lhs: Box::new(Expr::UnaryOp { + op: UnaryOp::Neg, + value: Box::new(Expr::Literal(Literal::Integer(5))), + }), + rhs: Box::new(Expr::Literal(Literal::Integer(5))), + }))), + rhs: Box::new(Expr::Literal(Literal::Integer(6))), + }), + }; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.expression(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } + #[test] + fn basic_expression_c() { + let lexer = Lexer::new("[1, 2, 3]"); + let tokens = lexer.collect_vec(); + + let expected_ast = Expr::Literal(Literal::List(vec![ + Expr::Literal(Literal::Integer(1)), + Expr::Literal(Literal::Integer(2)), + Expr::Literal(Literal::Integer(3)), + ])); + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.expression(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } + #[test] + fn basic_expression_d() { + let lexer = Lexer::new("1 .. 
17"); + let tokens = lexer.collect_vec(); + + let expected_ast = Expr::BinaryOp { + op: (BinaryOp::Range), + lhs: (Box::new(Expr::Literal(Literal::Integer(1)))), + rhs: (Box::new(Expr::Literal(Literal::Integer(17)))), + }; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.expression(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } +} diff --git a/sloth/src/parser/mod.rs b/sloth/src/parser/mod.rs new file mode 100644 index 0000000..9d77acc --- /dev/null +++ b/sloth/src/parser/mod.rs @@ -0,0 +1,57 @@ +pub mod ast; +pub mod expr; +pub mod stmt; + +use crate::lexer::{Token, TokenType}; +#[derive(Debug)] +pub struct AstParser<'a> { + tokens: Vec>, + index: usize, +} + +/// Implementation containing utilities used by the parsers internal components +impl<'a> AstParser<'a> { + pub fn new(tokens: Vec>) -> Self { + Self { tokens, index: 0 } + } + pub fn peek(&self) -> &Token { + &self.tokens[self.index] + } + + pub fn advance(&mut self) -> Option<&Token> { + if self.eof() { + return None; + } + + self.index += 1; + Some(&self.tokens[self.index - 1]) + } + + pub fn advance_if(&mut self, next: impl FnOnce(&Token) -> bool) -> bool { + if self.eof() { + return false; + } + + if next(self.peek()) { + self.advance(); + return true; + } + + false + } + + pub fn advance_if_eq(&mut self, next: &TokenType) -> bool { + self.advance_if(|it| it.tt == *next) + } + + pub fn consume(&mut self, next: TokenType, error: &str) { + if std::mem::discriminant(&self.peek().tt) != std::mem::discriminant(&next) { + panic!("{error} at index {:?}", self.index); + } + self.advance(); + } + + pub fn eof(&self) -> bool { + self.index >= self.tokens.len() + } +} diff --git a/sloth/src/parser/stmt.rs b/sloth/src/parser/stmt.rs new file mode 100644 index 0000000..1a961b1 --- /dev/null +++ b/sloth/src/parser/stmt.rs @@ -0,0 +1,646 @@ +use super::ast::{Expr, FuncArgs, Stmt}; +use super::AstParser; +use crate::lexer::TokenType; + +impl<'a> AstParser<'a> { + pub fn parse(&mut self) -> Vec { + let mut statements = Vec::new(); + + while !self.eof() { + statements.push(self.statement()); + } + + statements + } + + fn statement(&mut self) -> Stmt { + if self.advance_if_eq(&TokenType::Var) { + return self.var_statement(); + } + + if self.advance_if_eq(&TokenType::Val) { + return self.val_statement(); + } + + if self.advance_if_eq(&TokenType::If) { + return self.if_statement(); + } + + if self.advance_if_eq(&TokenType::For) { + return self.for_statement(); + } + + if self.advance_if_eq(&TokenType::While) { + return self.while_statement(); + } + + if self.advance_if_eq(&TokenType::Fn) { + return self.function_statement(); + } + + if self.advance_if_eq(&TokenType::Return) { + return self.return_statement(); + } + + self.mut_statement() + + // If we couldn't parse a statement return an expression statement + // self.expression_statement() + } + + fn mut_statement(&mut self) -> Stmt { + let TokenType::Identifier(ident) = self.peek().tt.clone() else { + panic!("Identifier error {:?}", self.peek()); + }; + + self.advance(); + let next = self.advance().unwrap().tt.clone(); + if next == TokenType::Eq { + let value = self.expression(); + self.consume(TokenType::SemiColon, "No semi colon for me i guess"); + return Stmt::AssignVariable { + name: (ident), + value: (value), + }; + } else if next == TokenType::OpeningParen { + let mut arguments = Vec::::new(); + + if self.peek().tt != TokenType::ClosingParen { + loop { + 
arguments.push(self.expression()); + if !self.advance_if_eq(&TokenType::Comma) { + break; + } + } + } + + self.consume( + TokenType::ClosingParen, + "Expected ')' to close off function call", + ); + + self.consume(TokenType::SemiColon, "No semi colon for me i guess"); + return Stmt::ExprStmt(Expr::Call { + ident: Box::new(Expr::Variable(ident)), + args: (arguments), + }); + } + self.expression_statement() + } + + fn var_statement(&mut self) -> Stmt { + let TokenType::Identifier(ident) = self.peek().tt.clone() else { + panic!("Identifier expected after 'var', not {:?}", self.peek()); + }; + + self.advance(); + + let mut typ: Option = None; + if self.peek().tt.clone() == TokenType::Colon { + self.consume(TokenType::Colon, "How did you even get this error?"); + let TokenType::Identifier(name) = self.peek().tt.clone() else { + panic!("Type expected after identifier, not {:?}", self.peek()); + }; + self.advance(); + typ = Some(name); + } + + self.consume(TokenType::Eq, "Expected '=' after identifier at "); + + let value = self.expression(); + + self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); + + Stmt::DefineVariable { + name: (ident), + value: (value), + typ: (typ), + } + } + + fn val_statement(&mut self) -> Stmt { + let TokenType::Identifier(ident) = self.peek().tt.clone() else { + panic!("Identifier expected after 'val'"); + }; + + self.advance(); // Advancing from the identifier + + let mut typ: Option = None; + if self.peek().tt.clone() == TokenType::Colon { + self.consume(TokenType::Colon, "How did you even get this error?"); + let TokenType::Identifier(name) = self.peek().tt.clone() else { + panic!("Type expected after identifier, not {:?}", self.peek()); + }; + self.advance(); + typ = Some(name); + } + + self.consume(TokenType::Eq, "Expected '=' after identifier"); + + let value = self.expression(); + + self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); + + Stmt::DefineValue { + name: (ident), + value: (value), + typ: (typ), + } + } + + fn if_statement(&mut self) -> Stmt { + let condition = self.expression(); + + self.consume( + TokenType::OpeningBrace, + "Expected '{' at beggining of block", + ); + let mut body = Vec::new(); + while !self.eof() && self.peek().tt != TokenType::ClosingBrace { + body.push(self.statement()); + } + self.advance(); + Stmt::If { + expr: (condition), + body: (body), + else_if: (Vec::new()), + els: (None), + } // TODO: implement else if and else + } + + fn for_statement(&mut self) -> Stmt { + let binding = self.expression(); + let Expr::Variable(binding) = binding else { + panic!("Left side of for statement must be identifier"); + }; + + self.consume( + TokenType::In, + "Expected 'in' in between identifier and range", + ); + + // let range_start = self.expression(); + // self.consume( + // TokenType::DotDot, + // "Expected '..' 
denoting min and max of range", + // ); + // let range_end = self.expression(); + + let expr = self.expression(); + + self.consume(TokenType::OpeningBrace, "Expected '{' after iterator"); + + let mut body = Vec::new(); + while !self.eof() && self.peek().tt != TokenType::ClosingBrace { + body.push(self.statement()); + } + self.advance(); + + Stmt::For { + name: (binding), + iter: (expr), + body: (body), + } + } // TODO: Fix this garbage + + fn while_statement(&mut self) -> Stmt { + let condition = self.expression(); + + self.consume( + TokenType::OpeningBrace, + "Expected '{' at beggining of block", + ); + let mut body = Vec::new(); + while !self.eof() && self.peek().tt != TokenType::ClosingBrace { + println!("{:?}", self.peek().tt); + body.push(self.statement()); + } + self.consume( + TokenType::ClosingBrace, + "Expected '}' after block on while loop", + ); + + self.advance(); + Stmt::While { condition, body } + } + + fn expression_statement(&mut self) -> Stmt { + let expr = self.expression(); + + // FIXME: Move assignment handling + // if self.advance_if_eq(&TokenType::Eq) { + // if let Expr::Literal(_ident) = &expr { + // let value = self.expression(); + + // self.consume( + // TokenType::SemiColon, + // "Expected ';' at end of + // statement", + // ); // return Stmt::DefineVariable { + // // name: (ident.clone()), + // // value: (value), + // // typ: (None), + // // }; + // return Stmt::ExprStmt(expr); + // } + // } + + self.consume( + TokenType::SemiColon, + "Expected ';' at end of expr statement", + ); + Stmt::ExprStmt(expr) + } + + fn function_statement(&mut self) -> Stmt { + let TokenType::Identifier(ident) = self.advance().unwrap().tt.clone() else { + panic!("Identifier expected after 'fn'"); + }; + + self.consume(TokenType::OpeningParen, "Expected '(' after identifier"); + let mut args: Vec = Vec::new(); + while !self.eof() && self.peek().tt != TokenType::ClosingParen { + let TokenType::Identifier(name) = self.advance().unwrap().tt.clone() else { + panic!("parameter expected after '('"); + }; + + let mut typ: Option = None; + + if self.peek().tt.clone() == TokenType::Colon { + self.consume(TokenType::Colon, "How did you even get this error?"); + let TokenType::Identifier(name) = self.peek().tt.clone() else { + panic!("Type expected after ':', not {:?}", self.peek()); + }; + self.advance(); + typ = Some(name); + } + + self.advance_if_eq(&TokenType::Comma); + + let arg = FuncArgs { + name: (name), + typ: (typ), + }; + args.push(arg); + } + self.advance(); + let mut typ: Option = None; + if self.peek().tt.clone() == TokenType::Arrow { + self.advance(); + let TokenType::Identifier(name) = self.peek().tt.clone() else { + panic!("Type expected after ':', not {:?}", self.peek()); + }; + typ = Some(name); + self.advance(); + } + self.consume(TokenType::OpeningBrace, "Expected '{' after parameters"); + let mut body = Vec::new(); + while !self.eof() && self.peek().tt != TokenType::ClosingBrace { + body.push(self.statement()); + } + self.consume(TokenType::ClosingBrace, "Expected '}' after body"); + + Stmt::DefineFunction { + ident: (ident), + args: (args), + body: (body), + return_type: (typ), + } + } + + fn return_statement(&mut self) -> Stmt { + let expr = self.expression(); + self.consume(TokenType::SemiColon, "Expected ';' after return statement"); + Stmt::Return { value: (expr) } + } +} + +#[cfg(test)] +mod tests { + use itertools::Itertools; + + use super::{AstParser, Stmt}; + use crate::lexer::Lexer; + use crate::parser::ast::{BinaryOp, Expr, FuncArgs, Literal, UnaryOp}; + + #[test] 
+ fn basic_statement_a() { + let lexer = Lexer::new("var test_a: int = 5 + 3;"); + let tokens = lexer.collect_vec(); + + let expected_ast = Stmt::DefineVariable { + name: ("test_a".to_string()), + value: (Expr::BinaryOp { + op: (BinaryOp::Add), + lhs: (Box::new(Expr::Literal(Literal::Integer(5)))), + rhs: (Box::new(Expr::Literal(Literal::Integer(3)))), + }), + typ: Some("int".to_string()), + }; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.statement(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } + + #[test] + fn basic_statement_b() { + let lexer = Lexer::new("val test_b = \"Hello World\";"); + let tokens = lexer.collect_vec(); + + let expected_ast = Stmt::DefineValue { + name: ("test_b".to_string()), + value: (Expr::Literal(Literal::String("Hello World".to_string()))), + typ: (None), + }; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.statement(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } + + #[test] + fn basic_statement_c() { + let lexer = Lexer::new( + "\ + fn test_c (a, b, c) {\nreturn (a + b * c);\n}", + ); + let tokens = lexer.collect_vec(); + println!("{tokens:?}"); + + let expected_ast = Stmt::DefineFunction { + ident: ("test_c".to_string()), + args: (vec![ + FuncArgs { + name: ("a".to_string()), + typ: None, + }, + FuncArgs { + name: ("b".to_string()), + typ: None, + }, + FuncArgs { + name: ("c".to_string()), + typ: None, + }, + ]), + body: (vec![Stmt::Return { + value: (Expr::Grouping(Box::new(Expr::BinaryOp { + op: BinaryOp::Add, + lhs: Box::new(Expr::Variable("a".to_string())), + rhs: Box::new(Expr::BinaryOp { + op: BinaryOp::Mul, + lhs: Box::new(Expr::Variable("b".to_string())), + rhs: Box::new(Expr::Variable("c".to_string())), + }), + }))), + }]), + return_type: (None), + }; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.statement(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } + #[test] + fn basic_statement_d() { + let lexer = Lexer::new( + "\ + while true {\nprint(\"Hello World\");\nprintln(5 + 7/-3);\n}", + ); + let tokens = lexer.collect_vec(); + println!("{tokens:?}"); + + let expected_ast = Stmt::While { + condition: (Expr::Literal(Literal::Bool(true))), + body: (vec![ + Stmt::ExprStmt(Expr::Call { + ident: Box::new(Expr::Variable("print".to_string())), + args: (vec![Expr::Literal(Literal::String("Hello World".to_string()))]), + }), + Stmt::ExprStmt(Expr::Call { + ident: Box::new(Expr::Variable("println".to_string())), + args: (vec![Expr::BinaryOp { + op: (BinaryOp::Add), + lhs: (Box::new(Expr::Literal(Literal::Integer(5)))), + rhs: (Box::new(Expr::BinaryOp { + op: (BinaryOp::Div), + lhs: (Box::new(Expr::Literal(Literal::Integer(7)))), + rhs: (Box::new(Expr::UnaryOp { + op: (UnaryOp::Neg), + value: (Box::new(Expr::Literal(Literal::Integer(3)))), + })), + })), + }]), + }), + ]), + }; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.statement(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } + #[test] + fn basic_statement_e() { + let lexer = Lexer::new( + "\ + if a+5 > 10 {\nprint(a);\n}\nif a+5 < 10 {\nprintln(10);\n}\nif a+5 
== 10 \ + {\nprint(toString(10));\na = true;\n}", + ); + let tokens = lexer.collect_vec(); + // println!("{tokens:?}"); + + let expected_ast = vec![ + Stmt::If { + expr: (Expr::BinaryOp { + op: (BinaryOp::Gt), + lhs: (Box::new(Expr::BinaryOp { + op: (BinaryOp::Add), + lhs: (Box::new(Expr::Variable("a".to_string()))), + rhs: (Box::new(Expr::Literal(Literal::Integer(5)))), + })), + rhs: (Box::new(Expr::Literal(Literal::Integer(10)))), + }), + body: (vec![Stmt::ExprStmt(Expr::Call { + ident: (Box::new(Expr::Variable("print".to_string()))), + args: (vec![Expr::Variable("a".to_string())]), + })]), + else_if: (Vec::new()), + els: (None), + }, + Stmt::If { + expr: (Expr::BinaryOp { + op: (BinaryOp::Lt), + lhs: (Box::new(Expr::BinaryOp { + op: (BinaryOp::Add), + lhs: (Box::new(Expr::Variable("a".to_string()))), + rhs: (Box::new(Expr::Literal(Literal::Integer(5)))), + })), + rhs: (Box::new(Expr::Literal(Literal::Integer(10)))), + }), + body: (vec![Stmt::ExprStmt(Expr::Call { + ident: (Box::new(Expr::Variable("println".to_string()))), + args: (vec![Expr::Literal(Literal::Integer(10))]), + })]), + else_if: (Vec::new()), + els: (None), + }, + Stmt::If { + expr: (Expr::BinaryOp { + op: (BinaryOp::EqEq), + lhs: (Box::new(Expr::BinaryOp { + op: (BinaryOp::Add), + lhs: (Box::new(Expr::Variable("a".to_string()))), + rhs: (Box::new(Expr::Literal(Literal::Integer(5)))), + })), + rhs: (Box::new(Expr::Literal(Literal::Integer(10)))), + }), + body: (vec![ + Stmt::ExprStmt(Expr::Call { + ident: (Box::new(Expr::Variable("print".to_string()))), + // ident: (Box::new(Expr::Literal(Literal::String("print".to_string())))), + args: (vec![Expr::Call { + ident: (Box::new(Expr::Variable("toString".to_string()))), + // ident: Box::new(Expr::Literal(Literal::String("toString". + // to_string()))), + args: vec![Expr::Literal(Literal::Integer(10))], + }]), + }), + Stmt::AssignVariable { + name: ("a".to_string()), + value: (Expr::Literal(Literal::Bool(true))), + }, + ]), + + else_if: (Vec::new()), + els: (None), + }, + ]; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.parse(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } + + #[test] + fn basic_statement_f() { + let lexer = Lexer::new("test_a = 5 + 3;"); + let tokens = lexer.collect_vec(); + + let expected_ast = Stmt::AssignVariable { + name: ("test_a".to_string()), + value: (Expr::BinaryOp { + op: (BinaryOp::Add), + lhs: (Box::new(Expr::Literal(Literal::Integer(5)))), + rhs: (Box::new(Expr::Literal(Literal::Integer(3)))), + }), + }; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.statement(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } + #[test] + fn basic_statement_g() { + let lexer = Lexer::new( + "\ + fn times_two(x: int) -> int {\nval y: int = x*2;\nreturn y;\n}", + ); + let tokens = lexer.collect_vec(); + + let expected_ast = Stmt::DefineFunction { + ident: ("times_two".to_string()), + args: (vec![FuncArgs { + name: ("x".to_string()), + typ: (Some("int".to_string())), + }]), + body: (vec![ + Stmt::DefineValue { + name: "y".to_string(), + value: (Expr::BinaryOp { + op: (BinaryOp::Mul), + lhs: (Box::new(Expr::Variable("x".to_string()))), + rhs: (Box::new(Expr::Literal(Literal::Integer(2)))), + }), + typ: Some("int".to_string()), + }, + Stmt::Return { + value: (Expr::Variable("y".to_string())), + }, + 
]),
+
+            return_type: Some("int".to_string()),
+        };
+
+        let mut parser = AstParser::new(tokens);
+        let generated_ast = parser.statement();
+
+        println!("Expected AST:\n{expected_ast:#?}\n\n");
+        println!("Generated AST:\n{generated_ast:#?}\n\n");
+
+        assert_eq!(expected_ast, generated_ast);
+    }
+
+    #[test]
+    fn basic_statement_h() {
+        let lexer = Lexer::new("for i in 1 .. 3 {\nfor j in [1, 2, 3] {\nprint(j*i);}}");
+        let tokens = lexer.collect_vec();
+
+        let expected_ast = Stmt::For {
+            name: ("i".to_string()),
+            iter: (Expr::BinaryOp {
+                op: (BinaryOp::Range),
+                lhs: (Box::new(Expr::Literal(Literal::Integer(1)))),
+                rhs: (Box::new(Expr::Literal(Literal::Integer(3)))),
+            }),
+            body: (vec![Stmt::For {
+                name: ("j".to_string()),
+                iter: (Expr::Literal(Literal::List(vec![
+                    Expr::Literal(Literal::Integer(1)),
+                    Expr::Literal(Literal::Integer(2)),
+                    Expr::Literal(Literal::Integer(3)),
+                ]))),
+                body: (vec![Stmt::ExprStmt(Expr::Call {
+                    ident: Box::new(Expr::Variable("print".to_string())),
+                    args: (vec![Expr::BinaryOp {
+                        op: (BinaryOp::Mul),
+                        lhs: (Box::new(Expr::Variable("j".to_string()))),
+                        rhs: (Box::new(Expr::Variable("i".to_string()))),
+                    }]),
+                })]),
+            }]),
+        };
+
+        let mut parser = AstParser::new(tokens);
+        let generated_ast = parser.statement();
+
+        println!("Expected AST:\n{expected_ast:#?}\n\n");
+        println!("Generated AST:\n{generated_ast:#?}\n\n");
+
+        assert_eq!(expected_ast, generated_ast);
+    }
+}
diff --git a/test.c b/test.c
new file mode 100644
index 0000000..3a39c44
--- /dev/null
+++ b/test.c
@@ -0,0 +1,11 @@
+#include <stdio.h>
+
+int add(int, int);
+int subtract(int, int);
+
+int main() {
+  int a = add(5, 2);
+  int b = subtract(3, 8);
+  printf("%d %d\n", a, b);
+  return 0;
+}
diff --git a/test.sloth b/test.sloth
new file mode 100644
index 0000000..8ad1754
--- /dev/null
+++ b/test.sloth
@@ -0,0 +1,7 @@
+fn add(x, y) {
+    return x + y;
+}
+
+fn subtract(x, y) {
+    return x - y;
+}
-- 
cgit v1.2.3
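
The two files added at the end look like a link-time smoke test for the new LLVM backend: test.c forward-declares add and subtract and expects the object compiled from test.sloth to provide them, while test.sloth defines both. The patch does not show what artifact compiler.compile emits, so the actual link step is left implicit here. What the front end produces for test.sloth does follow from the parser above; the sketch below is written in the style of the stmt.rs test module (the module paths come from this patch, while the test name and its placement are illustrative assumptions) and shows the AST expected for the add function.

use itertools::Itertools;

use crate::lexer::Lexer;
use crate::parser::ast::{BinaryOp, Expr, FuncArgs, Stmt};
use crate::parser::AstParser;

#[test]
fn parses_add_from_test_sloth() {
    // The first function from test.sloth, tokenized the same way main.rs
    // tokenizes a source file it has read from disk.
    let tokens = Lexer::new("fn add(x, y) {\nreturn x + y;\n}").collect_vec();

    // Untyped parameters come out with `typ: None`, and the body is a single
    // `return` of an additive binary expression over the two parameters.
    let expected_ast = vec![Stmt::DefineFunction {
        ident: "add".to_string(),
        args: vec![
            FuncArgs {
                name: "x".to_string(),
                typ: None,
            },
            FuncArgs {
                name: "y".to_string(),
                typ: None,
            },
        ],
        body: vec![Stmt::Return {
            value: Expr::BinaryOp {
                op: BinaryOp::Add,
                lhs: Box::new(Expr::Variable("x".to_string())),
                rhs: Box::new(Expr::Variable("y".to_string())),
            },
        }],
        return_type: None,
    }];

    let generated_ast = AstParser::new(tokens).parse();
    assert_eq!(expected_ast, generated_ast);
}

One open question this pair of files raises: the Sloth parameters are untyped while the C declarations use int, and nothing in this patch pins down how the backend maps untyped parameters onto a C-compatible signature.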