diff options
| author | Cody <cody@codyq.dev> | 2023-03-24 17:33:44 -0500 |
|---|---|---|
| committer | Cody <cody@codyq.dev> | 2023-03-24 17:33:44 -0500 |
| commit | f9d13f3098b2a5984f59d612be87c184aba0b2c7 (patch) | |
| tree | 0a8059a4604026c3c0fcde587617507063cf7ccf /src | |
| parent | 28e0b95d8ecbbc44ef81069ad122a88b2a64c74e (diff) | |
| download | sloth-f9d13f3098b2a5984f59d612be87c184aba0b2c7.tar.gz | |
Stuff and things
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast/mod.rs | 75 | ||||
| -rw-r--r-- | src/ast/parser.rs | 372 | ||||
| -rw-r--r-- | src/ast/printer.rs | 38 | ||||
| -rw-r--r-- | src/interpreter.rs | 221 | ||||
| -rw-r--r-- | src/lexer.rs | 418 | ||||
| -rw-r--r-- | src/main.rs | 115 |
6 files changed, 48 insertions, 1191 deletions
diff --git a/src/ast/mod.rs b/src/ast/mod.rs deleted file mode 100644 index b3e7c36..0000000 --- a/src/ast/mod.rs +++ /dev/null @@ -1,75 +0,0 @@ -use crate::lexer::{Literal, TokenType}; - -pub mod parser; -pub mod printer; - -#[derive(Debug, Eq, PartialEq)] -pub enum Stmt { - Block(Vec<Stmt>), - Expr(Expr), - Val { - ident: String, - value: Expr, - }, - Var { - ident: String, - value: Expr, - }, - Assignment { - ident: String, - value: Expr, - }, - Function { - ident: String, - arguments: Vec<FunctionArgument>, - return_type: String, - body: Vec<Stmt>, - }, - If { - condition: Expr, - body: Vec<Stmt>, - }, - For { - binding: String, - range: (Expr, Expr), - body: Vec<Stmt>, - }, - While { - condition: Expr, - body: Vec<Stmt>, - }, - Return { - value: Expr, - }, -} - -#[derive(Debug, Eq, PartialEq)] -pub struct FunctionArgument { - name: String, - types: String, -} - -#[derive(Debug, Eq, PartialEq)] -pub enum Expr { - Literal(Literal), - Variable(String), - Grouping(Box<Expr>), - Call { - ident: String, - arguments: Vec<Expr>, - }, - Binary { - operator: TokenType, - lhs: Box<Expr>, - rhs: Box<Expr>, - }, - Unary { - operator: TokenType, - expr: Box<Expr>, - }, -} - -pub trait AstVisitor<T = ()> { - fn visit_stmt(&mut self, stmt: &Stmt) -> T; - fn visit_expr(&mut self, expr: &Expr) -> T; -} diff --git a/src/ast/parser.rs b/src/ast/parser.rs deleted file mode 100644 index 85be68c..0000000 --- a/src/ast/parser.rs +++ /dev/null @@ -1,372 +0,0 @@ -use super::{Expr, Stmt}; -use crate::lexer::{Token, TokenType}; - -pub struct AstParser<'a> { - tokens: Vec<Token<'a>>, - index: usize, -} - -/// Implementation containing utilities used by the parsers internal components -impl<'a> AstParser<'a> { - pub fn new(tokens: Vec<Token<'a>>) -> Self { - Self { tokens, index: 0 } - } - fn peek(&self) -> &Token { - &self.tokens[self.index] - } - - fn advance(&mut self) -> Option<&Token> { - if self.eof() { - return None; - } - - self.index += 1; - Some(&self.tokens[self.index - 1]) - } - - fn advance_if(&mut self, next: impl FnOnce(&Token) -> bool) -> bool { - if self.eof() { - return false; - } - - if next(self.peek()) { - self.advance(); - return true; - } - - false - } - - fn advance_if_eq(&mut self, next: &TokenType) -> bool { - self.advance_if(|it| it.tt == *next) - } - - fn consume(&mut self, next: TokenType, error: &str) { - if std::mem::discriminant(&self.peek().tt) != std::mem::discriminant(&next) { - panic!("{error}"); - } - self.advance(); - } - - fn eof(&self) -> bool { - self.index >= self.tokens.len() - } -} - -/// Implementation containing parsers internal components related to statements -impl<'a> AstParser<'a> { - pub fn parse(&mut self) -> Vec<Stmt> { - let mut statements = Vec::new(); - - while !self.eof() { - statements.push(self.statement()); - } - - statements - } - - fn block(&mut self) -> Vec<Stmt> { - self.consume(TokenType::LeftBrace, "Expected '{' at beggining of block"); - - let mut statements = Vec::new(); - - while !self.eof() && self.peek().tt != TokenType::RightBrace { - statements.push(self.statement()); - } - - self.consume(TokenType::RightBrace, "Expected '}' at end of block"); - statements - } - - fn statement(&mut self) -> Stmt { - if self.peek().tt == TokenType::LeftBrace { - return Stmt::Block(self.block()); - } - - if self.advance_if_eq(&TokenType::Var) { - return self.var_statement(); - } - - if self.advance_if_eq(&TokenType::Val) { - return self.val_statement(); - } - - if self.advance_if_eq(&TokenType::If) { - return self.if_statement(); - } - - if self.advance_if_eq(&TokenType::For) { - return self.for_statement(); - } - - if self.advance_if_eq(&TokenType::While) { - return self.while_statement(); - } - - // If we couldn't parse a statement return an expression statement - self.expression_statement() - } - - fn var_statement(&mut self) -> Stmt { - let TokenType::Identifier(ident) = self.peek().tt.clone() else { - panic!("Identifier expected after 'var'"); - }; - - self.advance(); // Advancing from the identifier - self.consume(TokenType::Eq, "Expected '=' after identifier"); - - let value = self.expression(); - - self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); - - Stmt::Var { ident, value } - } - - fn val_statement(&mut self) -> Stmt { - let TokenType::Identifier(ident) = self.peek().tt.clone() else { - panic!("Identifier expected after 'val'"); - }; - - self.advance(); // Advancing from the identifier - self.consume(TokenType::Eq, "Expected '=' after identifier"); - - let value = self.expression(); - - self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); - - Stmt::Val { ident, value } - } - - fn if_statement(&mut self) -> Stmt { - let condition = self.expression(); - let body = self.block(); - - Stmt::If { condition, body } - } - - fn for_statement(&mut self) -> Stmt { - let binding = self.expression(); - let Expr::Variable(binding) = binding else { - panic!("Left side of for statement must be identifier"); - }; - - self.consume( - TokenType::In, - "Expected 'in' in between identifier and range", - ); - - let range_start = self.expression(); - self.consume( - TokenType::DotDot, - "Expected '..' denoting min and max of range", - ); - let range_end = self.expression(); - - let body = self.block(); - - Stmt::For { - binding, - range: (range_start, range_end), - body, - } - } - - fn while_statement(&mut self) -> Stmt { - let condition = self.expression(); - let body = self.block(); - - Stmt::While { condition, body } - } - - fn expression_statement(&mut self) -> Stmt { - let expr = self.expression(); - - // FIXME: Move assignment handling - if self.advance_if_eq(&TokenType::Eq) { - if let Expr::Variable(ident) = &expr { - let value = self.expression(); - - self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); - return Stmt::Assignment { - ident: ident.clone(), - value, - }; - } - } - - self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); - Stmt::Expr(expr) - } -} - -/// Implementation containing parsers internal components related to expressions -impl<'a> AstParser<'a> { - // FIXME: Should probably avoid cloning token types - - fn expression(&mut self) -> Expr { - self.logical_or() - } - - fn unary(&mut self) -> Expr { - if !self.eof() - && matches!( - self.peek().tt, - TokenType::Bang | TokenType::Plus | TokenType::Minus - ) - { - let operator = self.advance().unwrap().tt.clone(); - let rhs = self.unary(); - return Expr::Unary { - operator, - expr: Box::new(rhs), - }; - } - - self.call() - } - - fn call(&mut self) -> Expr { - let mut expr = self.primary(); - - if self.advance_if_eq(&TokenType::LeftParen) { - let mut arguments = Vec::<Expr>::new(); - - if self.peek().tt != TokenType::RightParen { - loop { - arguments.push(self.expression()); - if !self.advance_if_eq(&TokenType::Comma) { - break; - } - } - } - - self.consume( - TokenType::RightParen, - "Expected ')' to close off function call", - ); - - let Expr::Variable(ident) = expr else { panic!("uh oh spaghettio"); }; - - expr = Expr::Call { ident, arguments } - } - - expr - } - - fn primary(&mut self) -> Expr { - match self.advance().unwrap().tt.clone() { - TokenType::Literal(literal) => Expr::Literal(literal), - TokenType::Identifier(ident) => Expr::Variable(ident), - TokenType::LeftParen => { - let expr = self.expression(); - self.consume(TokenType::RightParen, "Must end expression with ')'"); - Expr::Grouping(Box::new(expr)) - } - _ => unimplemented!("{:?}", self.peek()), - } - } -} - -// Macro to generate repetitive binary expressions. Things like addition, -// multiplication, exc. -macro_rules! binary_expr { - ($name:ident, $parent:ident, $pattern:pat) => { - fn $name(&mut self) -> Expr { - let mut expr = self.$parent(); - - while !self.eof() && matches!(self.peek().tt, $pattern) { - let operator = self.advance().unwrap().tt.clone(); - let rhs = self.$parent(); - expr = Expr::Binary { - operator, - lhs: Box::new(expr), - rhs: Box::new(rhs), - }; - } - - expr - } - }; -} - -#[rustfmt::skip] -#[allow(unused_parens)] -impl<'a> AstParser<'a> { - // Binary expressions in order of precedence from lowest to highest. - binary_expr!(logical_or , logical_and , (TokenType::PipePipe)); - binary_expr!(logical_and , equality , (TokenType::AmpAmp)); - binary_expr!(equality , comparison , (TokenType::BangEq | TokenType::EqEq)); - binary_expr!(comparison , bitwise_shifting, (TokenType::Lt | TokenType::Gt | TokenType::LtEq | TokenType::GtEq)); - binary_expr!(bitwise_shifting, additive , (TokenType::LtLt | TokenType::GtGt)); - binary_expr!(additive , multiplicative , (TokenType::Plus | TokenType::Minus)); - binary_expr!(multiplicative , unary , (TokenType::Star | TokenType::Slash | TokenType::Perc)); -} - -#[cfg(test)] -mod tests { - use itertools::Itertools; - - use super::AstParser; - use crate::ast::Expr; - use crate::lexer::{Lexer, Literal, TokenType}; - - #[test] - fn basic_expression_a() { - let lexer = Lexer::new("3 + 5 * 4"); - let tokens = lexer.collect_vec(); - - let expected_ast = Expr::Binary { - operator: TokenType::Plus, - lhs: Box::new(Expr::Literal(Literal::Number(3))), - rhs: Box::new(Expr::Binary { - operator: TokenType::Star, - lhs: Box::new(Expr::Literal(Literal::Number(5))), - rhs: Box::new(Expr::Literal(Literal::Number(4))), - }), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.expression(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - - #[test] - fn basic_expression_b() { - let lexer = Lexer::new("17 - (-5 + 5) / 6"); - let tokens = lexer.collect_vec(); - - let expected_ast = Expr::Binary { - operator: TokenType::Minus, - lhs: Box::new(Expr::Literal(Literal::Number(17))), - rhs: Box::new(Expr::Binary { - operator: TokenType::Slash, - lhs: Box::new(Expr::Grouping(Box::new(Expr::Binary { - operator: TokenType::Plus, - lhs: Box::new(Expr::Unary { - operator: TokenType::Minus, - expr: Box::new(Expr::Literal(Literal::Number(5))), - }), - rhs: Box::new(Expr::Literal(Literal::Number(5))), - }))), - rhs: Box::new(Expr::Literal(Literal::Number(6))), - }), - }; - - let mut parser = AstParser::new(tokens); - let generated_ast = parser.expression(); - - println!("Expected AST:\n{expected_ast:#?}\n\n"); - println!("Generated AST:\n{generated_ast:#?}\n\n"); - - assert_eq!(expected_ast, generated_ast); - } - - #[test] - fn basic_expression_c() { - // TODO: - } -} diff --git a/src/ast/printer.rs b/src/ast/printer.rs deleted file mode 100644 index 1aa32ae..0000000 --- a/src/ast/printer.rs +++ /dev/null @@ -1,38 +0,0 @@ -// use super::{AstVisitor, Expr, Stmt}; - -// pub struct AstPrettyPrinter; -// impl AstVisitor<String> for AstPrettyPrinter { -// fn visit_stmt(&self, stmt: &Stmt) -> String { -// match stmt { -// Stmt::Expr(expr) => self.visit_expr(expr), -// Stmt::Val(name, expr) => format!("(val '{}' <- {})", name, -// self.visit_expr(expr)), Stmt::Var(name, expr) => format!("(var -// '{}' <- {})", name, self.visit_expr(expr)), } -// } - -// fn visit_expr(&self, expr: &Expr) -> String { -// match expr { -// Expr::Literal(i) => i.to_string(), -// Expr::Add(lhs, rhs) => { -// let lhs = self.visit_expr(lhs); -// let rhs = self.visit_expr(rhs); -// format!("({lhs} + {rhs})") -// } -// Expr::Sub(lhs, rhs) => { -// let lhs = self.visit_expr(lhs); -// let rhs = self.visit_expr(rhs); -// format!("({lhs} - {rhs})") -// } -// Expr::Mul(lhs, rhs) => { -// let lhs = self.visit_expr(lhs); -// let rhs = self.visit_expr(rhs); -// format!("({lhs} * {rhs})") -// } -// Expr::Div(lhs, rhs) => { -// let lhs = self.visit_expr(lhs); -// let rhs = self.visit_expr(rhs); -// format!("({lhs} / {rhs})") -// } -// } -// } -// } diff --git a/src/interpreter.rs b/src/interpreter.rs deleted file mode 100644 index a7937db..0000000 --- a/src/interpreter.rs +++ /dev/null @@ -1,221 +0,0 @@ -use std::collections::HashMap; -use std::fmt::Display; - -use itertools::Itertools; - -use crate::ast::{AstVisitor, Expr, Stmt}; -use crate::lexer::{Literal, TokenType}; - -#[derive(Default)] -pub struct AstInterpreter { - pub callables: HashMap<String, Box<dyn SlothCallable>>, - memory: HashMap<String, (Value, bool)>, -} - -impl AstVisitor<Value> for AstInterpreter { - fn visit_stmt(&mut self, stmt: &Stmt) -> Value { - match stmt { - Stmt::Block(stmts) => { - self.interpret(stmts); - } - Stmt::Expr(expr) => { - self.visit_expr(expr); - } - Stmt::Val { ident, value } => { - let value = self.visit_expr(value); - self.memory.insert(ident.clone(), (value, false)); - } - Stmt::Var { ident, value } => { - let value = self.visit_expr(value); - self.memory.insert(ident.clone(), (value, true)); - } - Stmt::Assignment { ident, value } => { - if !self.memory.contains_key(ident) { - panic!("Cannot assign to variable that doesn't exist"); - } - - if !self.memory[ident].1 { - panic!("Cannot mutate value '{ident}'"); - } - - let value = self.visit_expr(value); - self.memory.insert(ident.clone(), (value, true)); - } - Stmt::Function { - ident: _, - arguments: _, - return_type: _, - body: _, - } => todo!(), - Stmt::If { condition, body } => { - let result = self.visit_expr(condition); - if result == Value::Bool(true) { - self.interpret(body); - } - } - Stmt::For { - binding, - range, - body, - } => { - let Value::Number(lower_range) = self.visit_expr(&range.0) else { panic!("Lower range must be number") }; - let Value::Number(upper_range) = self.visit_expr(&range.1) else { panic!("Upper range must be number") }; - - for i in lower_range..upper_range { - self.memory - .insert(binding.clone(), (Value::Number(i), false)); - self.interpret(body); - } - - self.memory.remove(binding); - } - Stmt::While { condition, body } => { - while self.visit_expr(condition) == Value::Bool(true) { - self.interpret(body); - } - } - Stmt::Return { value: _ } => todo!(), - }; - - // FIXME: Honestly should probably abandon this "visitor" pattern. 2 functions - // with these match statements would work better - Value::Nil - } - - fn visit_expr(&mut self, expr: &Expr) -> Value { - match expr { - Expr::Literal(literal) => match literal { - Literal::String(value) => Value::String(value.clone()), - Literal::Character(value) => Value::String(value.to_string()), - Literal::Number(value) => Value::Number(*value), - Literal::Bool(value) => Value::Bool(*value), - Literal::Nil => Value::Nil, - }, - Expr::Variable(ident) => self.memory.get(ident).unwrap().clone().0, - Expr::Grouping(child) => self.visit_expr(child), - Expr::Binary { operator, lhs, rhs } => { - let lhs = self.visit_expr(lhs); - let rhs = self.visit_expr(rhs); - - if let Value::Number(lhs) = lhs && let Value::Number(rhs) = rhs { - match operator { - TokenType::Plus => Value::Number(lhs + rhs), - TokenType::Minus => Value::Number(lhs - rhs), - TokenType::Star => Value::Number(lhs * rhs), - TokenType::Slash => Value::Number(lhs / rhs), - TokenType::Perc => Value::Number(lhs % rhs), - - TokenType::Gt => Value::Bool(lhs > rhs), - TokenType::GtEq => Value::Bool(lhs >= rhs), - TokenType::Lt => Value::Bool(lhs < rhs), - TokenType::LtEq => Value::Bool(lhs <= rhs), - - TokenType::BangEq => Value::Bool(lhs != rhs), - TokenType::EqEq => Value::Bool(lhs == rhs), - - _ => panic!(), - } - } else if let Value::Bool(lhs) = lhs && let Value::Bool(rhs) = rhs { - match operator { - TokenType::AmpAmp => Value::Bool(lhs && rhs), - TokenType::PipePipe => Value::Bool(lhs || rhs), - _ => panic!() - } - } else if let Value::String(lhs) = lhs && let Value::String(rhs) = rhs { - match operator { - TokenType::Plus => { - let mut value = lhs; - value.push_str(&rhs); - Value::String(value) - }, - TokenType::BangEq => Value::Bool(lhs != rhs), - TokenType::EqEq => Value::Bool(lhs == rhs), - _ => panic!() - } - } else { - panic!("Invalid operations for types"); - } - } - Expr::Unary { operator, expr } => { - let value = self.visit_expr(expr); - - match operator { - TokenType::Bang => { - let Value::Bool(value) = value else { - panic!("Invalid operations for types"); - }; - - Value::Bool(!value) - } - TokenType::Plus => value, - TokenType::Minus => { - let Value::Number(value) = value else { - panic!("Invalid operations for types"); - }; - - Value::Number(-value) - } - _ => panic!(), - } - } - Expr::Call { ident, arguments } => { - let argument_values = arguments.iter().map(|it| self.visit_expr(it)).collect_vec(); - let Some(callable) = self.callables.remove(ident) else { - panic!("Unkown callable '{ident}'"); - }; - - let result = callable.call(self, &argument_values); - self.callables.insert(ident.clone(), callable); - result - } - } - } -} - -impl AstInterpreter { - pub fn interpret(&mut self, stmts: &Vec<Stmt>) { - for stmt in stmts { - self.visit_stmt(stmt); - } - } -} - -#[derive(Clone, Eq, PartialEq)] -pub enum Value { - Number(i32), - String(String), - Bool(bool), - Nil, -} - -pub trait SlothCallable { - fn call(&self, interpreter: &mut AstInterpreter, args: &[Value]) -> Value; -} - -pub struct InternalFunction<'a>(pub &'a dyn Fn(&[Value]) -> Value); - -impl<'a> SlothCallable for InternalFunction<'a> { - fn call(&self, _interpreter: &mut AstInterpreter, args: &[Value]) -> Value { - self.0(args) - } -} - -// pub struct SlothFunction(Vec<Stmt>); - -impl Display for Value { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Value::Number(value) => write!(f, "{value}")?, - Value::String(value) => write!(f, "{value}")?, - Value::Bool(value) => write!(f, "{value}")?, - Value::Nil => write!(f, "nil")?, - } - - Ok(()) - } -} - -#[cfg(test)] -mod test { - // -} diff --git a/src/lexer.rs b/src/lexer.rs index 88d86bd..ef79716 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,84 +1,49 @@ #![allow(dead_code)] -#[derive(Debug, Clone, Eq, PartialEq)] -pub enum TokenType { - // Utility - DocComment(String), - Comment(String), - - // Short - Plus, // + - Minus, // - - Slash, // / - Star, // * - Perc, // % - - PlusEq, // += - MinusEq, // -= - SlashEq, // /= - StarEq, // *= - PercEq, // %= - - Eq, // = - EqEq, // == - Bang, // ! - BangEq, // != +use thiserror::Error; - Gt, // > - GtGt, // >> - GtEq, // >= - Lt, // < - LtLt, // << - LtEq, // <= - - Amp, // & - AmpAmp, // && - Pipe, // | - PipePipe, // || - - DotDot, // . - - LeftParen, // ( - RightParen, // ) - LeftBracket, // [ - RightBracket, // ] - LeftBrace, // { - RightBrace, // } - - Comma, // , - Dot, // . - Colon, // : - SemiColon, // ; +#[derive(Debug, Error)] +pub enum LexerError { + #[error("Unexpected token")] + UnexpectedToken, +} - // Literals +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum TokenType { + // Meta + DocComment, + Comment, + + // Operatiors + Plus, + Minus, + Star, + Slash, + Perc, + + PlusEq, + MinusEq, + StarEq, + SlashEq, + PercEq, + + // Misc Literal(Literal), - Identifier(String), - - // Keywords - Val, - Var, - Fn, - - If, - Else, - - For, - In, - - While, - - Loop, - Break, - Continue, } #[derive(Debug, Clone, Eq, PartialEq)] pub enum Literal { - String(String), - Character(char), - Number(i32), - Bool(bool), - Nil, + Numeric, + Boolean, + Character, + String, + Regex, +} + +#[derive(Debug, Default)] +pub struct Location { + row: u32, + column: u32, } #[derive(Debug)] @@ -86,52 +51,24 @@ pub struct Token<'a> { pub tt: TokenType, pub lexeme: &'a str, - start: usize, - length: usize, - line: u32, + start: Location, + end: Location, } pub struct Lexer<'a> { source: &'a [u8], - start: usize, // Start of the lexme - pos: usize, // End of the lexme - line: u32, + + start: Location, + end: Location, } impl<'a> Lexer<'a> { - pub fn new(source: &'a str) -> Lexer<'a> { + fn new(source: &'a str) -> Self { Self { source: source.as_bytes(), - start: 0, - pos: 0, - line: 0, - } - } - - fn peek(&self) -> Option<char> { - self.source.get(self.pos).map(|it| *it as char) - } - - fn peek_nth(&self, nth: usize) -> Option<char> { - self.source.get(self.pos + nth).map(|it| *it as char) - } - - fn advance(&mut self) -> Option<char> { - self.pos += 1; - self.source.get(self.pos - 1).map(|it| *it as char) - } - - fn advance_if(&mut self, next: impl FnOnce(Option<char>) -> bool) -> bool { - if next(self.peek()) { - self.advance(); - return true; + start: Default::default(), + end: Default::default(), } - - false - } - - fn advance_if_eq(&mut self, next: Option<char>) -> bool { - self.advance_if(|it| it == next) } } @@ -139,269 +76,6 @@ impl<'a> Iterator for Lexer<'a> { type Item = Token<'a>; fn next(&mut self) -> Option<Self::Item> { - // Ignore all whitespace - loop { - match self.peek() { - Some('\n') => self.line += 1, - Some(' ') | Some('\r') | Some('\t') => (), - _ => break, - } - self.advance(); - } - - // Resetting the lexeme - self.start = self.pos; - - // Parse the next lexeme- If it is EOF return nothing - let Some(character) = self.advance() else { - return None; - }; - - let tt = match character { - // Whitespace & Comments - '#' if self.advance_if_eq(Some('#')) => { - let mut value = String::new(); - while self.peek() != Some('\n') { - value.push(self.advance().unwrap()); - } - - TokenType::DocComment(value) - } - - '#' => { - let mut value = String::new(); - while self.peek() != Some('\n') { - value.push(self.advance().unwrap()); - } - - TokenType::Comment(value) - } - - // Arithmetic - '+' if self.advance_if_eq(Some('=')) => TokenType::PlusEq, - '-' if self.advance_if_eq(Some('=')) => TokenType::MinusEq, - '*' if self.advance_if_eq(Some('=')) => TokenType::StarEq, - '/' if self.advance_if_eq(Some('=')) => TokenType::SlashEq, - '%' if self.advance_if_eq(Some('=')) => TokenType::PercEq, - '+' => TokenType::Plus, - '-' => TokenType::Minus, - '*' => TokenType::Star, - '/' => TokenType::Slash, - '%' => TokenType::Perc, - - '0'..='9' => { - let mut value = String::new(); - value.push(character); - while let Some('0'..='9') = &self.peek() { - value.push(self.advance().unwrap()); - } - - if self.peek() == Some('.') && self.peek_nth(1) != Some('.') { - self.advance(); - value.push('.'); - while self.peek().unwrap().is_ascii_digit() { - value.push(self.advance().unwrap()); - } - } - TokenType::Literal(Literal::Number(value.parse::<i32>().unwrap())) - } - - // Logical & Bitwise - '!' if self.advance_if_eq(Some('=')) => TokenType::BangEq, - '=' if self.advance_if_eq(Some('=')) => TokenType::EqEq, - '>' if self.advance_if_eq(Some('>')) => TokenType::GtGt, - '>' if self.advance_if_eq(Some('=')) => TokenType::GtEq, - '<' if self.advance_if_eq(Some('<')) => TokenType::LtLt, - '<' if self.advance_if_eq(Some('=')) => TokenType::LtEq, - '!' => TokenType::Bang, - '=' => TokenType::Eq, - '>' => TokenType::Gt, - '<' => TokenType::Lt, - - '&' if self.advance_if_eq(Some('&')) => TokenType::AmpAmp, - '|' if self.advance_if_eq(Some('|')) => TokenType::PipePipe, - '&' => TokenType::Amp, - '|' => TokenType::Pipe, - - // Misc. Operators - '.' if self.advance_if_eq(Some('.')) => TokenType::DotDot, - - // Scope - '(' => TokenType::LeftParen, - ')' => TokenType::RightParen, - '[' => TokenType::LeftBracket, - ']' => TokenType::RightBracket, - '{' => TokenType::LeftBrace, - '}' => TokenType::RightBrace, - ',' => TokenType::Comma, - '.' => TokenType::Dot, - ':' => TokenType::Colon, - ';' => TokenType::SemiColon, - - '"' => { - let mut value = String::new(); - while self.peek() != Some('"') { - let Some(character) = self.advance() else { - panic!("Syntax Error: String invalid"); - }; - - if character == '\\' { - match self.advance().unwrap() { - '\\' => value.push('\\'), - '"' => value.push('"'), - 'n' => value.push('\n'), - _ => panic!(), - } - continue; - } - - value.push(character); - } - - self.advance(); - TokenType::Literal(Literal::String(value)) - } - - // Keywords & Identifiers - 'a'..='z' | 'A'..='Z' | '_' => { - let mut value = String::new(); - value.push(character); - - while let Some(character) = self.peek() && matches!(character, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') { - value.push(self.advance().unwrap()); - } - - match value.as_str() { - "val" => TokenType::Val, - "var" => TokenType::Var, - "fn" => TokenType::Fn, - "if" => TokenType::If, - "else" => TokenType::Else, - "for" => TokenType::For, - "in" => TokenType::In, - "while" => TokenType::While, - "loop" => TokenType::Loop, - "break" => TokenType::Break, - "continue" => TokenType::Continue, - "true" => TokenType::Literal(Literal::Bool(true)), - "false" => TokenType::Literal(Literal::Bool(false)), - _ => TokenType::Identifier(value), - } - } - - // Misc. - _ => panic!("Failed to parse"), - }; - - let lexeme = unsafe { - // If it got to this point we know the slice is valid UTF-8. The only area in - // the language that UTF-8 characters are recognized is within strings. - std::str::from_utf8_unchecked(&self.source[self.start..self.pos]) - }; - - let token = Token { - tt, - lexeme, - start: self.start, - length: self.pos - self.start, - line: self.line, - }; - - Some(token) - } -} - -#[cfg(test)] -mod tests { - extern crate test; - - use test::Bencher; - - use super::{Lexer, Literal, TokenType}; - - const SAMPLE_PROGRAM: &str = r#" -val variable = 5; - -if variable >= 7 { - print("Hello World"); -} - -if variable < 52 { - variable += 1; - print("Hello ${variable}"); -} - -for person in ["Cody", "Johnny"] { - print("Hello ${person}"); -} -"#; - - #[test] - fn simple_code() { - let tokens = vec![ - // top - TokenType::Val, - TokenType::Identifier("variable".to_owned()), - TokenType::Eq, - TokenType::Literal(Literal::Number(5)), - TokenType::SemiColon, - // 1st block - TokenType::If, - TokenType::Identifier("variable".to_owned()), - TokenType::GtEq, - TokenType::Literal(Literal::Number(7)), - TokenType::LeftBrace, - TokenType::Identifier("print".to_owned()), - TokenType::LeftParen, - TokenType::Literal(Literal::String("Hello World".to_owned())), - TokenType::RightParen, - TokenType::SemiColon, - TokenType::RightBrace, - // 2nd block - TokenType::If, - TokenType::Identifier("variable".to_owned()), - TokenType::Lt, - TokenType::Literal(Literal::Number(52)), - TokenType::LeftBrace, - TokenType::Identifier("variable".to_owned()), - TokenType::PlusEq, - TokenType::Literal(Literal::Number(1)), - TokenType::SemiColon, - TokenType::Identifier("print".to_owned()), - TokenType::LeftParen, - TokenType::Literal(Literal::String("Hello ${variable}".to_owned())), - TokenType::RightParen, - TokenType::SemiColon, - TokenType::RightBrace, - // 3rd block - TokenType::For, - TokenType::Identifier("person".to_owned()), - TokenType::In, - TokenType::LeftBracket, - TokenType::Literal(Literal::String("Cody".to_owned())), - TokenType::Comma, - TokenType::Literal(Literal::String("Johnny".to_owned())), - TokenType::RightBracket, - TokenType::LeftBrace, - TokenType::Identifier("print".to_owned()), - TokenType::LeftParen, - TokenType::Literal(Literal::String("Hello ${person}".to_owned())), - TokenType::RightParen, - TokenType::SemiColon, - TokenType::RightBrace, - ]; - - let lexed_code = Lexer::new(SAMPLE_PROGRAM) - .map(|it| it.tt) - .collect::<Vec<_>>(); - - assert_eq!(tokens, lexed_code); - } - - #[bench] - fn bench_lexer(b: &mut Bencher) { - b.iter(|| { - let _ = Lexer::new(SAMPLE_PROGRAM).collect::<Vec<_>>(); - }); + unimplemented!() } } diff --git a/src/main.rs b/src/main.rs index 91db8c6..5d4cafc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,19 +7,11 @@ unused_lifetimes )] -pub mod ast; -pub mod interpreter; pub mod lexer; -use std::io::{BufRead, Write}; -use std::{env, fs, io}; +use std::{env, fs}; use itertools::Itertools; -use rand::Rng; - -use crate::ast::parser::AstParser; -use crate::interpreter::{AstInterpreter, InternalFunction, Value}; -use crate::lexer::Lexer; fn main() { let args = env::args().collect_vec(); @@ -36,108 +28,5 @@ fn main() { return; }; - let lexer = Lexer::new(&source); - let tokens = lexer.collect_vec(); - let mut parser = AstParser::new(tokens); - let ast = parser.parse(); - - println!("--- Program Output ---"); - - let mut interpreter = AstInterpreter::default(); - - // Defining some builtin callables for our interpreter - interpreter.callables.insert( - "print".to_owned(), - Box::new(InternalFunction(&|args| { - use std::fmt::Write; - - let mut buffer = String::new(); - for arg in args { - write!(&mut buffer, "{}", arg).unwrap(); - } - - let mut stdout = io::stdout(); - stdout.lock().write_all(buffer.as_bytes()).unwrap(); - stdout.flush().unwrap(); - - Value::Nil - })), - ); - - interpreter.callables.insert( - "println".to_owned(), - Box::new(InternalFunction(&|args| { - use std::fmt::Write; - - let mut buffer = String::new(); - for arg in args { - write!(&mut buffer, "{}", arg).unwrap(); - } - writeln!(&mut buffer).unwrap(); - - let mut stdout = io::stdout(); - stdout.lock().write_all(buffer.as_bytes()).unwrap(); - stdout.flush().unwrap(); - - Value::Nil - })), - ); - - interpreter.callables.insert( - "readln".to_owned(), - Box::new(InternalFunction(&|_| { - let stdin = io::stdin(); - let mut line = String::new(); - stdin - .lock() - .read_line(&mut line) - .expect("Failed to read line from stdin"); - line.pop(); - - Value::String(line) - })), - ); - - interpreter.callables.insert( - "random".to_owned(), - Box::new(InternalFunction(&|args| { - let result = match args { - [] => rand::thread_rng().gen_range(1..=100), - [Value::Number(max)] => rand::thread_rng().gen_range(0..=*max), - [Value::Number(min), Value::Number(max)] => { - rand::thread_rng().gen_range(*min..=*max) - } - _ => panic!("Invalid usage of 'random' function"), - }; - - Value::Number(result) - })), - ); - - interpreter.callables.insert( - "len".to_owned(), - Box::new(InternalFunction(&|args| { - let result = match &args[0] { - Value::String(value) => value.len() as i32, - _ => panic!("Invalid usage of 'len' function"), - }; - - Value::Number(result) - })), - ); - - interpreter.callables.insert( - "parse_int".to_owned(), - Box::new(InternalFunction(&|args| { - let result = match &args[0] { - Value::String(value) => value.parse::<i32>(), - _ => panic!("Invalid usage of 'parse_int' function"), - } - .expect("Provided string was not an intenger"); - - Value::Number(result) - })), - ); - - interpreter.interpret(&ast); + // TODO: } |
