diff options
| author | nic-gaffney <gaffney_nic@protonmail.com> | 2023-04-10 23:12:20 -0500 |
|---|---|---|
| committer | nic-gaffney <gaffney_nic@protonmail.com> | 2023-04-10 23:12:20 -0500 |
| commit | 047ce431153c3b39b22e18eac3c149d2e12a9fd6 (patch) | |
| tree | c8f07fe511b214883339df39ab2ce317b636e332 /crates | |
| parent | a72f47465c34a8812993e764c4444a228eaf061e (diff) | |
| download | sloth-047ce431153c3b39b22e18eac3c149d2e12a9fd6.tar.gz | |
Stole the old code and refactored it for the new shit, gonna go and change some shit up in the future
Diffstat (limited to 'crates')
| -rw-r--r-- | crates/sloth/src/parser/ast.rs | 79 |
| -rw-r--r-- | crates/sloth/src/parser/expr.rs | 206 |
| -rw-r--r-- | crates/sloth/src/parser/stmt.rs | 158 |
3 files changed, 433 insertions, 10 deletions
diff --git a/crates/sloth/src/parser/ast.rs b/crates/sloth/src/parser/ast.rs index 45c4d8f..d472269 100644 --- a/crates/sloth/src/parser/ast.rs +++ b/crates/sloth/src/parser/ast.rs @@ -1,3 +1,6 @@ +use crate::lexer::{Token, TokenType}; +#[derive(Debug)] +#[derive(PartialEq)] pub enum BinaryOp { Add, Con, @@ -22,28 +25,32 @@ pub enum BinaryOp { LogAnd, LogOr, } - +#[derive(Debug)] +#[derive(PartialEq)] pub enum UnaryOp { Not, Neg, BWComp, } - +#[derive(Debug)] +#[derive(PartialEq)] pub enum Literal { Integer(i128), Float(f64), Bool(bool), Char(char), String(String), - Regex(String), - List(Vec<Expr>), // TODO: holy shit we forgor empty listys + Regex(String), + List(Vec<Expr>), // TODO: holy shit we forgor listys } - +#[derive(Debug)] +#[derive(PartialEq)] pub enum Expr { + Grouping(Box<Expr>), BinaryOp { op: BinaryOp, - lhs: Box<Expr>, + lhs: Box<Expr>, rhs: Box<Expr>, }, UnaryOp { @@ -64,7 +71,7 @@ pub struct FuncArgs { pub typ: Option<String>, } -pub enum Stmt { +pub enum Stmt { ExprStmt(Expr), DefineFunction { ident: String, @@ -83,13 +90,13 @@ pub enum Stmt { typ: Option<String>, }, If { - expr: Vec<Expr>, + expr: Expr, body: Vec<Stmt>, else_if: Vec<(Expr, Stmt)>, els: Option<Box<Stmt>>, }, For { - name: Expr, + name: String, iter: Expr, body: Vec<Stmt>, }, @@ -97,4 +104,56 @@ pub enum Stmt { condition: Expr, body: Vec<Stmt>, }, -}
\ No newline at end of file +} + +pub struct AstParser<'a> { + tokens: Vec<Token<'a>>, + index: usize, +} + +/// Implementation containing utilities used by the parsers internal components +impl<'a> AstParser<'a> { + pub fn new(tokens: Vec<Token<'a>>) -> Self { + Self { tokens, index: 0 } + } + pub fn peek(&self) -> &Token { + &self.tokens[self.index] + } + + pub fn advance(&mut self) -> Option<&Token> { + if self.eof() { + return None; + } + + self.index += 1; + Some(&self.tokens[self.index - 1]) + } + + pub fn advance_if(&mut self, next: impl FnOnce(&Token) -> bool) -> bool { + if self.eof() { + return false; + } + + if next(self.peek()) { + self.advance(); + return true; + } + + false + } + + pub fn advance_if_eq(&mut self, next: &TokenType) -> bool { + self.advance_if(|it| it.tt == *next) + } + + pub fn consume(&mut self, next: TokenType, error: &str) { + if std::mem::discriminant(&self.peek().tt) != std::mem::discriminant(&next) { + panic!("{error}"); + } + self.advance(); + } + + pub fn eof(&self) -> bool { + self.index >= self.tokens.len() + } +} diff --git a/crates/sloth/src/parser/expr.rs b/crates/sloth/src/parser/expr.rs index 8b13789..84aab31 100644 --- a/crates/sloth/src/parser/expr.rs +++ b/crates/sloth/src/parser/expr.rs @@ -1 +1,207 @@ +use super::ast::{AstParser, BinaryOp, Expr, Literal, UnaryOp}; +use crate::lexer::TokenType; +/// Implementation containing parsers internal components related to expressions +impl<'a> AstParser<'a> { + // FIXME: Should probably avoid cloning token types + + pub fn expression(&mut self) -> Expr { + self.logical_or() + } + + fn unary(&mut self) -> Expr { + if !self.eof() + && matches!( + self.peek().tt, + TokenType::Bang | TokenType::Plus | TokenType::Minus + ) + { + let operator = match self.advance().unwrap().tt.clone() { + TokenType::Bang => UnaryOp::Not, + TokenType::Tilde => UnaryOp::BWComp, + TokenType::Minus => UnaryOp::Neg, + _ => UnaryOp::Neg, // TODO: Idk how to not have this shit + }; + + let rhs = 
self.unary(); + return Expr::UnaryOp { + op: (operator), + value: (Box::new(rhs)), + }; + } + + self.call() + } + + fn call(&mut self) -> Expr { + let mut expr = self.primary(); + + if self.advance_if_eq(&TokenType::OpeningParen) { + let mut arguments = Vec::<Expr>::new(); + + if self.peek().tt != TokenType::ClosingParen { + loop { + arguments.push(self.expression()); + if !self.advance_if_eq(&TokenType::Comma) { + break; + } + } + } + + self.consume( + TokenType::ClosingParen, + "Expected ')' to close off function call", + ); + + // let Expr::Variable(_ident) = expr else { panic!("uh oh spaghettio"); }; + + expr = Expr::Call { + ident: (Box::new(expr)), + args: (arguments), + } + } + + expr + } + + fn primary(&mut self) -> Expr { + match self.advance().unwrap().tt.clone() { + TokenType::Integer(literal) => Expr::Literal(Literal::Integer(literal)), + TokenType::Float(literal) => Expr::Literal(Literal::Float(literal)), + TokenType::Boolean(literal) => Expr::Literal(Literal::Bool(literal)), + TokenType::Character(literal) => Expr::Literal(Literal::Char(literal)), + TokenType::String(literal) => Expr::Literal(Literal::String(literal)), + TokenType::Regex(literal) => Expr::Literal(Literal::Regex(literal)), + TokenType::Identifier(ident) => Expr::Variable(ident), + TokenType::OpeningParen => { + let expr = self.expression(); + self.consume(TokenType::ClosingParen, "Must end expression with ')'"); + Expr::Grouping(Box::new(expr)) + } + _ => unimplemented!("{:?}", self.peek()), + } + } +} + +// Macro to generate repetitive binary expressions. Things like addition, +// multiplication, exc. +macro_rules! 
binary_expr { + ($name:ident, $parent:ident, $pattern:pat) => { + fn $name(&mut self) -> Expr { + let mut expr = self.$parent(); + + while !self.eof() && matches!(self.peek().tt, $pattern) { + let operator = match self.advance().unwrap().tt.clone() { + TokenType::Plus => BinaryOp::Add, + TokenType::PlusPlus => BinaryOp::Con, + TokenType::Minus => BinaryOp::Sub, + TokenType::Star => BinaryOp::Mul, + TokenType::StarStar => BinaryOp::Pow, + TokenType::Slash => BinaryOp::Div, + TokenType::Perc => BinaryOp::Mod, + + TokenType::LtLt => BinaryOp::BWSftRight, + TokenType::GtGt => BinaryOp::BWSftLeft, + TokenType::Amp => BinaryOp::BWAnd, + TokenType::Pipe => BinaryOp::BWOr, + TokenType::Caret => BinaryOp::BWXor, + + TokenType::Lt => BinaryOp::Lt, + TokenType::Gt => BinaryOp::Gt, + TokenType::LtEq => BinaryOp::LtEq, + TokenType::GtEq => BinaryOp::GtEq, + TokenType::EqEq => BinaryOp::EqEq, + TokenType::BangEq => BinaryOp::NotEq, + TokenType::AmpAmp => BinaryOp::LogAnd, + TokenType::PipePipe => BinaryOp::LogOr, + _ => BinaryOp::Add, // TODO: Idk how to not have this shit + }; + + let rhs = self.$parent(); + expr = Expr::BinaryOp { + op: (operator), + lhs: (Box::new(expr)), + rhs: (Box::new(rhs)), + } + } + + expr + } + }; +} + +#[rustfmt::skip] +#[allow(unused_parens)] +impl<'a> AstParser<'a> { + // Binary expressions in order of precedence from lowest to highest. 
+ binary_expr!(logical_or , logical_and , (TokenType::PipePipe)); + binary_expr!(logical_and , equality , (TokenType::AmpAmp)); + binary_expr!(equality , comparison , (TokenType::BangEq | TokenType::EqEq)); + binary_expr!(comparison , bitwise_shifting, (TokenType::Lt | TokenType::Gt | TokenType::LtEq | TokenType::GtEq)); + binary_expr!(bitwise_shifting, additive , (TokenType::LtLt | TokenType::GtGt)); + binary_expr!(additive , multiplicative , (TokenType::Plus | TokenType::Minus)); + binary_expr!(multiplicative , unary , (TokenType::Star | TokenType::Slash | TokenType::Perc)); +} + +#[cfg(test)] +mod tests { + use itertools::Itertools; + use super::{AstParser, BinaryOp, Expr, Literal}; + + use crate::{lexer::{Lexer}, parser::ast::UnaryOp}; + + #[test] + fn basic_expression_a() { + let lexer = Lexer::new("3 + 5 * 4"); + let tokens = lexer.collect_vec(); + + let expected_ast = Expr::BinaryOp { + op: BinaryOp::Add, + lhs: Box::new(Expr::Literal(Literal::Integer(3))), + rhs: Box::new(Expr::BinaryOp { + op: BinaryOp::Mul, + lhs: Box::new(Expr::Literal(Literal::Integer(5))), + rhs: Box::new(Expr::Literal(Literal::Integer(4))), + }), + }; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.expression(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } + + #[test] + fn basic_expression_b() { + let lexer = Lexer::new("17 - (-5 + 5) / 6"); + let tokens = lexer.collect_vec(); + + let expected_ast = Expr::BinaryOp { + op: BinaryOp::Sub, + lhs: Box::new(Expr::Literal(Literal::Integer(17))), + rhs: Box::new(Expr::BinaryOp { + op: BinaryOp::Div, + lhs: Box::new(Expr::Grouping(Box::new(Expr::BinaryOp { + op: BinaryOp::Add, + lhs: Box::new(Expr::UnaryOp { + op: UnaryOp::Neg, + value: Box::new(Expr::Literal(Literal::Integer(5))), + }), + rhs: Box::new(Expr::Literal(Literal::Integer(5))), + }))), + rhs: Box::new(Expr::Literal(Literal::Integer(6))), + }), + 
}; + + let mut parser = AstParser::new(tokens); + let generated_ast = parser.expression(); + + println!("Expected AST:\n{expected_ast:#?}\n\n"); + println!("Generated AST:\n{generated_ast:#?}\n\n"); + + assert_eq!(expected_ast, generated_ast); + } +}
\ No newline at end of file diff --git a/crates/sloth/src/parser/stmt.rs b/crates/sloth/src/parser/stmt.rs index 8b13789..c5c0c6a 100644 --- a/crates/sloth/src/parser/stmt.rs +++ b/crates/sloth/src/parser/stmt.rs @@ -1 +1,159 @@ +use super::ast::{AstParser, Expr, Stmt}; +use crate::lexer::TokenType; +impl<'a> AstParser<'a> { + pub fn parse(&mut self) -> Vec<Stmt> { + let mut statements = Vec::new(); + + while !self.eof() { + statements.push(self.statement()); + } + + statements + } + + fn statement(&mut self) -> Stmt { + if self.advance_if_eq(&TokenType::Var) { + return self.var_statement(); + } + + if self.advance_if_eq(&TokenType::Val) { + return self.val_statement(); + } + + if self.advance_if_eq(&TokenType::If) { + return self.if_statement(); + } + + // if self.advance_if_eq(&TokenType::For) { + // return self.for_statement(); + // } + + if self.advance_if_eq(&TokenType::While) { + return self.while_statement(); + } + + // If we couldn't parse a statement return an expression statement + self.expression_statement() + } + + fn var_statement(&mut self) -> Stmt { + let TokenType::Identifier(ident) = self.peek().tt.clone() else { + panic!("Identifier expected after 'var'"); + }; + + self.advance(); // Advancing from the identifier TODO: Check for type + self.consume(TokenType::Eq, "Expected '=' after identifier"); + + let value = self.expression(); + + self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); + + Stmt::DefineVariable { + name: (ident), + value: (value), + typ: (None), + } + } + + fn val_statement(&mut self) -> Stmt { + let TokenType::Identifier(ident) = self.peek().tt.clone() else { + panic!("Identifier expected after 'val'"); + }; + + self.advance(); // Advancing from the identifier + self.consume(TokenType::Eq, "Expected '=' after identifier"); + + let value = self.expression(); + + self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); + + Stmt::DefineValue { + name: (ident), + value: (value), + typ: (None), + } + 
} + + fn if_statement(&mut self) -> Stmt { + let condition = self.expression(); + + self.consume( + TokenType::OpeningBrace, + "Expected '{' at beggining of block", + ); + let mut body = Vec::new(); + while !self.eof() && self.peek().tt != TokenType::ClosingBrace { + body.push(self.statement()); + } + + Stmt::If { + expr: (condition), + body: (body), + else_if: (Vec::new()), + els: (None), + } // TODO: implement else if and else + } + + // fn for_statement(&mut self) -> Stmt { + // let binding = self.expression(); + // let Expr::Variable(binding) = binding else { + // panic!("Left side of for statement must be identifier"); + // }; + + // self.consume( + // TokenType::In, + // "Expected 'in' in between identifier and range", + // ); + + // let range_start = self.expression(); + // self.consume( + // TokenType::DotDot, + // "Expected '..' denoting min and max of range", + // ); + // let range_end = self.expression(); + + // let mut body = Vec::new(); + // while !self.eof() && self.peek().tt != TokenType::ClosingBrace { + // body.push(self.statement()); + // } + + // Stmt::For { name: (binding), iter: (), body: (body) } + // } TODO: Fix this garbage + + fn while_statement(&mut self) -> Stmt { + let condition = self.expression(); + + self.consume( + TokenType::OpeningBrace, + "Expected '{' at beggining of block", + ); + let mut body = Vec::new(); + while !self.eof() && self.peek().tt != TokenType::ClosingBrace { + body.push(self.statement()); + } + + Stmt::While { condition, body } + } + + fn expression_statement(&mut self) -> Stmt { + let expr = self.expression(); + + // FIXME: Move assignment handling + if self.advance_if_eq(&TokenType::Eq) { + if let Expr::Variable(ident) = &expr { + let value = self.expression(); + + self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); + return Stmt::DefineVariable { + name: (ident.clone()), + value: (value), + typ: (None), + }; + } + } + + self.consume(TokenType::SemiColon, "Expected ';' at end of statement"); 
+ Stmt::ExprStmt(expr) + } +} |
