aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--crates/sloth/src/parser/ast.rs79
-rw-r--r--crates/sloth/src/parser/expr.rs206
-rw-r--r--crates/sloth/src/parser/stmt.rs158
3 files changed, 433 insertions, 10 deletions
diff --git a/crates/sloth/src/parser/ast.rs b/crates/sloth/src/parser/ast.rs
index 45c4d8f..d472269 100644
--- a/crates/sloth/src/parser/ast.rs
+++ b/crates/sloth/src/parser/ast.rs
@@ -1,3 +1,6 @@
+use crate::lexer::{Token, TokenType};
+#[derive(Debug)]
+#[derive(PartialEq)]
pub enum BinaryOp {
Add,
Con,
@@ -22,28 +25,32 @@ pub enum BinaryOp {
LogAnd,
LogOr,
}
-
+#[derive(Debug)]
+#[derive(PartialEq)]
pub enum UnaryOp {
Not,
Neg,
BWComp,
}
-
+#[derive(Debug)]
+#[derive(PartialEq)]
pub enum Literal {
Integer(i128),
Float(f64),
Bool(bool),
Char(char),
String(String),
- Regex(String),
- List(Vec<Expr>), // TODO: holy shit we forgor empty listys
+ Regex(String),
+ List(Vec<Expr>), // TODO: holy shit we forgor listys
}
-
+#[derive(Debug)]
+#[derive(PartialEq)]
pub enum Expr {
+ Grouping(Box<Expr>),
BinaryOp {
op: BinaryOp,
- lhs: Box<Expr>,
+ lhs: Box<Expr>,
rhs: Box<Expr>,
},
UnaryOp {
@@ -64,7 +71,7 @@ pub struct FuncArgs {
pub typ: Option<String>,
}
-pub enum Stmt {
+pub enum Stmt {
ExprStmt(Expr),
DefineFunction {
ident: String,
@@ -83,13 +90,13 @@ pub enum Stmt {
typ: Option<String>,
},
If {
- expr: Vec<Expr>,
+ expr: Expr,
body: Vec<Stmt>,
else_if: Vec<(Expr, Stmt)>,
els: Option<Box<Stmt>>,
},
For {
- name: Expr,
+ name: String,
iter: Expr,
body: Vec<Stmt>,
},
@@ -97,4 +104,56 @@ pub enum Stmt {
condition: Expr,
body: Vec<Stmt>,
},
-} \ No newline at end of file
+}
+
+/// Parser state: the tokens being parsed plus a cursor into them.
+pub struct AstParser<'a> {
+ // Tokens being parsed.
+ tokens: Vec<Token<'a>>,
+ // Index of the next token to read; `eof()` is true once this reaches `tokens.len()`.
+ index: usize,
+}
+
+/// Implementation containing utilities used by the parser's internal components
+impl<'a> AstParser<'a> {
+    /// Creates a parser positioned at the first token of `tokens`.
+    pub fn new(tokens: Vec<Token<'a>>) -> Self {
+        Self { tokens, index: 0 }
+    }
+
+    /// Returns the current token without consuming it.
+    ///
+    /// # Panics
+    ///
+    /// Panics (index out of bounds) when the parser is at the end of the token
+    /// stream; callers must check [`Self::eof`] first.
+    pub fn peek(&self) -> &Token {
+        &self.tokens[self.index]
+    }
+
+    /// Consumes the current token and returns it, or returns `None` without
+    /// moving when the end of the token stream has been reached.
+    pub fn advance(&mut self) -> Option<&Token> {
+        let token = self.tokens.get(self.index)?;
+        self.index += 1;
+        Some(token)
+    }
+
+    /// Consumes the current token only if `next` returns `true` for it.
+    ///
+    /// Returns whether a token was consumed; always `false` at end of input, in
+    /// which case `next` is not called.
+    pub fn advance_if(&mut self, next: impl FnOnce(&Token) -> bool) -> bool {
+        if self.eof() || !next(self.peek()) {
+            return false;
+        }
+
+        self.index += 1;
+        true
+    }
+
+    /// Consumes the current token only if its token type equals `next`.
+    pub fn advance_if_eq(&mut self, next: &TokenType) -> bool {
+        self.advance_if(|it| it.tt == *next)
+    }
+
+    /// Consumes the current token, requiring it to be the same variant as `next`.
+    ///
+    /// Only the enum variant is compared (via `std::mem::discriminant`), so any
+    /// payload carried by `next` is ignored.
+    ///
+    /// # Panics
+    ///
+    /// Panics with `error` when the current token is a different variant or when
+    /// the end of the token stream has been reached.
+    pub fn consume(&mut self, next: TokenType, error: &str) {
+        // Check `eof` first: peeking past the end would panic with an
+        // index-out-of-bounds message instead of the caller's `error`.
+        let is_expected = !self.eof()
+            && std::mem::discriminant(&self.peek().tt) == std::mem::discriminant(&next);
+        if !is_expected {
+            panic!("{error}");
+        }
+
+        self.index += 1;
+    }
+
+    /// Returns `true` once every token has been consumed.
+    pub fn eof(&self) -> bool {
+        self.index >= self.tokens.len()
+    }
+}
diff --git a/crates/sloth/src/parser/expr.rs b/crates/sloth/src/parser/expr.rs
index 8b13789..84aab31 100644
--- a/crates/sloth/src/parser/expr.rs
+++ b/crates/sloth/src/parser/expr.rs
@@ -1 +1,207 @@
+use super::ast::{AstParser, BinaryOp, Expr, Literal, UnaryOp};
+use crate::lexer::TokenType;
+/// Implementation containing the parser's internal components related to expressions
+impl<'a> AstParser<'a> {
+    /// Parses one expression, entering at the lowest-precedence level.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the tokens do not form an expression (unexpected token,
+    /// missing `)`, or end of input where an operand is required).
+    pub fn expression(&mut self) -> Expr {
+        self.logical_or()
+    }
+
+    /// Parses a chain of prefix operators (`Bang`, `Tilde`, `Minus`) followed by
+    /// a call expression.
+    fn unary(&mut self) -> Expr {
+        if self.eof() {
+            // No operator can follow; let `primary` report the missing operand.
+            return self.call();
+        }
+
+        // Only tokens that have a `UnaryOp` are treated as prefix operators.
+        // `Plus` used to be accepted here and was silently turned into a
+        // negation, while `Tilde` could never be reached.
+        let op = match self.peek().tt {
+            TokenType::Bang => UnaryOp::Not,
+            TokenType::Tilde => UnaryOp::BWComp,
+            TokenType::Minus => UnaryOp::Neg,
+            _ => return self.call(),
+        };
+        self.advance();
+
+        // Recurse so that stacked operators nest.
+        let value = Box::new(self.unary());
+        Expr::UnaryOp { op, value }
+    }
+
+    /// Parses a primary expression followed by any number of call suffixes
+    /// `(arg, arg, ...)`.
+    ///
+    /// The callee is stored as an arbitrary expression; it is not restricted to
+    /// a plain identifier.
+    fn call(&mut self) -> Expr {
+        let mut expr = self.primary();
+
+        // `while` rather than `if` so a call result can itself be called.
+        while self.advance_if_eq(&TokenType::OpeningParen) {
+            let mut arguments = Vec::new();
+
+            // Guard against end of input before peeking for an empty argument list.
+            if !self.eof() && self.peek().tt != TokenType::ClosingParen {
+                loop {
+                    arguments.push(self.expression());
+                    if !self.advance_if_eq(&TokenType::Comma) {
+                        break;
+                    }
+                }
+            }
+
+            // `advance_if_eq` is safe at end of input, so the intended message is
+            // reported even when the closing parenthesis is simply missing.
+            if !self.advance_if_eq(&TokenType::ClosingParen) {
+                panic!("Expected ')' to close off function call");
+            }
+
+            expr = Expr::Call {
+                ident: Box::new(expr),
+                args: arguments,
+            };
+        }
+
+        expr
+    }
+
+    /// Parses a literal, a variable reference, or a parenthesised expression.
+    ///
+    /// # Panics
+    ///
+    /// Panics at end of input and on any token that cannot start an expression.
+    fn primary(&mut self) -> Expr {
+        let Some(token) = self.advance() else {
+            panic!("Expected an expression but reached the end of input");
+        };
+
+        // FIXME: Should probably avoid cloning token types
+        match token.tt.clone() {
+            TokenType::Integer(literal) => Expr::Literal(Literal::Integer(literal)),
+            TokenType::Float(literal) => Expr::Literal(Literal::Float(literal)),
+            TokenType::Boolean(literal) => Expr::Literal(Literal::Bool(literal)),
+            TokenType::Character(literal) => Expr::Literal(Literal::Char(literal)),
+            TokenType::String(literal) => Expr::Literal(Literal::String(literal)),
+            TokenType::Regex(literal) => Expr::Literal(Literal::Regex(literal)),
+            TokenType::Identifier(ident) => Expr::Variable(ident),
+            TokenType::OpeningParen => {
+                let expr = self.expression();
+                self.consume(TokenType::ClosingParen, "Must end expression with ')'");
+                Expr::Grouping(Box::new(expr))
+            }
+            // Report the token that was actually consumed; peeking here would show
+            // the following token and could run off the end of the stream.
+            unexpected => unimplemented!("{:?}", unexpected),
+        }
+    }
+}
+
+// Macro to generate repetitive binary expressions. Things like addition,
+// multiplication, etc.
+//
+// `$name` is the method to generate, `$parent` is the method used to parse each
+// operand (the next level up in precedence), and `$pattern` lists the token
+// types accepted as operators at this level. Operators on the same level are
+// folded left to right, so `a - b - c` becomes `(a - b) - c`.
+macro_rules! binary_expr {
+    ($name:ident, $parent:ident, $pattern:pat) => {
+        fn $name(&mut self) -> Expr {
+            let mut expr = self.$parent();
+
+            while !self.eof() && matches!(self.peek().tt, $pattern) {
+                let token = self
+                    .advance()
+                    .expect("loop condition guarantees a token is available");
+
+                let operator = match token.tt {
+                    TokenType::Plus => BinaryOp::Add,
+                    TokenType::PlusPlus => BinaryOp::Con,
+                    TokenType::Minus => BinaryOp::Sub,
+                    TokenType::Star => BinaryOp::Mul,
+                    TokenType::StarStar => BinaryOp::Pow,
+                    TokenType::Slash => BinaryOp::Div,
+                    TokenType::Perc => BinaryOp::Mod,
+
+                    // `<<` shifts left and `>>` shifts right; these two were swapped.
+                    TokenType::LtLt => BinaryOp::BWSftLeft,
+                    TokenType::GtGt => BinaryOp::BWSftRight,
+                    TokenType::Amp => BinaryOp::BWAnd,
+                    TokenType::Pipe => BinaryOp::BWOr,
+                    TokenType::Caret => BinaryOp::BWXor,
+
+                    TokenType::Lt => BinaryOp::Lt,
+                    TokenType::Gt => BinaryOp::Gt,
+                    TokenType::LtEq => BinaryOp::LtEq,
+                    TokenType::GtEq => BinaryOp::GtEq,
+                    TokenType::EqEq => BinaryOp::EqEq,
+                    TokenType::BangEq => BinaryOp::NotEq,
+                    TokenType::AmpAmp => BinaryOp::LogAnd,
+                    TokenType::PipePipe => BinaryOp::LogOr,
+
+                    // A `$pattern` naming a token without a mapping above is a bug in
+                    // the macro invocation; fail loudly instead of producing `Add`.
+                    _ => unreachable!(
+                        "binary_expr! pattern matched a token that is not a binary operator"
+                    ),
+                };
+
+                let rhs = self.$parent();
+                expr = Expr::BinaryOp {
+                    op: operator,
+                    lhs: Box::new(expr),
+                    rhs: Box::new(rhs),
+                };
+            }
+
+            expr
+        }
+    };
+}
+
+// Precedence table for binary operators. Each `binary_expr!` line generates a
+// method named by the first argument; it parses its operands with the method
+// named by the second argument and accepts the token types in the third.
+#[rustfmt::skip]
+#[allow(unused_parens)]
+impl<'a> AstParser<'a> {
+ // Binary expressions in order of precedence from lowest to highest.
+ // NOTE(review): `binary_expr!` also maps PlusPlus, StarStar, Amp, Pipe and
+ // Caret to operators, but no level below accepts them, so they are never
+ // parsed as binary expressions here - confirm whether that is intended.
+ binary_expr!(logical_or , logical_and , (TokenType::PipePipe));
+ binary_expr!(logical_and , equality , (TokenType::AmpAmp));
+ binary_expr!(equality , comparison , (TokenType::BangEq | TokenType::EqEq));
+ binary_expr!(comparison , bitwise_shifting, (TokenType::Lt | TokenType::Gt | TokenType::LtEq | TokenType::GtEq));
+ binary_expr!(bitwise_shifting, additive , (TokenType::LtLt | TokenType::GtGt));
+ binary_expr!(additive , multiplicative , (TokenType::Plus | TokenType::Minus));
+ binary_expr!(multiplicative , unary , (TokenType::Star | TokenType::Slash | TokenType::Perc));
+}
+
+#[cfg(test)]
+mod tests {
+    use itertools::Itertools;
+    use super::{AstParser, BinaryOp, Expr, Literal};
+
+    use crate::{lexer::{Lexer}, parser::ast::UnaryOp};
+
+    /// Boxed integer literal node.
+    fn int(value: i128) -> Box<Expr> {
+        Box::new(Expr::Literal(Literal::Integer(value)))
+    }
+
+    /// Boxed binary operation node.
+    fn binary(op: BinaryOp, lhs: Box<Expr>, rhs: Box<Expr>) -> Box<Expr> {
+        Box::new(Expr::BinaryOp { op, lhs, rhs })
+    }
+
+    /// Lexes and parses `source` as a single expression, prints both trees for
+    /// easier debugging, and asserts that the result equals `expected_ast`.
+    #[track_caller]
+    fn assert_parses_to(source: &str, expected_ast: Expr) {
+        let tokens = Lexer::new(source).collect_vec();
+        let generated_ast = AstParser::new(tokens).expression();
+
+        println!("Expected AST:\n{expected_ast:#?}\n\n");
+        println!("Generated AST:\n{generated_ast:#?}\n\n");
+
+        assert_eq!(expected_ast, generated_ast);
+    }
+
+    /// Multiplication binds tighter than addition.
+    #[test]
+    fn basic_expression_a() {
+        let product = binary(BinaryOp::Mul, int(5), int(4));
+        let sum = binary(BinaryOp::Add, int(3), product);
+
+        assert_parses_to("3 + 5 * 4", *sum);
+    }
+
+    /// Grouping, unary negation and division nested under a subtraction.
+    #[test]
+    fn basic_expression_b() {
+        let negated_five = Box::new(Expr::UnaryOp {
+            op: UnaryOp::Neg,
+            value: int(5),
+        });
+        let grouped_sum = Box::new(Expr::Grouping(binary(BinaryOp::Add, negated_five, int(5))));
+        let quotient = binary(BinaryOp::Div, grouped_sum, int(6));
+        let difference = binary(BinaryOp::Sub, int(17), quotient);
+
+        assert_parses_to("17 - (-5 + 5) / 6", *difference);
+    }
+}
\ No newline at end of file
diff --git a/crates/sloth/src/parser/stmt.rs b/crates/sloth/src/parser/stmt.rs
index 8b13789..c5c0c6a 100644
--- a/crates/sloth/src/parser/stmt.rs
+++ b/crates/sloth/src/parser/stmt.rs
@@ -1 +1,159 @@
+use super::ast::{AstParser, Expr, Stmt};
+use crate::lexer::TokenType;
+/// Implementation containing the parser's internal components related to statements
+impl<'a> AstParser<'a> {
+    /// Parses statements until the token stream is exhausted.
+    ///
+    /// # Panics
+    ///
+    /// Panics on the first malformed statement.
+    pub fn parse(&mut self) -> Vec<Stmt> {
+        let mut statements = Vec::new();
+
+        while !self.eof() {
+            statements.push(self.statement());
+        }
+
+        statements
+    }
+
+    /// Parses one statement, dispatching on its leading keyword and falling back
+    /// to an expression statement when no keyword matches.
+    fn statement(&mut self) -> Stmt {
+        if self.advance_if_eq(&TokenType::Var) {
+            return self.var_statement();
+        }
+
+        if self.advance_if_eq(&TokenType::Val) {
+            return self.val_statement();
+        }
+
+        if self.advance_if_eq(&TokenType::If) {
+            return self.if_statement();
+        }
+
+        // if self.advance_if_eq(&TokenType::For) {
+        //     return self.for_statement();
+        // }
+
+        if self.advance_if_eq(&TokenType::While) {
+            return self.while_statement();
+        }
+
+        // If we couldn't parse a statement return an expression statement
+        self.expression_statement()
+    }
+
+    /// Parses `<identifier> = <expression> ;`, the part shared by `var` and
+    /// `val` statements, and returns the identifier and the initializer.
+    ///
+    /// `keyword` is only used in the panic message.
+    fn binding_parts(&mut self, keyword: &str) -> (String, Expr) {
+        // Guard the peek: at end of input it would panic with an index error
+        // instead of the intended message.
+        if self.eof() {
+            panic!("Identifier expected after '{keyword}'");
+        }
+        let TokenType::Identifier(ident) = self.peek().tt.clone() else {
+            panic!("Identifier expected after '{keyword}'");
+        };
+
+        self.advance(); // Advancing from the identifier TODO: Check for type
+        self.consume(TokenType::Eq, "Expected '=' after identifier");
+
+        let value = self.expression();
+
+        self.consume(TokenType::SemiColon, "Expected ';' at end of statement");
+
+        (ident, value)
+    }
+
+    /// Parses the remainder of a `var` statement (the keyword is already consumed).
+    fn var_statement(&mut self) -> Stmt {
+        let (name, value) = self.binding_parts("var");
+
+        Stmt::DefineVariable {
+            name,
+            value,
+            typ: None,
+        }
+    }
+
+    /// Parses the remainder of a `val` statement (the keyword is already consumed).
+    fn val_statement(&mut self) -> Stmt {
+        let (name, value) = self.binding_parts("val");
+
+        Stmt::DefineValue {
+            name,
+            value,
+            typ: None,
+        }
+    }
+
+    /// Parses a brace-delimited block and returns the statements inside it.
+    ///
+    /// Both braces are consumed. Leaving the closing brace behind would make the
+    /// next statement start on `}` and fail.
+    fn block(&mut self) -> Vec<Stmt> {
+        self.consume(
+            TokenType::OpeningBrace,
+            "Expected '{' at beggining of block",
+        );
+
+        let mut body = Vec::new();
+        while !self.eof() && self.peek().tt != TokenType::ClosingBrace {
+            body.push(self.statement());
+        }
+
+        // `advance_if_eq` is safe at end of input, so an unterminated block gets
+        // this message rather than an index-out-of-bounds panic.
+        if !self.advance_if_eq(&TokenType::ClosingBrace) {
+            panic!("Expected '}}' at end of block");
+        }
+
+        body
+    }
+
+    /// Parses the remainder of an `if` statement (the keyword is already consumed).
+    fn if_statement(&mut self) -> Stmt {
+        let condition = self.expression();
+        let body = self.block();
+
+        Stmt::If {
+            expr: condition,
+            body,
+            else_if: Vec::new(),
+            els: None,
+        } // TODO: implement else if and else
+    }
+
+    // fn for_statement(&mut self) -> Stmt {
+    //     let binding = self.expression();
+    //     let Expr::Variable(binding) = binding else {
+    //         panic!("Left side of for statement must be identifier");
+    //     };
+
+    //     self.consume(
+    //         TokenType::In,
+    //         "Expected 'in' in between identifier and range",
+    //     );
+
+    //     let range_start = self.expression();
+    //     self.consume(
+    //         TokenType::DotDot,
+    //         "Expected '..' denoting min and max of range",
+    //     );
+    //     let range_end = self.expression();
+
+    //     let mut body = Vec::new();
+    //     while !self.eof() && self.peek().tt != TokenType::ClosingBrace {
+    //         body.push(self.statement());
+    //     }
+
+    //     Stmt::For { name: (binding), iter: (), body: (body) }
+    // } TODO: Fix this garbage
+
+    /// Parses the remainder of a `while` statement (the keyword is already consumed).
+    fn while_statement(&mut self) -> Stmt {
+        let condition = self.expression();
+        let body = self.block();
+
+        Stmt::While { condition, body }
+    }
+
+    /// Parses `<expression> ;`, or an assignment `<identifier> = <expression> ;`.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the left side of `=` is not a plain identifier or when the
+    /// terminating `;` is missing.
+    fn expression_statement(&mut self) -> Stmt {
+        let expr = self.expression();
+
+        // FIXME: Move assignment handling
+        if self.advance_if_eq(&TokenType::Eq) {
+            // Reject invalid targets here; otherwise the `=` would be silently
+            // dropped and input such as `1 = ;` would be accepted.
+            let Expr::Variable(name) = expr else {
+                panic!("Left side of assignment must be an identifier");
+            };
+
+            let value = self.expression();
+
+            self.consume(TokenType::SemiColon, "Expected ';' at end of statement");
+            return Stmt::DefineVariable {
+                name,
+                value,
+                typ: None,
+            };
+        }
+
+        self.consume(TokenType::SemiColon, "Expected ';' at end of statement");
+        Stmt::ExprStmt(expr)
+    }
+}