about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Cody <cody@codyq.dev> 2023-03-24 17:33:44 -0500
committer Cody <cody@codyq.dev> 2023-03-24 17:33:44 -0500
commit f9d13f3098b2a5984f59d612be87c184aba0b2c7 (patch)
tree 0a8059a4604026c3c0fcde587617507063cf7ccf /src
parent 28e0b95d8ecbbc44ef81069ad122a88b2a64c74e (diff)
download sloth-f9d13f3098b2a5984f59d612be87c184aba0b2c7.tar.gz
Stuff and things
Diffstat (limited to 'src')
-rw-r--r-- src/ast/mod.rs 75
-rw-r--r-- src/ast/parser.rs 372
-rw-r--r-- src/ast/printer.rs 38
-rw-r--r-- src/interpreter.rs 221
-rw-r--r-- src/lexer.rs 418
-rw-r--r-- src/main.rs 115
6 files changed, 48 insertions, 1191 deletions
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
deleted file mode 100644
index b3e7c36..0000000
--- a/src/ast/mod.rs
+++ /dev/null
@@ -1,75 +0,0 @@
-use crate::lexer::{Literal, TokenType};
-
-pub mod parser;
-pub mod printer;
-
-#[derive(Debug, Eq, PartialEq)]
-pub enum Stmt {
- Block(Vec<Stmt>),
- Expr(Expr),
- Val {
- ident: String,
- value: Expr,
- },
- Var {
- ident: String,
- value: Expr,
- },
- Assignment {
- ident: String,
- value: Expr,
- },
- Function {
- ident: String,
- arguments: Vec<FunctionArgument>,
- return_type: String,
- body: Vec<Stmt>,
- },
- If {
- condition: Expr,
- body: Vec<Stmt>,
- },
- For {
- binding: String,
- range: (Expr, Expr),
- body: Vec<Stmt>,
- },
- While {
- condition: Expr,
- body: Vec<Stmt>,
- },
- Return {
- value: Expr,
- },
-}
-
-#[derive(Debug, Eq, PartialEq)]
-pub struct FunctionArgument {
- name: String,
- types: String,
-}
-
-#[derive(Debug, Eq, PartialEq)]
-pub enum Expr {
- Literal(Literal),
- Variable(String),
- Grouping(Box<Expr>),
- Call {
- ident: String,
- arguments: Vec<Expr>,
- },
- Binary {
- operator: TokenType,
- lhs: Box<Expr>,
- rhs: Box<Expr>,
- },
- Unary {
- operator: TokenType,
- expr: Box<Expr>,
- },
-}
-
-pub trait AstVisitor<T = ()> {
- fn visit_stmt(&mut self, stmt: &Stmt) -> T;
- fn visit_expr(&mut self, expr: &Expr) -> T;
-}
diff --git a/src/ast/parser.rs b/src/ast/parser.rs
deleted file mode 100644
index 85be68c..0000000
--- a/src/ast/parser.rs
+++ /dev/null
@@ -1,372 +0,0 @@
-use super::{Expr, Stmt};
-use crate::lexer::{Token, TokenType};
-
-pub struct AstParser<'a> {
- tokens: Vec<Token<'a>>,
- index: usize,
-}
-
-/// Implementation containing utilities used by the parsers internal components
-impl<'a> AstParser<'a> {
- pub fn new(tokens: Vec<Token<'a>>) -> Self {
- Self { tokens, index: 0 }
- }
- fn peek(&self) -> &Token {
- &self.tokens[self.index]
- }
-
- fn advance(&mut self) -> Option<&Token> {
- if self.eof() {
- return None;
- }
-
- self.index += 1;
- Some(&self.tokens[self.index - 1])
- }
-
- fn advance_if(&mut self, next: impl FnOnce(&Token) -> bool) -> bool {
- if self.eof() {
- return false;
- }
-
- if next(self.peek()) {
- self.advance();
- return true;
- }
-
- false
- }
-
- fn advance_if_eq(&mut self, next: &TokenType) -> bool {
- self.advance_if(|it| it.tt == *next)
- }
-
- fn consume(&mut self, next: TokenType, error: &str) {
- if std::mem::discriminant(&self.peek().tt) != std::mem::discriminant(&next) {
- panic!("{error}");
- }
- self.advance();
- }
-
- fn eof(&self) -> bool {
- self.index >= self.tokens.len()
- }
-}
-
-/// Implementation containing parsers internal components related to statements
-impl<'a> AstParser<'a> {
- pub fn parse(&mut self) -> Vec<Stmt> {
- let mut statements = Vec::new();
-
- while !self.eof() {
- statements.push(self.statement());
- }
-
- statements
- }
-
- fn block(&mut self) -> Vec<Stmt> {
- self.consume(TokenType::LeftBrace, "Expected '{' at beggining of block");
-
- let mut statements = Vec::new();
-
- while !self.eof() && self.peek().tt != TokenType::RightBrace {
- statements.push(self.statement());
- }
-
- self.consume(TokenType::RightBrace, "Expected '}' at end of block");
- statements
- }
-
- fn statement(&mut self) -> Stmt {
- if self.peek().tt == TokenType::LeftBrace {
- return Stmt::Block(self.block());
- }
-
- if self.advance_if_eq(&TokenType::Var) {
- return self.var_statement();
- }
-
- if self.advance_if_eq(&TokenType::Val) {
- return self.val_statement();
- }
-
- if self.advance_if_eq(&TokenType::If) {
- return self.if_statement();
- }
-
- if self.advance_if_eq(&TokenType::For) {
- return self.for_statement();
- }
-
- if self.advance_if_eq(&TokenType::While) {
- return self.while_statement();
- }
-
- // If we couldn't parse a statement return an expression statement
- self.expression_statement()
- }
-
- fn var_statement(&mut self) -> Stmt {
- let TokenType::Identifier(ident) = self.peek().tt.clone() else {
- panic!("Identifier expected after 'var'");
- };
-
- self.advance(); // Advancing from the identifier
- self.consume(TokenType::Eq, "Expected '=' after identifier");
-
- let value = self.expression();
-
- self.consume(TokenType::SemiColon, "Expected ';' at end of statement");
-
- Stmt::Var { ident, value }
- }
-
- fn val_statement(&mut self) -> Stmt {
- let TokenType::Identifier(ident) = self.peek().tt.clone() else {
- panic!("Identifier expected after 'val'");
- };
-
- self.advance(); // Advancing from the identifier
- self.consume(TokenType::Eq, "Expected '=' after identifier");
-
- let value = self.expression();
-
- self.consume(TokenType::SemiColon, "Expected ';' at end of statement");
-
- Stmt::Val { ident, value }
- }
-
- fn if_statement(&mut self) -> Stmt {
- let condition = self.expression();
- let body = self.block();
-
- Stmt::If { condition, body }
- }
-
- fn for_statement(&mut self) -> Stmt {
- let binding = self.expression();
- let Expr::Variable(binding) = binding else {
- panic!("Left side of for statement must be identifier");
- };
-
- self.consume(
- TokenType::In,
- "Expected 'in' in between identifier and range",
- );
-
- let range_start = self.expression();
- self.consume(
- TokenType::DotDot,
- "Expected '..' denoting min and max of range",
- );
- let range_end = self.expression();
-
- let body = self.block();
-
- Stmt::For {
- binding,
- range: (range_start, range_end),
- body,
- }
- }
-
- fn while_statement(&mut self) -> Stmt {
- let condition = self.expression();
- let body = self.block();
-
- Stmt::While { condition, body }
- }
-
- fn expression_statement(&mut self) -> Stmt {
- let expr = self.expression();
-
- // FIXME: Move assignment handling
- if self.advance_if_eq(&TokenType::Eq) {
- if let Expr::Variable(ident) = &expr {
- let value = self.expression();
-
- self.consume(TokenType::SemiColon, "Expected ';' at end of statement");
- return Stmt::Assignment {
- ident: ident.clone(),
- value,
- };
- }
- }
-
- self.consume(TokenType::SemiColon, "Expected ';' at end of statement");
- Stmt::Expr(expr)
- }
-}
-
-/// Implementation containing parsers internal components related to expressions
-impl<'a> AstParser<'a> {
- // FIXME: Should probably avoid cloning token types
-
- fn expression(&mut self) -> Expr {
- self.logical_or()
- }
-
- fn unary(&mut self) -> Expr {
- if !self.eof()
- && matches!(
- self.peek().tt,
- TokenType::Bang | TokenType::Plus | TokenType::Minus
- )
- {
- let operator = self.advance().unwrap().tt.clone();
- let rhs = self.unary();
- return Expr::Unary {
- operator,
- expr: Box::new(rhs),
- };
- }
-
- self.call()
- }
-
- fn call(&mut self) -> Expr {
- let mut expr = self.primary();
-
- if self.advance_if_eq(&TokenType::LeftParen) {
- let mut arguments = Vec::<Expr>::new();
-
- if self.peek().tt != TokenType::RightParen {
- loop {
- arguments.push(self.expression());
- if !self.advance_if_eq(&TokenType::Comma) {
- break;
- }
- }
- }
-
- self.consume(
- TokenType::RightParen,
- "Expected ')' to close off function call",
- );
-
- let Expr::Variable(ident) = expr else { panic!("uh oh spaghettio"); };
-
- expr = Expr::Call { ident, arguments }
- }
-
- expr
- }
-
- fn primary(&mut self) -> Expr {
- match self.advance().unwrap().tt.clone() {
- TokenType::Literal(literal) => Expr::Literal(literal),
- TokenType::Identifier(ident) => Expr::Variable(ident),
- TokenType::LeftParen => {
- let expr = self.expression();
- self.consume(TokenType::RightParen, "Must end expression with ')'");
- Expr::Grouping(Box::new(expr))
- }
- _ => unimplemented!("{:?}", self.peek()),
- }
- }
-}
-
-// Macro to generate repetitive binary expressions. Things like addition,
-// multiplication, exc.
-macro_rules! binary_expr {
- ($name:ident, $parent:ident, $pattern:pat) => {
- fn $name(&mut self) -> Expr {
- let mut expr = self.$parent();
-
- while !self.eof() && matches!(self.peek().tt, $pattern) {
- let operator = self.advance().unwrap().tt.clone();
- let rhs = self.$parent();
- expr = Expr::Binary {
- operator,
- lhs: Box::new(expr),
- rhs: Box::new(rhs),
- };
- }
-
- expr
- }
- };
-}
-
-#[rustfmt::skip]
-#[allow(unused_parens)]
-impl<'a> AstParser<'a> {
- // Binary expressions in order of precedence from lowest to highest.
- binary_expr!(logical_or , logical_and , (TokenType::PipePipe));
- binary_expr!(logical_and , equality , (TokenType::AmpAmp));
- binary_expr!(equality , comparison , (TokenType::BangEq | TokenType::EqEq));
- binary_expr!(comparison , bitwise_shifting, (TokenType::Lt | TokenType::Gt | TokenType::LtEq | TokenType::GtEq));
- binary_expr!(bitwise_shifting, additive , (TokenType::LtLt | TokenType::GtGt));
- binary_expr!(additive , multiplicative , (TokenType::Plus | TokenType::Minus));
- binary_expr!(multiplicative , unary , (TokenType::Star | TokenType::Slash | TokenType::Perc));
-}
-
-#[cfg(test)]
-mod tests {
- use itertools::Itertools;
-
- use super::AstParser;
- use crate::ast::Expr;
- use crate::lexer::{Lexer, Literal, TokenType};
-
- #[test]
- fn basic_expression_a() {
- let lexer = Lexer::new("3 + 5 * 4");
- let tokens = lexer.collect_vec();
-
- let expected_ast = Expr::Binary {
- operator: TokenType::Plus,
- lhs: Box::new(Expr::Literal(Literal::Number(3))),
- rhs: Box::new(Expr::Binary {
- operator: TokenType::Star,
- lhs: Box::new(Expr::Literal(Literal::Number(5))),
- rhs: Box::new(Expr::Literal(Literal::Number(4))),
- }),
- };
-
- let mut parser = AstParser::new(tokens);
- let generated_ast = parser.expression();
-
- println!("Expected AST:\n{expected_ast:#?}\n\n");
- println!("Generated AST:\n{generated_ast:#?}\n\n");
-
- assert_eq!(expected_ast, generated_ast);
- }
-
- #[test]
- fn basic_expression_b() {
- let lexer = Lexer::new("17 - (-5 + 5) / 6");
- let tokens = lexer.collect_vec();
-
- let expected_ast = Expr::Binary {
- operator: TokenType::Minus,
- lhs: Box::new(Expr::Literal(Literal::Number(17))),
- rhs: Box::new(Expr::Binary {
- operator: TokenType::Slash,
- lhs: Box::new(Expr::Grouping(Box::new(Expr::Binary {
- operator: TokenType::Plus,
- lhs: Box::new(Expr::Unary {
- operator: TokenType::Minus,
- expr: Box::new(Expr::Literal(Literal::Number(5))),
- }),
- rhs: Box::new(Expr::Literal(Literal::Number(5))),
- }))),
- rhs: Box::new(Expr::Literal(Literal::Number(6))),
- }),
- };
-
- let mut parser = AstParser::new(tokens);
- let generated_ast = parser.expression();
-
- println!("Expected AST:\n{expected_ast:#?}\n\n");
- println!("Generated AST:\n{generated_ast:#?}\n\n");
-
- assert_eq!(expected_ast, generated_ast);
- }
-
- #[test]
- fn basic_expression_c() {
- // TODO:
- }
-}
diff --git a/src/ast/printer.rs b/src/ast/printer.rs
deleted file mode 100644
index 1aa32ae..0000000
--- a/src/ast/printer.rs
+++ /dev/null
@@ -1,38 +0,0 @@
-// use super::{AstVisitor, Expr, Stmt};
-
-// pub struct AstPrettyPrinter;
-// impl AstVisitor<String> for AstPrettyPrinter {
-// fn visit_stmt(&self, stmt: &Stmt) -> String {
-// match stmt {
-// Stmt::Expr(expr) => self.visit_expr(expr),
-// Stmt::Val(name, expr) => format!("(val '{}' <- {})", name,
-// self.visit_expr(expr)), Stmt::Var(name, expr) => format!("(var
-// '{}' <- {})", name, self.visit_expr(expr)), }
-// }
-
-// fn visit_expr(&self, expr: &Expr) -> String {
-// match expr {
-// Expr::Literal(i) => i.to_string(),
-// Expr::Add(lhs, rhs) => {
-// let lhs = self.visit_expr(lhs);
-// let rhs = self.visit_expr(rhs);
-// format!("({lhs} + {rhs})")
-// }
-// Expr::Sub(lhs, rhs) => {
-// let lhs = self.visit_expr(lhs);
-// let rhs = self.visit_expr(rhs);
-// format!("({lhs} - {rhs})")
-// }
-// Expr::Mul(lhs, rhs) => {
-// let lhs = self.visit_expr(lhs);
-// let rhs = self.visit_expr(rhs);
-// format!("({lhs} * {rhs})")
-// }
-// Expr::Div(lhs, rhs) => {
-// let lhs = self.visit_expr(lhs);
-// let rhs = self.visit_expr(rhs);
-// format!("({lhs} / {rhs})")
-// }
-// }
-// }
-// }
diff --git a/src/interpreter.rs b/src/interpreter.rs
deleted file mode 100644
index a7937db..0000000
--- a/src/interpreter.rs
+++ /dev/null
@@ -1,221 +0,0 @@
-use std::collections::HashMap;
-use std::fmt::Display;
-
-use itertools::Itertools;
-
-use crate::ast::{AstVisitor, Expr, Stmt};
-use crate::lexer::{Literal, TokenType};
-
-#[derive(Default)]
-pub struct AstInterpreter {
- pub callables: HashMap<String, Box<dyn SlothCallable>>,
- memory: HashMap<String, (Value, bool)>,
-}
-
-impl AstVisitor<Value> for AstInterpreter {
- fn visit_stmt(&mut self, stmt: &Stmt) -> Value {
- match stmt {
- Stmt::Block(stmts) => {
- self.interpret(stmts);
- }
- Stmt::Expr(expr) => {
- self.visit_expr(expr);
- }
- Stmt::Val { ident, value } => {
- let value = self.visit_expr(value);
- self.memory.insert(ident.clone(), (value, false));
- }
- Stmt::Var { ident, value } => {
- let value = self.visit_expr(value);
- self.memory.insert(ident.clone(), (value, true));
- }
- Stmt::Assignment { ident, value } => {
- if !self.memory.contains_key(ident) {
- panic!("Cannot assign to variable that doesn't exist");
- }
-
- if !self.memory[ident].1 {
- panic!("Cannot mutate value '{ident}'");
- }
-
- let value = self.visit_expr(value);
- self.memory.insert(ident.clone(), (value, true));
- }
- Stmt::Function {
- ident: _,
- arguments: _,
- return_type: _,
- body: _,
- } => todo!(),
- Stmt::If { condition, body } => {
- let result = self.visit_expr(condition);
- if result == Value::Bool(true) {
- self.interpret(body);
- }
- }
- Stmt::For {
- binding,
- range,
- body,
- } => {
- let Value::Number(lower_range) = self.visit_expr(&range.0) else { panic!("Lower range must be number") };
- let Value::Number(upper_range) = self.visit_expr(&range.1) else { panic!("Upper range must be number") };
-
- for i in lower_range..upper_range {
- self.memory
- .insert(binding.clone(), (Value::Number(i), false));
- self.interpret(body);
- }
-
- self.memory.remove(binding);
- }
- Stmt::While { condition, body } => {
- while self.visit_expr(condition) == Value::Bool(true) {
- self.interpret(body);
- }
- }
- Stmt::Return { value: _ } => todo!(),
- };
-
- // FIXME: Honestly should probably abandon this "visitor" pattern. 2 functions
- // with these match statements would work better
- Value::Nil
- }
-
- fn visit_expr(&mut self, expr: &Expr) -> Value {
- match expr {
- Expr::Literal(literal) => match literal {
- Literal::String(value) => Value::String(value.clone()),
- Literal::Character(value) => Value::String(value.to_string()),
- Literal::Number(value) => Value::Number(*value),
- Literal::Bool(value) => Value::Bool(*value),
- Literal::Nil => Value::Nil,
- },
- Expr::Variable(ident) => self.memory.get(ident).unwrap().clone().0,
- Expr::Grouping(child) => self.visit_expr(child),
- Expr::Binary { operator, lhs, rhs } => {
- let lhs = self.visit_expr(lhs);
- let rhs = self.visit_expr(rhs);
-
- if let Value::Number(lhs) = lhs && let Value::Number(rhs) = rhs {
- match operator {
- TokenType::Plus => Value::Number(lhs + rhs),
- TokenType::Minus => Value::Number(lhs - rhs),
- TokenType::Star => Value::Number(lhs * rhs),
- TokenType::Slash => Value::Number(lhs / rhs),
- TokenType::Perc => Value::Number(lhs % rhs),
-
- TokenType::Gt => Value::Bool(lhs > rhs),
- TokenType::GtEq => Value::Bool(lhs >= rhs),
- TokenType::Lt => Value::Bool(lhs < rhs),
- TokenType::LtEq => Value::Bool(lhs <= rhs),
-
- TokenType::BangEq => Value::Bool(lhs != rhs),
- TokenType::EqEq => Value::Bool(lhs == rhs),
-
- _ => panic!(),
- }
- } else if let Value::Bool(lhs) = lhs && let Value::Bool(rhs) = rhs {
- match operator {
- TokenType::AmpAmp => Value::Bool(lhs && rhs),
- TokenType::PipePipe => Value::Bool(lhs || rhs),
- _ => panic!()
- }
- } else if let Value::String(lhs) = lhs && let Value::String(rhs) = rhs {
- match operator {
- TokenType::Plus => {
- let mut value = lhs;
- value.push_str(&rhs);
- Value::String(value)
- },
- TokenType::BangEq => Value::Bool(lhs != rhs),
- TokenType::EqEq => Value::Bool(lhs == rhs),
- _ => panic!()
- }
- } else {
- panic!("Invalid operations for types");
- }
- }
- Expr::Unary { operator, expr } => {
- let value = self.visit_expr(expr);
-
- match operator {
- TokenType::Bang => {
- let Value::Bool(value) = value else {
- panic!("Invalid operations for types");
- };
-
- Value::Bool(!value)
- }
- TokenType::Plus => value,
- TokenType::Minus => {
- let Value::Number(value) = value else {
- panic!("Invalid operations for types");
- };
-
- Value::Number(-value)
- }
- _ => panic!(),
- }
- }
- Expr::Call { ident, arguments } => {
- let argument_values = arguments.iter().map(|it| self.visit_expr(it)).collect_vec();
- let Some(callable) = self.callables.remove(ident) else {
- panic!("Unkown callable '{ident}'");
- };
-
- let result = callable.call(self, &argument_values);
- self.callables.insert(ident.clone(), callable);
- result
- }
- }
- }
-}
-
-impl AstInterpreter {
- pub fn interpret(&mut self, stmts: &Vec<Stmt>) {
- for stmt in stmts {
- self.visit_stmt(stmt);
- }
- }
-}
-
-#[derive(Clone, Eq, PartialEq)]
-pub enum Value {
- Number(i32),
- String(String),
- Bool(bool),
- Nil,
-}
-
-pub trait SlothCallable {
- fn call(&self, interpreter: &mut AstInterpreter, args: &[Value]) -> Value;
-}
-
-pub struct InternalFunction<'a>(pub &'a dyn Fn(&[Value]) -> Value);
-
-impl<'a> SlothCallable for InternalFunction<'a> {
- fn call(&self, _interpreter: &mut AstInterpreter, args: &[Value]) -> Value {
- self.0(args)
- }
-}
-
-// pub struct SlothFunction(Vec<Stmt>);
-
-impl Display for Value {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- match self {
- Value::Number(value) => write!(f, "{value}")?,
- Value::String(value) => write!(f, "{value}")?,
- Value::Bool(value) => write!(f, "{value}")?,
- Value::Nil => write!(f, "nil")?,
- }
-
- Ok(())
- }
-}
-
-#[cfg(test)]
-mod test {
- //
-}
diff --git a/src/lexer.rs b/src/lexer.rs
index 88d86bd..ef79716 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -1,84 +1,49 @@
#![allow(dead_code)]
-#[derive(Debug, Clone, Eq, PartialEq)]
-pub enum TokenType {
- // Utility
- DocComment(String),
- Comment(String),
-
- // Short
- Plus, // +
- Minus, // -
- Slash, // /
- Star, // *
- Perc, // %
-
- PlusEq, // +=
- MinusEq, // -=
- SlashEq, // /=
- StarEq, // *=
- PercEq, // %=
-
- Eq, // =
- EqEq, // ==
- Bang, // !
- BangEq, // !=
+use thiserror::Error;
- Gt, // >
- GtGt, // >>
- GtEq, // >=
- Lt, // <
- LtLt, // <<
- LtEq, // <=
-
- Amp, // &
- AmpAmp, // &&
- Pipe, // |
- PipePipe, // ||
-
- DotDot, // .
-
- LeftParen, // (
- RightParen, // )
- LeftBracket, // [
- RightBracket, // ]
- LeftBrace, // {
- RightBrace, // }
-
- Comma, // ,
- Dot, // .
- Colon, // :
- SemiColon, // ;
+#[derive(Debug, Error)]
+pub enum LexerError {
+ #[error("Unexpected token")]
+ UnexpectedToken,
+}
- // Literals
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub enum TokenType {
+ // Meta
+ DocComment,
+ Comment,
+
+ // Operatiors
+ Plus,
+ Minus,
+ Star,
+ Slash,
+ Perc,
+
+ PlusEq,
+ MinusEq,
+ StarEq,
+ SlashEq,
+ PercEq,
+
+ // Misc
Literal(Literal),
- Identifier(String),
-
- // Keywords
- Val,
- Var,
- Fn,
-
- If,
- Else,
-
- For,
- In,
-
- While,
-
- Loop,
- Break,
- Continue,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Literal {
- String(String),
- Character(char),
- Number(i32),
- Bool(bool),
- Nil,
+ Numeric,
+ Boolean,
+ Character,
+ String,
+ Regex,
+}
+
+#[derive(Debug, Default)]
+pub struct Location {
+ row: u32,
+ column: u32,
}
#[derive(Debug)]
@@ -86,52 +51,24 @@ pub struct Token<'a> {
pub tt: TokenType,
pub lexeme: &'a str,
- start: usize,
- length: usize,
- line: u32,
+ start: Location,
+ end: Location,
}
pub struct Lexer<'a> {
source: &'a [u8],
- start: usize, // Start of the lexme
- pos: usize, // End of the lexme
- line: u32,
+
+ start: Location,
+ end: Location,
}
impl<'a> Lexer<'a> {
- pub fn new(source: &'a str) -> Lexer<'a> {
+ fn new(source: &'a str) -> Self {
Self {
source: source.as_bytes(),
- start: 0,
- pos: 0,
- line: 0,
- }
- }
-
- fn peek(&self) -> Option<char> {
- self.source.get(self.pos).map(|it| *it as char)
- }
-
- fn peek_nth(&self, nth: usize) -> Option<char> {
- self.source.get(self.pos + nth).map(|it| *it as char)
- }
-
- fn advance(&mut self) -> Option<char> {
- self.pos += 1;
- self.source.get(self.pos - 1).map(|it| *it as char)
- }
-
- fn advance_if(&mut self, next: impl FnOnce(Option<char>) -> bool) -> bool {
- if next(self.peek()) {
- self.advance();
- return true;
+ start: Default::default(),
+ end: Default::default(),
}
-
- false
- }
-
- fn advance_if_eq(&mut self, next: Option<char>) -> bool {
- self.advance_if(|it| it == next)
}
}
@@ -139,269 +76,6 @@ impl<'a> Iterator for Lexer<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
- // Ignore all whitespace
- loop {
- match self.peek() {
- Some('\n') => self.line += 1,
- Some(' ') | Some('\r') | Some('\t') => (),
- _ => break,
- }
- self.advance();
- }
-
- // Resetting the lexeme
- self.start = self.pos;
-
- // Parse the next lexeme- If it is EOF return nothing
- let Some(character) = self.advance() else {
- return None;
- };
-
- let tt = match character {
- // Whitespace & Comments
- '#' if self.advance_if_eq(Some('#')) => {
- let mut value = String::new();
- while self.peek() != Some('\n') {
- value.push(self.advance().unwrap());
- }
-
- TokenType::DocComment(value)
- }
-
- '#' => {
- let mut value = String::new();
- while self.peek() != Some('\n') {
- value.push(self.advance().unwrap());
- }
-
- TokenType::Comment(value)
- }
-
- // Arithmetic
- '+' if self.advance_if_eq(Some('=')) => TokenType::PlusEq,
- '-' if self.advance_if_eq(Some('=')) => TokenType::MinusEq,
- '*' if self.advance_if_eq(Some('=')) => TokenType::StarEq,
- '/' if self.advance_if_eq(Some('=')) => TokenType::SlashEq,
- '%' if self.advance_if_eq(Some('=')) => TokenType::PercEq,
- '+' => TokenType::Plus,
- '-' => TokenType::Minus,
- '*' => TokenType::Star,
- '/' => TokenType::Slash,
- '%' => TokenType::Perc,
-
- '0'..='9' => {
- let mut value = String::new();
- value.push(character);
- while let Some('0'..='9') = &self.peek() {
- value.push(self.advance().unwrap());
- }
-
- if self.peek() == Some('.') && self.peek_nth(1) != Some('.') {
- self.advance();
- value.push('.');
- while self.peek().unwrap().is_ascii_digit() {
- value.push(self.advance().unwrap());
- }
- }
- TokenType::Literal(Literal::Number(value.parse::<i32>().unwrap()))
- }
-
- // Logical & Bitwise
- '!' if self.advance_if_eq(Some('=')) => TokenType::BangEq,
- '=' if self.advance_if_eq(Some('=')) => TokenType::EqEq,
- '>' if self.advance_if_eq(Some('>')) => TokenType::GtGt,
- '>' if self.advance_if_eq(Some('=')) => TokenType::GtEq,
- '<' if self.advance_if_eq(Some('<')) => TokenType::LtLt,
- '<' if self.advance_if_eq(Some('=')) => TokenType::LtEq,
- '!' => TokenType::Bang,
- '=' => TokenType::Eq,
- '>' => TokenType::Gt,
- '<' => TokenType::Lt,
-
- '&' if self.advance_if_eq(Some('&')) => TokenType::AmpAmp,
- '|' if self.advance_if_eq(Some('|')) => TokenType::PipePipe,
- '&' => TokenType::Amp,
- '|' => TokenType::Pipe,
-
- // Misc. Operators
- '.' if self.advance_if_eq(Some('.')) => TokenType::DotDot,
-
- // Scope
- '(' => TokenType::LeftParen,
- ')' => TokenType::RightParen,
- '[' => TokenType::LeftBracket,
- ']' => TokenType::RightBracket,
- '{' => TokenType::LeftBrace,
- '}' => TokenType::RightBrace,
- ',' => TokenType::Comma,
- '.' => TokenType::Dot,
- ':' => TokenType::Colon,
- ';' => TokenType::SemiColon,
-
- '"' => {
- let mut value = String::new();
- while self.peek() != Some('"') {
- let Some(character) = self.advance() else {
- panic!("Syntax Error: String invalid");
- };
-
- if character == '\\' {
- match self.advance().unwrap() {
- '\\' => value.push('\\'),
- '"' => value.push('"'),
- 'n' => value.push('\n'),
- _ => panic!(),
- }
- continue;
- }
-
- value.push(character);
- }
-
- self.advance();
- TokenType::Literal(Literal::String(value))
- }
-
- // Keywords & Identifiers
- 'a'..='z' | 'A'..='Z' | '_' => {
- let mut value = String::new();
- value.push(character);
-
- while let Some(character) = self.peek() && matches!(character, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
- value.push(self.advance().unwrap());
- }
-
- match value.as_str() {
- "val" => TokenType::Val,
- "var" => TokenType::Var,
- "fn" => TokenType::Fn,
- "if" => TokenType::If,
- "else" => TokenType::Else,
- "for" => TokenType::For,
- "in" => TokenType::In,
- "while" => TokenType::While,
- "loop" => TokenType::Loop,
- "break" => TokenType::Break,
- "continue" => TokenType::Continue,
- "true" => TokenType::Literal(Literal::Bool(true)),
- "false" => TokenType::Literal(Literal::Bool(false)),
- _ => TokenType::Identifier(value),
- }
- }
-
- // Misc.
- _ => panic!("Failed to parse"),
- };
-
- let lexeme = unsafe {
- // If it got to this point we know the slice is valid UTF-8. The only area in
- // the language that UTF-8 characters are recognized is within strings.
- std::str::from_utf8_unchecked(&self.source[self.start..self.pos])
- };
-
- let token = Token {
- tt,
- lexeme,
- start: self.start,
- length: self.pos - self.start,
- line: self.line,
- };
-
- Some(token)
- }
-}
-
-#[cfg(test)]
-mod tests {
- extern crate test;
-
- use test::Bencher;
-
- use super::{Lexer, Literal, TokenType};
-
- const SAMPLE_PROGRAM: &str = r#"
-val variable = 5;
-
-if variable >= 7 {
- print("Hello World");
-}
-
-if variable < 52 {
- variable += 1;
- print("Hello ${variable}");
-}
-
-for person in ["Cody", "Johnny"] {
- print("Hello ${person}");
-}
-"#;
-
- #[test]
- fn simple_code() {
- let tokens = vec![
- // top
- TokenType::Val,
- TokenType::Identifier("variable".to_owned()),
- TokenType::Eq,
- TokenType::Literal(Literal::Number(5)),
- TokenType::SemiColon,
- // 1st block
- TokenType::If,
- TokenType::Identifier("variable".to_owned()),
- TokenType::GtEq,
- TokenType::Literal(Literal::Number(7)),
- TokenType::LeftBrace,
- TokenType::Identifier("print".to_owned()),
- TokenType::LeftParen,
- TokenType::Literal(Literal::String("Hello World".to_owned())),
- TokenType::RightParen,
- TokenType::SemiColon,
- TokenType::RightBrace,
- // 2nd block
- TokenType::If,
- TokenType::Identifier("variable".to_owned()),
- TokenType::Lt,
- TokenType::Literal(Literal::Number(52)),
- TokenType::LeftBrace,
- TokenType::Identifier("variable".to_owned()),
- TokenType::PlusEq,
- TokenType::Literal(Literal::Number(1)),
- TokenType::SemiColon,
- TokenType::Identifier("print".to_owned()),
- TokenType::LeftParen,
- TokenType::Literal(Literal::String("Hello ${variable}".to_owned())),
- TokenType::RightParen,
- TokenType::SemiColon,
- TokenType::RightBrace,
- // 3rd block
- TokenType::For,
- TokenType::Identifier("person".to_owned()),
- TokenType::In,
- TokenType::LeftBracket,
- TokenType::Literal(Literal::String("Cody".to_owned())),
- TokenType::Comma,
- TokenType::Literal(Literal::String("Johnny".to_owned())),
- TokenType::RightBracket,
- TokenType::LeftBrace,
- TokenType::Identifier("print".to_owned()),
- TokenType::LeftParen,
- TokenType::Literal(Literal::String("Hello ${person}".to_owned())),
- TokenType::RightParen,
- TokenType::SemiColon,
- TokenType::RightBrace,
- ];
-
- let lexed_code = Lexer::new(SAMPLE_PROGRAM)
- .map(|it| it.tt)
- .collect::<Vec<_>>();
-
- assert_eq!(tokens, lexed_code);
- }
-
- #[bench]
- fn bench_lexer(b: &mut Bencher) {
- b.iter(|| {
- let _ = Lexer::new(SAMPLE_PROGRAM).collect::<Vec<_>>();
- });
+ unimplemented!()
}
}
diff --git a/src/main.rs b/src/main.rs
index 91db8c6..5d4cafc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,19 +7,11 @@
unused_lifetimes
)]
-pub mod ast;
-pub mod interpreter;
pub mod lexer;
-use std::io::{BufRead, Write};
-use std::{env, fs, io};
+use std::{env, fs};
use itertools::Itertools;
-use rand::Rng;
-
-use crate::ast::parser::AstParser;
-use crate::interpreter::{AstInterpreter, InternalFunction, Value};
-use crate::lexer::Lexer;
fn main() {
let args = env::args().collect_vec();
@@ -36,108 +28,5 @@ fn main() {
return;
};
- let lexer = Lexer::new(&source);
- let tokens = lexer.collect_vec();
- let mut parser = AstParser::new(tokens);
- let ast = parser.parse();
-
- println!("--- Program Output ---");
-
- let mut interpreter = AstInterpreter::default();
-
- // Defining some builtin callables for our interpreter
- interpreter.callables.insert(
- "print".to_owned(),
- Box::new(InternalFunction(&|args| {
- use std::fmt::Write;
-
- let mut buffer = String::new();
- for arg in args {
- write!(&mut buffer, "{}", arg).unwrap();
- }
-
- let mut stdout = io::stdout();
- stdout.lock().write_all(buffer.as_bytes()).unwrap();
- stdout.flush().unwrap();
-
- Value::Nil
- })),
- );
-
- interpreter.callables.insert(
- "println".to_owned(),
- Box::new(InternalFunction(&|args| {
- use std::fmt::Write;
-
- let mut buffer = String::new();
- for arg in args {
- write!(&mut buffer, "{}", arg).unwrap();
- }
- writeln!(&mut buffer).unwrap();
-
- let mut stdout = io::stdout();
- stdout.lock().write_all(buffer.as_bytes()).unwrap();
- stdout.flush().unwrap();
-
- Value::Nil
- })),
- );
-
- interpreter.callables.insert(
- "readln".to_owned(),
- Box::new(InternalFunction(&|_| {
- let stdin = io::stdin();
- let mut line = String::new();
- stdin
- .lock()
- .read_line(&mut line)
- .expect("Failed to read line from stdin");
- line.pop();
-
- Value::String(line)
- })),
- );
-
- interpreter.callables.insert(
- "random".to_owned(),
- Box::new(InternalFunction(&|args| {
- let result = match args {
- [] => rand::thread_rng().gen_range(1..=100),
- [Value::Number(max)] => rand::thread_rng().gen_range(0..=*max),
- [Value::Number(min), Value::Number(max)] => {
- rand::thread_rng().gen_range(*min..=*max)
- }
- _ => panic!("Invalid usage of 'random' function"),
- };
-
- Value::Number(result)
- })),
- );
-
- interpreter.callables.insert(
- "len".to_owned(),
- Box::new(InternalFunction(&|args| {
- let result = match &args[0] {
- Value::String(value) => value.len() as i32,
- _ => panic!("Invalid usage of 'len' function"),
- };
-
- Value::Number(result)
- })),
- );
-
- interpreter.callables.insert(
- "parse_int".to_owned(),
- Box::new(InternalFunction(&|args| {
- let result = match &args[0] {
- Value::String(value) => value.parse::<i32>(),
- _ => panic!("Invalid usage of 'parse_int' function"),
- }
- .expect("Provided string was not an intenger");
-
- Value::Number(result)
- })),
- );
-
- interpreter.interpret(&ast);
+ // TODO:
}