diff options
| -rw-r--r-- | crates/sloth/src/lexer.rs | 40 | ||||
| -rw-r--r-- | crates/sloth/src/parser/ast.rs | 32 | ||||
| -rw-r--r-- | documentation/grammar.txt | 57 | ||||
| -rw-r--r-- | documentation/order.txt | 18 |
4 files changed, 133 insertions, 14 deletions
diff --git a/crates/sloth/src/lexer.rs b/crates/sloth/src/lexer.rs index 2d3b25b..b58f2f7 100644 --- a/crates/sloth/src/lexer.rs +++ b/crates/sloth/src/lexer.rs @@ -1,7 +1,6 @@ #![allow(dead_code)] //! TODO: Lexing Regex Literals -//! TODO: Lexing Character Literals use std::str::Chars; @@ -50,6 +49,7 @@ pub enum TokenType { AmpAmp, // && Pipe, // | PipePipe, // || + Caret, // ^ Eq, // = EqEq, // == @@ -59,12 +59,12 @@ pub enum TokenType { Lt, // < LtLt, // << - LtLtLt, // <<< LtEq, // <= + LtLtEq, // <<= Gt, // > GtGt, // >> - GtGtGt, // >>> GtEq, // >= + GtGtEq, // >>= Comma, @@ -330,18 +330,20 @@ impl<'a> Iterator for Lexer<'a> { ['|', '|', ..] => self.advance_by_with(2, TokenType::PipePipe), ['|', ..] => self.advance_with(TokenType::Pipe), + ['^', ..] => self.advance_by_with(2, TokenType::Caret), + ['=', '=', ..] => self.advance_by_with(2, TokenType::EqEq), ['!', '=', ..] => self.advance_by_with(2, TokenType::BangEq), ['!', '!', ..] => self.advance_by_with(2, TokenType::BangBang), ['=', ..] => self.advance_with(TokenType::Eq), ['!', ..] => self.advance_with(TokenType::Bang), - ['<', '<', '<'] => self.advance_by_with(3, TokenType::LtLtLt), + ['<', '<', '='] => self.advance_by_with(3, TokenType::LtLtEq), ['<', '<', ..] => self.advance_by_with(2, TokenType::LtLt), ['<', '=', ..] => self.advance_by_with(2, TokenType::LtEq), ['<', ..] => self.advance_with(TokenType::Lt), - ['>', '>', '>'] => self.advance_by_with(3, TokenType::GtGtGt), + ['>', '>', '='] => self.advance_by_with(3, TokenType::GtGtEq), ['>', '>', ..] => self.advance_by_with(2, TokenType::GtGt), ['>', '=', ..] => self.advance_by_with(2, TokenType::GtEq), ['>', ..] => self.advance_with(TokenType::Gt), @@ -359,12 +361,13 @@ impl<'a> Iterator for Lexer<'a> { [':', ..] => self.advance_with(TokenType::Colon), // Literals + ['\'', c, '\''] => self.advance_by_with(3, TokenType::Character(c)), ['0'..='9', ..] => self.lex_number(), ['"', ..] => self.lex_string(), - ['a'..='z' | 'A'..='Z' | '_', ..] 
=> { + ['a'..='z' | 'A'..='Z' | '_' | '$', ..] => { let mut value = String::new(); - while matches!(self.peek(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') { + while matches!(self.peek(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$') { value.push(self.advance()); } @@ -415,8 +418,8 @@ mod tests { #[test] fn lex_operators() { - let source = "+ ++ - * ** / % ~ += ++= -= *= **= /= %= ~= & && | || = == ! !! != < << <<< \ - <= > >> >>> >= , ? ?. ?? . .. : :: ; -> =>"; + let source = "+ ++ - * ** / % ~ += ++= -= *= **= /= %= ~= & && | || ^ = == ! !! != < << \ + <<= <= > >> >>= >= , ? ?. ?? . .. : :: ; -> =>"; let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); assert_eq!(&tokens, &[ @@ -440,6 +443,7 @@ mod tests { TokenType::AmpAmp, TokenType::Pipe, TokenType::PipePipe, + TokenType::Caret, TokenType::Eq, TokenType::EqEq, TokenType::Bang, @@ -447,11 +451,11 @@ mod tests { TokenType::BangEq, TokenType::Lt, TokenType::LtLt, - TokenType::LtLtLt, + TokenType::LtLtEq, TokenType::LtEq, TokenType::Gt, TokenType::GtGt, - TokenType::GtGtGt, + TokenType::GtGtEq, TokenType::GtEq, TokenType::Comma, TokenType::Question, @@ -492,18 +496,26 @@ mod tests { #[test] fn lex_literals_a() { - let source = "iden \"foo\" \"bar\" \"baz\" \"\\\"\" \"\\n\" \"\\t\" 93 3252 238 -382 -832 \ - 83 -25 52.9 83.7 12.4 35.2 3.3"; + let source = "foo bar _foo __bar $0 $$1 \"foo\" \"bar\" \"baz\" \"\\\"\" \"\\n\" \"\\t\" \ + 'a' 'b' '\"' 93 3252 238 -382 -832 83 -25 52.9 83.7 12.4 35.2 3.3"; let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); assert_eq!(&tokens, &[ - TokenType::Identifier("iden".to_owned()), + TokenType::Identifier("foo".to_owned()), + TokenType::Identifier("bar".to_owned()), + TokenType::Identifier("_foo".to_owned()), + TokenType::Identifier("__bar".to_owned()), + TokenType::Identifier("$0".to_owned()), + TokenType::Identifier("$$1".to_owned()), TokenType::String("foo".to_owned()), TokenType::String("bar".to_owned()), TokenType::String("baz".to_owned()), 
TokenType::String("\"".to_owned()), TokenType::String("\n".to_owned()), TokenType::String("\t".to_owned()), + TokenType::Character('a'), + TokenType::Character('b'), + TokenType::Character('"'), TokenType::Integer(93), TokenType::Integer(3252), TokenType::Integer(238), diff --git a/crates/sloth/src/parser/ast.rs b/crates/sloth/src/parser/ast.rs index 8b13789..85309a5 100644 --- a/crates/sloth/src/parser/ast.rs +++ b/crates/sloth/src/parser/ast.rs @@ -1 +1,33 @@ +pub enum BinaryOp { + Add, + Con, + Sub, + Mul, + Pow, + Div, + BWSftRight, + BWSftLeft, + BWAnd, + BWOr, + BWXor, +} + +pub enum UnaryOp { + Not, + Neg, + + BWComp, +} + +pub enum Expr { + BinaryOp { + op: BinaryOp, + lhs: Box<Expr>, + rhs: Box<Expr>, + }, + UnaryOp { + op: UnaryOp, + value: Box<Expr>, + }, +} diff --git a/documentation/grammar.txt b/documentation/grammar.txt new file mode 100644 index 0000000..1217751 --- /dev/null +++ b/documentation/grammar.txt @@ -0,0 +1,57 @@ +### Statements + +expr_statement : expression ( ";" | "\n" ) + +### Expressions + +expression : logical_or + +### Operators + +logical_or : logical_and ( "||" logical_and )* +logical_and : equality ( "&&" equality )* + +equality : comparison ( ( "==" | "!=" ) comparison )* +comparison : bitwise_or ( ( "<=" | ">=" | "<" | ">" ) bitwise_or )* + +bitwise_or : bitwise_xor ( "|" bitwise_xor )* +bitwise_xor : bitwise_and ( "^" bitwise_and )* +bitwise_and : bitwise_shift ( "&" bitwise_shift )* +bitwise_shift : additive ( ( "<<" | ">>" ) additive )* + +additive : multiplicative ( ( "+" | "++" | "-" ) multiplicative )* +multiplicative : unary ( ( "*" | "**" | "/" | "%" ) unary )* +unary : ( "!" | "-" | "~" ) unary | call + +call : primary ( "(" ( primary "," )* primary?
")" )* + +primary : identifier + | literal + | "(" expression ")" + +### Types + +identifier : ( ALPHA | "_" | "$" ) ( ALPHANUMERIC | "_" | "$" )* +literal : string + | char + | float + | int + | boolean + +string : '"' ALPHANUMERIC* '"' +char : "'" ALPHANUMERIC "'" +float : NUMERIC+ "." NUMERIC* +int : NUMERIC+ +boolean : "true" | "false" + +### Primitives + +ALPHANUMERIC = ALPHA | NUMERIC +NUMERIC = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" +ALPHA = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" + | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" + | "U" | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" | "c" | "d" + | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" + | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" + | "y" | "z" + diff --git a/documentation/order.txt b/documentation/order.txt new file mode 100644 index 0000000..8727306 --- /dev/null +++ b/documentation/order.txt @@ -0,0 +1,18 @@ +| Name | Operators | Associates | +| -------------- | --------- | ---------- | +| parentheses | () | Left | +| member access | . ! !! ?. | Left | +| defaulting | ?: | Right | +| function call | () | Left | +| unary | ! - ~ | Right | +| multiplicative | * / % | Left | +| additive | + - | Left | +| bitwise shift | << >> | Left | +| comparison | < > <= >= | Left | +| equality | == != | Left | +| bitwise and | & | Left | +| bitwise xor | ^ | Left | +| bitwise or | | | Left | +| logical and | && | Left | +| logical or | || | Left | + |
