aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCody <cody@codyq.dev>2022-12-17 13:52:32 -0600
committerCody <cody@codyq.dev>2022-12-17 13:52:32 -0600
commit1790d6a9091167b8bcc5f9136480ff2659c0660a (patch)
treea319326a9073e8ffc316002314e6bb712159d743 /src
parent82d00772f036a80e8875207e2a11bd8ef3d2d615 (diff)
downloadsloth-1790d6a9091167b8bcc5f9136480ff2659c0660a.tar.gz
Improvements to lexer, add test and benchmarks to lexer
- Change lexer functions to return a option - Return None variant instead of \u0000 in case of EOF - Test lexer against a sample program
Diffstat (limited to 'src')
-rw-r--r--src/ast.rs0
-rw-r--r--src/lexer.rs203
-rw-r--r--src/main.rs3
3 files changed, 152 insertions, 54 deletions
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/ast.rs
diff --git a/src/lexer.rs b/src/lexer.rs
index 75b1993..74a006c 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -2,6 +2,9 @@
#[derive(Debug, Eq, PartialEq)]
pub enum TokenType {
+ // Utility
+ Comment(String),
+
// Short
Plus, // +
Minus, // -
@@ -102,35 +105,30 @@ impl<'a> Lexer<'a> {
}
}
- fn peek(&self) -> char {
- self.source
- .get(self.pos)
- .map(|it| *it as char)
- .unwrap_or('\u{0000}')
+ fn peek(&self) -> Option<char> {
+ self.source.get(self.pos).map(|it| *it as char)
}
- fn peek_nth(&self, nth: usize) -> char {
- self.source
- .get(self.pos + nth)
- .map(|it| *it as char)
- .unwrap_or('\u{0000}')
+ fn peek_nth(&self, nth: usize) -> Option<char> {
+ self.source.get(self.pos + nth).map(|it| *it as char)
}
- fn advance(&mut self) -> char {
+ fn advance(&mut self) -> Option<char> {
self.pos += 1;
- self.source
- .get(self.pos - 1)
- .map(|it| *it as char)
- .unwrap_or('\u{0000}')
+ self.source.get(self.pos - 1).map(|it| *it as char)
}
- fn advance_if(&mut self, next: char) -> bool {
- if self.peek() != next {
- return false;
+ fn advance_if(&mut self, next: impl FnOnce(Option<char>) -> bool) -> bool {
+ if next(self.peek()) {
+ self.advance();
+ return true;
}
- self.advance();
- true
+ false
+ }
+
+ fn advance_if_eq(&mut self, next: Option<char>) -> bool {
+ self.advance_if(|it| it == next)
}
}
@@ -138,16 +136,11 @@ impl<'a> Iterator for Lexer<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
- // Ignore all whitespace & comments
+ // Ignore all whitespace
loop {
match self.peek() {
- '#' => {
- while self.peek() != '\n' {
- self.advance();
- }
- }
- '\n' => self.line += 1,
- ' ' | '\r' | '\t' => (),
+ Some('\n') => self.line += 1,
+ Some(' ') | Some('\r') | Some('\t') => (),
_ => break,
}
self.advance();
@@ -156,15 +149,28 @@ impl<'a> Iterator for Lexer<'a> {
// Resetting the lexeme
self.start = self.pos;
- // Parse the next lexeme
- let character = self.advance();
+ // Parse the next lexeme- If it is EOF return nothing
+ let Some(character) = self.advance() else {
+ return None;
+ };
+
let tt = match character {
+ // Whitespace & Comments
+ '#' => {
+ let mut value = String::new();
+ while self.peek() != Some('\n') {
+ value.push(self.advance().unwrap());
+ }
+
+ TokenType::Comment(value)
+ }
+
// Arithmetic
- '+' if self.advance_if('=') => TokenType::PlusEq,
- '-' if self.advance_if('=') => TokenType::MinusEq,
- '*' if self.advance_if('=') => TokenType::StarEq,
- '/' if self.advance_if('=') => TokenType::SlashEq,
- '%' if self.advance_if('=') => TokenType::PercEq,
+ '+' if self.advance_if_eq(Some('=')) => TokenType::PlusEq,
+ '-' if self.advance_if_eq(Some('=')) => TokenType::MinusEq,
+ '*' if self.advance_if_eq(Some('=')) => TokenType::StarEq,
+ '/' if self.advance_if_eq(Some('=')) => TokenType::SlashEq,
+ '%' if self.advance_if_eq(Some('=')) => TokenType::PercEq,
'+' => TokenType::Plus,
'-' => TokenType::Minus,
'*' => TokenType::Star,
@@ -174,31 +180,31 @@ impl<'a> Iterator for Lexer<'a> {
'0'..='9' => {
let mut value = String::new();
value.push(character);
- while ('0'..='9').contains(&self.peek()) {
- value.push(self.advance());
+ while ('0'..='9').contains(&self.peek().unwrap()) {
+ value.push(self.advance().unwrap());
}
- if self.advance_if('.') {
+
+ if self.advance_if_eq(Some('.')) {
value.push('.');
- while ('0'..='9').contains(&self.peek()) {
- let c = self.advance();
- value.push(c);
+ while ('0'..='9').contains(&self.peek().unwrap()) {
+ value.push(self.advance().unwrap());
}
}
TokenType::Literal(Literal::Number(value.parse::<i32>().unwrap()))
}
// Logical & Bitwise
- '!' if self.advance_if('=') => TokenType::BangEq,
- '=' if self.advance_if('=') => TokenType::EqEq,
- '>' if self.advance_if('=') => TokenType::GtEq,
- '<' if self.advance_if('=') => TokenType::LtEq,
+ '!' if self.advance_if_eq(Some('=')) => TokenType::BangEq,
+ '=' if self.advance_if_eq(Some('=')) => TokenType::EqEq,
+ '>' if self.advance_if_eq(Some('=')) => TokenType::GtEq,
+ '<' if self.advance_if_eq(Some('=')) => TokenType::LtEq,
'!' => TokenType::Bang,
'=' => TokenType::Eq,
'>' => TokenType::Gt,
'<' => TokenType::Lt,
- '&' if self.advance_if('&') => TokenType::AmpAmp,
- '|' if self.advance_if('|') => TokenType::PipePipe,
+ '&' if self.advance_if_eq(Some('&')) => TokenType::AmpAmp,
+ '|' if self.advance_if_eq(Some('|')) => TokenType::PipePipe,
'&' => TokenType::Amp,
'|' => TokenType::Pipe,
@@ -216,11 +222,13 @@ impl<'a> Iterator for Lexer<'a> {
'"' => {
let mut value = String::new();
- while self.peek() != '"' {
- let character = self.advance();
+ while self.peek() != Some('"') {
+ let Some(character) = self.advance() else {
+ panic!("Syntax Error: String invalid");
+ };
if character == '\\' {
- match self.advance() {
+ match self.advance().unwrap() {
'\\' => value.push('\\'),
'"' => value.push('"'),
_ => panic!(),
@@ -239,8 +247,9 @@ impl<'a> Iterator for Lexer<'a> {
'a'..='z' | 'A'..='Z' | '_' => {
let mut value = String::new();
value.push(character);
- while matches!(self.peek(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
- value.push(self.advance())
+
+ while let Some(character) = self.peek() && matches!(character, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
+ value.push(self.advance().unwrap());
}
match value.as_str() {
@@ -261,7 +270,6 @@ impl<'a> Iterator for Lexer<'a> {
}
// Misc.
- '\u{0000}' => return None,
_ => panic!("Failed to parse"),
};
@@ -282,3 +290,92 @@ impl<'a> Iterator for Lexer<'a> {
Some(token)
}
}
+
+#[cfg(test)]
+mod tests {
+ extern crate test;
+
+ use test::Bencher;
+
+ use super::{Lexer, Literal, TokenType};
+
+ const SAMPLE_PROGRAM: &str = r#"
+val variable = 5;
+
+if variable >= 7 {
+ print "Hello World";
+}
+
+if variable < 52 {
+ variable += 1;
+ print "Hello ${variable}";
+}
+
+for person in ["Cody", "Johnny"] {
+ print "Hello ${person}";
+}
+"#;
+
+ #[test]
+ fn simple_code() {
+ let tokens = vec![
+ // top
+ TokenType::Val,
+ TokenType::Identifier("variable".to_owned()),
+ TokenType::Eq,
+ TokenType::Literal(Literal::Number(5)),
+ TokenType::SemiColon,
+ // 1st block
+ TokenType::If,
+ TokenType::Identifier("variable".to_owned()),
+ TokenType::GtEq,
+ TokenType::Literal(Literal::Number(7)),
+ TokenType::LeftBrace,
+ TokenType::Print,
+ TokenType::Literal(Literal::String("Hello World".to_owned())),
+ TokenType::SemiColon,
+ TokenType::RightBrace,
+ // 2nd block
+ TokenType::If,
+ TokenType::Identifier("variable".to_owned()),
+ TokenType::Lt,
+ TokenType::Literal(Literal::Number(52)),
+ TokenType::LeftBrace,
+ TokenType::Identifier("variable".to_owned()),
+ TokenType::PlusEq,
+ TokenType::Literal(Literal::Number(1)),
+ TokenType::SemiColon,
+ TokenType::Print,
+ TokenType::Literal(Literal::String("Hello ${variable}".to_owned())),
+ TokenType::SemiColon,
+ TokenType::RightBrace,
+ // 3rd block
+ TokenType::For,
+ TokenType::Identifier("person".to_owned()),
+ TokenType::In,
+ TokenType::LeftBracket,
+ TokenType::Literal(Literal::String("Cody".to_owned())),
+ TokenType::Comma,
+ TokenType::Literal(Literal::String("Johnny".to_owned())),
+ TokenType::RightBracket,
+ TokenType::LeftBrace,
+ TokenType::Print,
+ TokenType::Literal(Literal::String("Hello ${person}".to_owned())),
+ TokenType::SemiColon,
+ TokenType::RightBrace,
+ ];
+
+ let lexed_code = Lexer::new(SAMPLE_PROGRAM)
+ .map(|it| it.tt)
+ .collect::<Vec<_>>();
+
+ assert_eq!(tokens, lexed_code);
+ }
+
+ #[bench]
+ fn bench_lexer(b: &mut Bencher) {
+ b.iter(|| {
+ let _ = Lexer::new(SAMPLE_PROGRAM).collect::<Vec<_>>();
+ });
+ }
+}
diff --git a/src/main.rs b/src/main.rs
index fa355c9..8f3c001 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+#![feature(test, let_chains)]
#![warn(
clippy::wildcard_imports,
clippy::string_add,
@@ -24,6 +25,6 @@ if variable >= 7 {
fn main() {
let lexer = Lexer::new(SOURCE);
for token in lexer {
- print!("({}) ", token.lexeme);
+ print!("{} ", token.lexeme);
}
}