From 82d00772f036a80e8875207e2a11bd8ef3d2d615 Mon Sep 17 00:00:00 2001 From: Cody Date: Fri, 16 Dec 2022 04:21:08 -0600 Subject: Remove allocation from Token; use the 'a lifetime instead This changes Token from holding an owned String to borrowing a &str with the same lifetime as the lexer itself. --- src/lexer.rs | 19 ++++++++----------- src/main.rs | 1 + 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/lexer.rs b/src/lexer.rs index e850cb0..75b1993 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,7 +1,5 @@ #![allow(dead_code)] -use itertools::Itertools; - #[derive(Debug, Eq, PartialEq)] pub enum TokenType { // Short @@ -78,9 +76,9 @@ pub enum Literal { } #[derive(Debug)] -pub struct Token { +pub struct Token<'a> { pub tt: TokenType, - pub lexeme: String, + pub lexeme: &'a str, start: usize, length: usize, @@ -137,7 +135,7 @@ impl<'a> Lexer<'a> { } impl<'a> Iterator for Lexer<'a> { - type Item = Token; + type Item = Token<'a>; fn next(&mut self) -> Option { // Ignore all whitespace & comments @@ -267,12 +265,11 @@ impl<'a> Iterator for Lexer<'a> { _ => panic!("Failed to parse"), }; - // Getting the lexeme and then making the token to be returned - // let lexeme = self.source[self.start..self.pos].iter().join(""); - let lexeme = self.source[self.start..self.pos] - .iter() - .map(|it| *it as char) - .join(""); + let lexeme = unsafe { + // If it got to this point we know the slice is valid UTF-8. The only area in + // the language that UTF-8 characters are recognized is within strings. + std::str::from_utf8_unchecked(&self.source[self.start..self.pos]) + }; let token = Token { tt, diff --git a/src/main.rs b/src/main.rs index 5334746..fa355c9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,6 +6,7 @@ unused_lifetimes )] +pub mod ast; pub mod lexer; use lexer::Lexer; -- cgit v1.2.3