aboutsummaryrefslogtreecommitdiff
path: root/src/lexer.rs
diff options
context:
space:
mode:
authorCody <cody@codyq.dev>2022-12-16 04:21:08 -0600
committerCody <cody@codyq.dev>2022-12-16 04:21:08 -0600
commit82d00772f036a80e8875207e2a11bd8ef3d2d615 (patch)
tree3714583aa0f8dd7f3ac5584ad4f97933eacdccac /src/lexer.rs
parent900bd3d64ac4c5c4c1511ab8388da3f2ed77849f (diff)
downloadsloth-82d00772f036a80e8875207e2a11bd8ef3d2d615.tar.gz
Remove allocation from Token; use a borrowed str with the 'a lifetime instead
This changes Token from holding an owned String to borrowing a &str with the same lifetime as the lexer itself, so creating a token no longer allocates.
Diffstat (limited to 'src/lexer.rs')
-rw-r--r--src/lexer.rs19
1 files changed, 8 insertions, 11 deletions
diff --git a/src/lexer.rs b/src/lexer.rs
index e850cb0..75b1993 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -1,7 +1,5 @@
#![allow(dead_code)]
-use itertools::Itertools;
-
#[derive(Debug, Eq, PartialEq)]
pub enum TokenType {
// Short
@@ -78,9 +76,9 @@ pub enum Literal {
}
#[derive(Debug)]
-pub struct Token {
+pub struct Token<'a> {
pub tt: TokenType,
- pub lexeme: String,
+ pub lexeme: &'a str,
start: usize,
length: usize,
@@ -137,7 +135,7 @@ impl<'a> Lexer<'a> {
}
impl<'a> Iterator for Lexer<'a> {
- type Item = Token;
+ type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
// Ignore all whitespace & comments
@@ -267,12 +265,11 @@ impl<'a> Iterator for Lexer<'a> {
_ => panic!("Failed to parse"),
};
- // Getting the lexeme and then making the token to be returned
- // let lexeme = self.source[self.start..self.pos].iter().join("");
- let lexeme = self.source[self.start..self.pos]
- .iter()
- .map(|it| *it as char)
- .join("");
+ let lexeme = unsafe {
+ // SAFETY: if it got to this point we know the slice is valid UTF-8. The only place
+ // in the language where multi-byte UTF-8 characters are accepted is inside strings.
+ std::str::from_utf8_unchecked(&self.source[self.start..self.pos])
+ };
let token = Token {
tt,