diff options
| author | Cody <cody@codyq.dev> | 2023-04-08 03:48:24 -0500 |
|---|---|---|
| committer | Cody <cody@codyq.dev> | 2023-04-08 03:48:24 -0500 |
| commit | 0eadd4c889049bd89b9dff437f1ed477277452fb (patch) | |
| tree | 70d4dcd39e175af93c233910e24379f692028e86 | |
| parent | 2970520a9592b5c6d45291f54073552a474b71b4 (diff) | |
| download | sloth-0eadd4c889049bd89b9dff437f1ed477277452fb.tar.gz | |
Started on lexer
| -rw-r--r-- | Cargo.lock | 27 | ||||
| -rw-r--r-- | Cargo.toml | 1 | ||||
| -rw-r--r-- | crates/sloth/src/lexer.rs | 425 | ||||
| -rw-r--r-- | crates/sloth/src/main.rs | 10 | ||||
| -rw-r--r-- | crates/sloth/src/parser/ast.rs | 2 | ||||
| -rw-r--r-- | crates/sloth/src/parser/mod.rs | 1 | ||||
| -rw-r--r-- | crates/sloth_bytecode/Cargo.toml | 5 | ||||
| -rw-r--r-- | crates/sloth_bytecode/macros/Cargo.toml | 13 | ||||
| -rw-r--r-- | crates/sloth_bytecode/macros/src/lib.rs | 153 | ||||
| -rw-r--r-- | crates/sloth_bytecode/src/lib.rs | 203 |
10 files changed, 759 insertions, 81 deletions
@@ -3,6 +3,12 @@ version = 3 [[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] name = "either" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -19,9 +25,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.53" +version = "1.0.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba466839c78239c09faf015484e5cc04860f88242cff4d03eb038f04b4699b73" +checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534" dependencies = [ "unicode-ident", ] @@ -46,6 +52,19 @@ dependencies = [ [[package]] name = "sloth_bytecode" version = "0.1.0" +dependencies = [ + "byteorder", + "sloth_bytecode_macros", +] + +[[package]] +name = "sloth_bytecode_macros" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "sloth_vm" @@ -53,9 +72,9 @@ version = "0.1.0" [[package]] name = "syn" -version = "2.0.8" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc02725fd69ab9f26eab07fad303e2497fad6fb9eba4f96c4d1687bdf704ad9" +checksum = "79d9531f94112cfc3e4c8f5f02cb2b58f72c97b7efd85f70203cc6d8efda5927" dependencies = [ "proc-macro2", "quote", @@ -2,6 +2,7 @@ members = [ "crates/sloth", "crates/sloth_bytecode", + "crates/sloth_bytecode/macros", "crates/sloth_vm", ] diff --git a/crates/sloth/src/lexer.rs b/crates/sloth/src/lexer.rs index 8631eef..2d3b25b 100644 --- a/crates/sloth/src/lexer.rs +++ b/crates/sloth/src/lexer.rs @@ -1,5 +1,10 @@ #![allow(dead_code)] +//! TODO: Lexing Regex Literals +//! 
TODO: Lexing Character Literals + +use std::str::Chars; + use thiserror::Error; #[derive(Debug, Error)] @@ -8,7 +13,7 @@ pub enum LexerError { UnexpectedToken, } -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum TokenType { // Meta DocComment, @@ -39,6 +44,7 @@ pub enum TokenType { StarStarEq, // **= SlashEq, // /= PercEq, // %= + TildeEq, // ~= Amp, // & AmpAmp, // && @@ -51,12 +57,14 @@ pub enum TokenType { BangBang, // !! BangEq, // != - Lt, // < - LtLt, // << - LtEq, // <= - Gt, // > - GtGt, // >> - GtEq, // >= + Lt, // < + LtLt, // << + LtLtLt, // <<< + LtEq, // <= + Gt, // > + GtGt, // >> + GtGtGt, // >>> + GtEq, // >= Comma, @@ -70,7 +78,8 @@ pub enum TokenType { ColonColon, // :: SemiColon, // ; - Arrow, // -> + Arrow, // -> + FatArrow, // => // Keywords Val, @@ -91,23 +100,34 @@ pub enum TokenType { As, - // Misc - Literal(Literal), -} + // Literals + Integer(i128), + Float(f64), + Boolean(bool), + Character(char), + String(String), + Regex(String), -#[derive(Debug, Clone, Eq, PartialEq)] -pub enum Literal { - Numeric, - Boolean, - Character, - String, - Regex, + Identifier(String), } -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy)] pub struct Location { - row: u32, - column: u32, + index: usize, + pub row: u32, + pub col: u32, +} + +impl Location { + fn advance(&mut self, len: usize, newline: bool) { + if newline { + self.row += 1; + self.col = 0; + } else { + self.col += 1; + } + self.index += len; + } } #[derive(Debug)] @@ -121,33 +141,384 @@ pub struct Token<'a> { pub struct Lexer<'a> { source: &'a [u8], + window: [char; 3], + chars: Chars<'a>, start: Location, - end: Location, + current: Location, } impl<'a> Lexer<'a> { - fn new(source: &'a str) -> Self { + pub(crate) fn new(source: &'a str) -> Self { + let mut chars = source.chars(); + let window = [ + chars.next().unwrap_or('\0'), + chars.next().unwrap_or('\0'), + chars.next().unwrap_or('\0'), + ]; + Self { source: source.as_bytes(), + window, + 
chars, start: Default::default(), - end: Default::default(), + current: Default::default(), + } + } +} + +impl<'a> Lexer<'a> { + fn pos(&self) -> usize { + self.current.index + } + + fn peek(&self) -> char { + self.window[0] + } + + fn eof(&self) -> bool { + self.peek() == '\0' + } + + fn advance(&mut self) -> char { + let current = self.window[0]; + self.window = [ + self.window[1], + self.window[2], + self.chars.next().unwrap_or('\0'), + ]; + self.current.advance(current.len_utf8(), current == '\n'); + current + } + + fn advance_with(&mut self, with: TokenType) -> TokenType { + self.advance(); + with + } + + fn advance_by(&mut self, amount: usize) { + for _ in 0..amount { + self.advance(); + } + } + + fn advance_by_with(&mut self, amount: usize, with: TokenType) -> TokenType { + self.advance_by(amount); + with + } + + fn advance_while(&mut self, predicate: impl Fn([char; 3]) -> bool) { + while !self.eof() && predicate(self.window) { + self.advance(); } } } +impl<'a> Lexer<'a> { + fn lex_number(&mut self) -> TokenType { + let mut value = self.advance().to_string(); + + while self.peek().is_ascii_digit() { + value.push(self.advance()); + } + + if self.peek() == '.' { + value.push(self.advance()); + + while self.peek().is_ascii_digit() { + value.push(self.advance()); + } + + TokenType::Float(value.parse::<f64>().expect("Expected float")) + } else { + TokenType::Integer(value.parse::<i128>().expect("Expected integer")) + } + } + + fn lex_string(&mut self) -> TokenType { + let mut value = String::new(); + + self.advance(); + loop { + match self.window { + ['\\', '"', ..] => { + self.advance_by(2); + value.push('"'); + } + ['\\', 't', ..] => { + self.advance_by(2); + value.push('\t'); + } + ['\\', 'n', ..] => { + self.advance_by(2); + value.push('\n'); + } + ['"', ..] 
=> {
+ self.advance();
+ break;
+ }
+ _ => {
+ value.push(self.advance());
+ continue;
+ }
+ }
+ }
+
+ TokenType::String(value)
+ }
+}
+
impl<'a> Iterator for Lexer<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
- unimplemented!()
+ // Skipping whitespace
+ self.advance_while(|it| it[0].is_whitespace());
+ self.start = self.current;
+
+ // If we're at the end of the file return nothing
+ if self.eof() {
+ return None;
+ }
+
+ // Figuring out the token type
+ let tt = match self.window {
+ ['#', '#', ..] => {
+ self.advance_while(|it| it[0] != '\n');
+ TokenType::DocComment
+ }
+
+ ['#', ..] => {
+ self.advance_while(|it| it[0] != '\n');
+ TokenType::Comment
+ }
+
+ // Blocks
+ ['(', ..] => self.advance_with(TokenType::OpeningParen),
+ [')', ..] => self.advance_with(TokenType::ClosingParen),
+ ['[', ..] => self.advance_with(TokenType::OpeningBracket),
+ [']', ..] => self.advance_with(TokenType::ClosingBracket),
+ ['{', ..] => self.advance_with(TokenType::OpeningBrace),
+ ['}', ..] => self.advance_with(TokenType::ClosingBrace),
+
+ // Operators
+ ['-', '>', ..] => self.advance_by_with(2, TokenType::Arrow),
+ ['=', '>', ..] => self.advance_by_with(2, TokenType::FatArrow),
+
+ ['+', '+', '='] => self.advance_by_with(3, TokenType::PlusPlusEq),
+ ['*', '*', '='] => self.advance_by_with(3, TokenType::StarStarEq),
+ ['+', '+', ..] => self.advance_by_with(2, TokenType::PlusPlus),
+ ['*', '*', ..] => self.advance_by_with(2, TokenType::StarStar),
+
+ ['+', '=', ..] => self.advance_by_with(2, TokenType::PlusEq),
+ ['-', '=', ..] => self.advance_by_with(2, TokenType::MinusEq),
+ ['*', '=', ..] => self.advance_by_with(2, TokenType::StarEq),
+ ['/', '=', ..] => self.advance_by_with(2, TokenType::SlashEq),
+ ['%', '=', ..] => self.advance_by_with(2, TokenType::PercEq),
+ ['~', '=', ..] => self.advance_by_with(2, TokenType::TildeEq),
+
+ ['+', ..] => self.advance_with(TokenType::Plus),
+ ['-', ..] => self.advance_with(TokenType::Minus),
+ ['*', ..]
=> self.advance_with(TokenType::Star), + ['/', ..] => self.advance_with(TokenType::Slash), // TODO: Check for regex literals + ['%', ..] => self.advance_with(TokenType::Perc), + ['~', ..] => self.advance_with(TokenType::Tilde), + + ['&', '&', ..] => self.advance_by_with(2, TokenType::AmpAmp), + ['&', ..] => self.advance_with(TokenType::Amp), + + ['|', '|', ..] => self.advance_by_with(2, TokenType::PipePipe), + ['|', ..] => self.advance_with(TokenType::Pipe), + + ['=', '=', ..] => self.advance_by_with(2, TokenType::EqEq), + ['!', '=', ..] => self.advance_by_with(2, TokenType::BangEq), + ['!', '!', ..] => self.advance_by_with(2, TokenType::BangBang), + ['=', ..] => self.advance_with(TokenType::Eq), + ['!', ..] => self.advance_with(TokenType::Bang), + + ['<', '<', '<'] => self.advance_by_with(3, TokenType::LtLtLt), + ['<', '<', ..] => self.advance_by_with(2, TokenType::LtLt), + ['<', '=', ..] => self.advance_by_with(2, TokenType::LtEq), + ['<', ..] => self.advance_with(TokenType::Lt), + + ['>', '>', '>'] => self.advance_by_with(3, TokenType::GtGtGt), + ['>', '>', ..] => self.advance_by_with(2, TokenType::GtGt), + ['>', '=', ..] => self.advance_by_with(2, TokenType::GtEq), + ['>', ..] => self.advance_with(TokenType::Gt), + + [',', ..] => self.advance_with(TokenType::Comma), + + ['.', '.', ..] => self.advance_by_with(2, TokenType::DotDot), + ['.', ..] => self.advance_with(TokenType::Dot), + ['?', '?', ..] => self.advance_by_with(2, TokenType::QuestionQuestion), + ['?', '.', ..] => self.advance_by_with(2, TokenType::QuestionDot), + ['?', ..] => self.advance_with(TokenType::Question), + + [';', ..] => self.advance_with(TokenType::SemiColon), + [':', ':', ..] => self.advance_by_with(2, TokenType::ColonColon), + [':', ..] => self.advance_with(TokenType::Colon), + + // Literals + ['0'..='9', ..] => self.lex_number(), + ['"', ..] => self.lex_string(), + + ['a'..='z' | 'A'..='Z' | '_', ..] 
=> {
+ let mut value = String::new();
+ while matches!(self.peek(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
+ value.push(self.advance());
+ }
+
+ match value.as_str() {
+ "val" => TokenType::Val,
+ "var" => TokenType::Var,
+ "fn" => TokenType::Fn,
+ "if" => TokenType::If,
+ "else" => TokenType::Else,
+ "while" => TokenType::While,
+ "for" => TokenType::For,
+ "in" => TokenType::In,
+ "loop" => TokenType::Loop,
+ "break" => TokenType::Break,
+ "continue" => TokenType::Continue,
+ "as" => TokenType::As,
+ "true" => TokenType::Boolean(true),
+ "false" => TokenType::Boolean(false),
+ _ => TokenType::Identifier(value),
+ }
+ }
+
+ _ => panic!("Error while parsing"),
+ };
+
+ let lexeme = unsafe {
+ // At this point it is already known that the string is valid UTF-8, might
+ // as well not check again
+ std::str::from_utf8_unchecked(&self.source[self.start.index..self.pos()])
+ };
+
+ let token = Token {
+ tt,
+ lexeme,
+ start: self.start,
+ end: self.current,
+ };
+
+ Some(token)
}
}
#[cfg(test)]
mod tests {
+ use itertools::Itertools;
+
+ use super::{Lexer, TokenType};
+
#[test]
- fn basic_test_a() {
- //
+ fn lex_operators() {
+ let source = "+ ++ - * ** / % ~ += ++= -= *= **= /= %= ~= & && | || = == ! !! != < << <<< \
+ <= > >> >>> >= , ? ?. ?? . ..
: :: ; -> =>"; + let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); + + assert_eq!(&tokens, &[ + TokenType::Plus, + TokenType::PlusPlus, + TokenType::Minus, + TokenType::Star, + TokenType::StarStar, + TokenType::Slash, + TokenType::Perc, + TokenType::Tilde, + TokenType::PlusEq, + TokenType::PlusPlusEq, + TokenType::MinusEq, + TokenType::StarEq, + TokenType::StarStarEq, + TokenType::SlashEq, + TokenType::PercEq, + TokenType::TildeEq, + TokenType::Amp, + TokenType::AmpAmp, + TokenType::Pipe, + TokenType::PipePipe, + TokenType::Eq, + TokenType::EqEq, + TokenType::Bang, + TokenType::BangBang, + TokenType::BangEq, + TokenType::Lt, + TokenType::LtLt, + TokenType::LtLtLt, + TokenType::LtEq, + TokenType::Gt, + TokenType::GtGt, + TokenType::GtGtGt, + TokenType::GtEq, + TokenType::Comma, + TokenType::Question, + TokenType::QuestionDot, + TokenType::QuestionQuestion, + TokenType::Dot, + TokenType::DotDot, + TokenType::Colon, + TokenType::ColonColon, + TokenType::SemiColon, + TokenType::Arrow, + TokenType::FatArrow, + ]); + } + + #[test] + fn lex_keywords() { + let source = "val var fn if else while for in loop break continue as true false"; + let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); + + assert_eq!(&tokens, &[ + TokenType::Val, + TokenType::Var, + TokenType::Fn, + TokenType::If, + TokenType::Else, + TokenType::While, + TokenType::For, + TokenType::In, + TokenType::Loop, + TokenType::Break, + TokenType::Continue, + TokenType::As, + TokenType::Boolean(true), + TokenType::Boolean(false), + ]); + } + + #[test] + fn lex_literals_a() { + let source = "iden \"foo\" \"bar\" \"baz\" \"\\\"\" \"\\n\" \"\\t\" 93 3252 238 -382 -832 \ + 83 -25 52.9 83.7 12.4 35.2 3.3"; + let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); + + assert_eq!(&tokens, &[ + TokenType::Identifier("iden".to_owned()), + TokenType::String("foo".to_owned()), + TokenType::String("bar".to_owned()), + TokenType::String("baz".to_owned()), + TokenType::String("\"".to_owned()), + 
TokenType::String("\n".to_owned()), + TokenType::String("\t".to_owned()), + TokenType::Integer(93), + TokenType::Integer(3252), + TokenType::Integer(238), + TokenType::Minus, + TokenType::Integer(382), + TokenType::Minus, + TokenType::Integer(832), + TokenType::Integer(83), + TokenType::Minus, + TokenType::Integer(25), + TokenType::Float(52.9), + TokenType::Float(83.7), + TokenType::Float(12.4), + TokenType::Float(35.2), + TokenType::Float(3.3), + ]); } } diff --git a/crates/sloth/src/main.rs b/crates/sloth/src/main.rs index 89ce7f9..6502f19 100644 --- a/crates/sloth/src/main.rs +++ b/crates/sloth/src/main.rs @@ -1,4 +1,3 @@ -#![feature(test, let_chains)] #![warn( clippy::wildcard_imports, clippy::string_add, @@ -8,10 +7,12 @@ )] pub mod lexer; +pub mod parser; use std::{env, fs}; use itertools::Itertools; +use lexer::Lexer; fn main() { let args = env::args().collect_vec(); @@ -23,10 +24,15 @@ fn main() { } let source_path = &args[1]; - let Ok(_source) = fs::read_to_string(source_path) else { + let Ok(source) = fs::read_to_string(source_path) else { println!("Error while reading '{source_path}'"); return; }; + let lexer = Lexer::new(&source); + for token in lexer { + println!("{token:?}"); + } + // TODO: } diff --git a/crates/sloth/src/parser/ast.rs b/crates/sloth/src/parser/ast.rs new file mode 100644 index 0000000..139597f --- /dev/null +++ b/crates/sloth/src/parser/ast.rs @@ -0,0 +1,2 @@ + + diff --git a/crates/sloth/src/parser/mod.rs b/crates/sloth/src/parser/mod.rs new file mode 100644 index 0000000..851c0bc --- /dev/null +++ b/crates/sloth/src/parser/mod.rs @@ -0,0 +1 @@ +pub mod ast; diff --git a/crates/sloth_bytecode/Cargo.toml b/crates/sloth_bytecode/Cargo.toml index a302c81..981b6ee 100644 --- a/crates/sloth_bytecode/Cargo.toml +++ b/crates/sloth_bytecode/Cargo.toml @@ -2,3 +2,8 @@ name = "sloth_bytecode" version = "0.1.0" edition = "2021" + +[dependencies] +sloth_bytecode_macros = { path = "./macros" } + +byteorder = "1.4.3" diff --git 
a/crates/sloth_bytecode/macros/Cargo.toml b/crates/sloth_bytecode/macros/Cargo.toml new file mode 100644 index 0000000..c75bc58 --- /dev/null +++ b/crates/sloth_bytecode/macros/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "sloth_bytecode_macros" +version = "0.1.0" +edition = "2021" + +[dependencies] +proc-macro2 = "1.0.54" +quote = "1.0.26" +syn = "2.0.12" + +[lib] +proc-macro = true + diff --git a/crates/sloth_bytecode/macros/src/lib.rs b/crates/sloth_bytecode/macros/src/lib.rs new file mode 100644 index 0000000..e07a027 --- /dev/null +++ b/crates/sloth_bytecode/macros/src/lib.rs @@ -0,0 +1,153 @@ +use proc_macro2::{Ident, TokenStream}; +use quote::{format_ident, quote}; +use syn::parse::Parse; +use syn::punctuated::Punctuated; +use syn::{bracketed, parse_macro_input, LitInt, LitStr, Token}; + +struct DslInstructionInput { + opcode: LitInt, + name: Ident, + args: Punctuated<Ident, Token![,]>, + description: LitStr, +} + +impl Parse for DslInstructionInput { + fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> { + let args_content; + Ok(Self { + opcode: input.parse()?, + name: input.parse()?, + args: { + bracketed!(args_content in input); + args_content.parse_terminated(Ident::parse, Token![,])? + }, + description: input.parse()?, + }) + } +} + +struct DslInstructionsInput { + name: Ident, + instructions: Punctuated<DslInstructionInput, Token![,]>, +} + +impl Parse for DslInstructionsInput { + fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> { + Ok(Self { + name: input.parse()?, + instructions: { + input.parse::<Token![;]>()?; + input.parse_terminated(DslInstructionInput::parse, Token![,])? + }, + }) + } +} + +fn into_enum_field(instruction: &DslInstructionInput) -> TokenStream { + let DslInstructionInput { + opcode, + name, + args, + description, + } = instruction; + + let args = args.iter(); + + quote! 
{ + #[doc = #description] + #name ( #( #args ),* ) = #opcode + } +} + +fn into_bytecode_parser(instruction: &DslInstructionInput) -> TokenStream { + let DslInstructionInput { + opcode, + name, + args, + description: _, + } = instruction; + + let args = args.iter().map(|arg| { + let read_ident = format_ident!("read_{}", arg); + + let _chunk_codes = arg; + + quote! { + { + let a: #arg = (chunk.code[*offset] << 56) + (chunk) + cursor . #read_ident ::<byteorder::LittleEndian>().unwrap() + } + } + }); + + quote! { + #opcode => { + Self:: #name ( + #( #args ),* + ) + } + } +} + +#[proc_macro] +pub fn instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = parse_macro_input!(input as DslInstructionsInput); + + // Getting values to construct the enum + let enum_name = input.name; + let enum_fields = input + .instructions + .iter() + .map(into_enum_field) + .collect::<Vec<_>>(); + + // Getting the values to parse bytecode + let bytecode_parsers = input + .instructions + .iter() + .map(into_bytecode_parser) + .collect::<Vec<_>>(); + + // Building out the expanded code + let expanded = quote! 
{ + #[repr(u8)] + #[derive(Clone, Debug)] + enum #enum_name { + #( #enum_fields ),* + } + + impl #enum_name { + fn disassemble(chunk: &Chunk, offset: &mut usize) -> #enum_name { + let opcode = chunk.code[*offset]; + *offset += 1; + + let instruction = match opcode { + #( #bytecode_parsers ),* + _ => panic!("Unknown bytecode encountered"), + }; + + instruction + } + + fn assemble(chunk: &mut Chunk) { + // + } + } + + // impl #enum_name { + // fn from_bytecode(cursor: &mut Cursor<Vec<u8>>) -> Self { + // let bytecode = cursor.read_u8().unwrap(); + // + // let instruction = match bytecode { + // #( #bytecode_parsers ),* + // _ => panic!("Unknown bytecode encountered"), + // }; + // + // instruction + // } + // } + }; + + // Returning the proc_macro version of TokenStream + expanded.into() +} diff --git a/crates/sloth_bytecode/src/lib.rs b/crates/sloth_bytecode/src/lib.rs index f814f86..dbf53ae 100644 --- a/crates/sloth_bytecode/src/lib.rs +++ b/crates/sloth_bytecode/src/lib.rs @@ -1,4 +1,3 @@ -#![feature(macro_metavar_expr)] #![allow(dead_code)] #![warn( clippy::wildcard_imports, @@ -8,56 +7,164 @@ unused_lifetimes )] -macro_rules! 
instructions { - ( $( $opcode:literal $name:ident [ $( $v_type:ident ),* ] $doc:literal ),* ) => { - #[repr(u8)] - enum Instruction { - $( - #[doc = $doc] - $name ( $( $v_type ),* ) = $opcode - ),* - } +use std::io::Cursor; - impl Instruction { - fn opcode(&self) -> u8 { - match self { - $( - Self::$name ( $( _ ${ignore(v_type)} ),* ) => $opcode - ),* - } - } - - fn from_bytecode(bytecode: &[u8]) -> Option<Self> { - if bytecode.is_empty() { - return None; - } - - let opcode = bytecode[0]; - let instruction = match opcode { - $( - $opcode => { - // TODO: Get the actual values - Some(Self::$name ( $( 0 ${ignore(v_type)} ),* )) - } - ),*, - _ => None, - }; - - instruction - } - } - } +use byteorder::ReadBytesExt; +// use sloth_bytecode_macros::instructions; + +pub struct Chunk { + pub code: Vec<u8>, + pub constants: Vec<u64>, +} + +// instructions! { +// Instructions; +// +// 0x00 Constant [u64] "Push a constant value onto the stack", +// +// 0x01 Pop [] "Pop a value from the stack", +// 0x02 Dup [] "Duplicate a value on the stack", +// +// 0x10 Add [] "Add the last 2 values on the stack", +// 0x11 Sub [] "Subtract the last 2 values on the stack", +// 0x12 Mul [] "Multiply the last 2 values on the stack", +// 0x13 Div [] "Divide the last 2 values on the stack", +// 0x14 Mod [] "Modulo the last 2 values on the stack" +// } + +// impl Instructions { +// fn disassemble(chunk: &Chunk, offset: &mut usize) { +// // +// } +// +// fn assemble(chunk: &mut Chunk) { +// // +// } +// } + +// #[test] +// fn test() { +// let mut cursor = Cursor::new(vec![0, 1, 0, 0, 1, 0, 0, 0, 0]); +// let instruction = Instructions::from_bytecode(&mut cursor); +// println!("{instruction:?}"); +// assert!(1 == 0); +// } + +// macro_rules! 
instructions {
+// ( $( $opcode:literal $name:ident [ $( $v_type:ident ),* ] $doc:literal
+// ),* ) => { #[repr(u8)]
+// enum Instruction {
+// $(
+// #[doc = $doc]
+// $name ( $( $v_type ),* ) = $opcode
+// ),*
+// }
+//
+// impl Instruction {
+// fn opcode(&self) -> u8 {
+// match self {
+// $(
+// Self::$name ( $( _ ${ignore(v_type)} ),* ) => $opcode
+// ),*
+// }
+// }
+//
+// fn from_bytecode(bytecode: &[u8]) -> Option<Self> {
+// if bytecode.is_empty() {
+// return None;
+// }
+//
+// let opcode = bytecode[0];
+// let instruction = match opcode {
+// $(
+// $opcode => {
+// // TODO: Get the actual values
+// Some(Self::$name ( $( 0 ${ignore(v_type)} ),* ))
+// }
+// ),*,
+// _ => None,
+// };
+//
+// instruction
+// }
+// }
+// }
+// }
+
+// instructions! {
+// Instructions;
+//
+// 0x00 Constant [u64] "Push a constant value onto the stack",
+//
+// 0x01 Pop [] "Pop a value from the stack",
+// 0x02 Dup [] "Duplicate a value on the stack",
+//
+// 0x10 Add [] "Add the last 2 values on the stack",
+// 0x11 Sub [] "Subtract the last 2 values on the stack",
+// 0x12 Mul [] "Multiply the last 2 values on the stack",
+// 0x13 Div [] "Divide the last 2 values on the stack",
+// 0x14 Mod [] "Modulo the last 2 values on the stack"
+// }
+
+pub enum Error {
+ UnknownOpcode(u8),
+ InvalidArguments,
+ Eof,
}
-instructions!
{ - 0x00 Constant [u64] "Push a constant value onto the stack", +pub enum Instruction { + Constant(u64), - 0x01 Pop [] "Pop a value from the stack", - 0x02 Dup [] "Duplicate a value on the stack", + Pop(), + Dup(), - 0x10 Add [] "Add the last 2 values on the stack", - 0x11 Sub [] "Subtract the last 2 values on the stack", - 0x12 Mul [] "Multiply the last 2 values on the stack", - 0x13 Div [] "Divide the last 2 values on the stack", - 0x14 Mod [] "Modulo the last 2 values on the stack" + Add(), + Sub(), + Mul(), + Div(), + Mod(), } + +// fn parse_bytecode(pos: usize, bc: &[u8]) -> Result<Bytecode, BytecodeError> { +// let Some(opcode) = bc.get(pos) else { +// return Err(BytecodeError::Eof); +// }; +// +// let instruction = match opcode { +// 0x00 => { +// // let arg0: [u8; 8] = bc.get(1..1+size_of::<u64>()).unwrap(); +// let arg0 = u64::from_ne_bytes(arg0); +// } +// _ => return Err(BytecodeError::UnknownOpcode(opcode)), +// } +// +// todo!() +// } + +fn parse_bytecode(cursor: &mut Cursor<&[u8]>) -> Result<Instruction, Error> { + let Ok(opcode) = cursor.read_u8() else { + return Err(Error::Eof); + }; + + let instruction = match opcode { + 0x00 => { + let arg0 = cursor + .read_u64::<byteorder::LittleEndian>() + .map_err(|_| Error::InvalidArguments)?; + + Instruction::Constant(arg0) + } + _ => return Err(Error::UnknownOpcode(opcode)), + }; + + Ok(instruction) +} + +// impl<T: Iterator<Item = u8>> TryFrom<T> for Bytecode { +// type Error = BytecodeError; +// +// fn try_from(value: T) -> Result<Self, Self::Error> { +// todo!() +// // +// } +// } |
