diff options
| author | Cody <cody@codyq.dev> | 2023-04-08 03:48:24 -0500 |
|---|---|---|
| committer | Cody <cody@codyq.dev> | 2023-04-08 03:48:24 -0500 |
| commit | 0eadd4c889049bd89b9dff437f1ed477277452fb (patch) | |
| tree | 70d4dcd39e175af93c233910e24379f692028e86 | |
| parent | 2970520a9592b5c6d45291f54073552a474b71b4 (diff) | |
| download | sloth-0eadd4c889049bd89b9dff437f1ed477277452fb.tar.gz | |
Started on lexer
| -rw-r--r-- | Cargo.lock | 27 | ||||
| -rw-r--r-- | Cargo.toml | 1 | ||||
| -rw-r--r-- | crates/sloth/src/lexer.rs | 425 | ||||
| -rw-r--r-- | crates/sloth/src/main.rs | 10 | ||||
| -rw-r--r-- | crates/sloth/src/parser/ast.rs | 2 | ||||
| -rw-r--r-- | crates/sloth/src/parser/mod.rs | 1 | ||||
| -rw-r--r-- | crates/sloth_bytecode/Cargo.toml | 5 | ||||
| -rw-r--r-- | crates/sloth_bytecode/macros/Cargo.toml | 13 | ||||
| -rw-r--r-- | crates/sloth_bytecode/macros/src/lib.rs | 153 | ||||
| -rw-r--r-- | crates/sloth_bytecode/src/lib.rs | 203 |
10 files changed, 759 insertions, 81 deletions
@@ -3,6 +3,12 @@ version = 3 [[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] name = "either" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -19,9 +25,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.53" +version = "1.0.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba466839c78239c09faf015484e5cc04860f88242cff4d03eb038f04b4699b73" +checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534" dependencies = [ "unicode-ident", ] @@ -46,6 +52,19 @@ dependencies = [ [[package]] name = "sloth_bytecode" version = "0.1.0" +dependencies = [ + "byteorder", + "sloth_bytecode_macros", +] + +[[package]] +name = "sloth_bytecode_macros" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "sloth_vm" @@ -53,9 +72,9 @@ version = "0.1.0" [[package]] name = "syn" -version = "2.0.8" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc02725fd69ab9f26eab07fad303e2497fad6fb9eba4f96c4d1687bdf704ad9" +checksum = "79d9531f94112cfc3e4c8f5f02cb2b58f72c97b7efd85f70203cc6d8efda5927" dependencies = [ "proc-macro2", "quote", @@ -2,6 +2,7 @@ members = [ "crates/sloth", "crates/sloth_bytecode", + "crates/sloth_bytecode/macros", "crates/sloth_vm", ] diff --git a/crates/sloth/src/lexer.rs b/crates/sloth/src/lexer.rs index 8631eef..2d3b25b 100644 --- a/crates/sloth/src/lexer.rs +++ b/crates/sloth/src/lexer.rs @@ -1,5 +1,10 @@ #![allow(dead_code)] +//! TODO: Lexing Regex Literals +//! 
TODO: Lexing Character Literals + +use std::str::Chars; + use thiserror::Error; #[derive(Debug, Error)] @@ -8,7 +13,7 @@ pub enum LexerError { UnexpectedToken, } -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum TokenType { // Meta DocComment, @@ -39,6 +44,7 @@ pub enum TokenType { StarStarEq, // **= SlashEq, // /= PercEq, // %= + TildeEq, // ~= Amp, // & AmpAmp, // && @@ -51,12 +57,14 @@ pub enum TokenType { BangBang, // !! BangEq, // != - Lt, // < - LtLt, // << - LtEq, // <= - Gt, // > - GtGt, // >> - GtEq, // >= + Lt, // < + LtLt, // << + LtLtLt, // <<< + LtEq, // <= + Gt, // > + GtGt, // >> + GtGtGt, // >>> + GtEq, // >= Comma, @@ -70,7 +78,8 @@ pub enum TokenType { ColonColon, // :: SemiColon, // ; - Arrow, // -> + Arrow, // -> + FatArrow, // => // Keywords Val, @@ -91,23 +100,34 @@ pub enum TokenType { As, - // Misc - Literal(Literal), -} + // Literals + Integer(i128), + Float(f64), + Boolean(bool), + Character(char), + String(String), + Regex(String), -#[derive(Debug, Clone, Eq, PartialEq)] -pub enum Literal { - Numeric, - Boolean, - Character, - String, - Regex, + Identifier(String), } -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy)] pub struct Location { - row: u32, - column: u32, + index: usize, + pub row: u32, + pub col: u32, +} + +impl Location { + fn advance(&mut self, len: usize, newline: bool) { + if newline { + self.row += 1; + self.col = 0; + } else { + self.col += 1; + } + self.index += len; + } } #[derive(Debug)] @@ -121,33 +141,384 @@ pub struct Token<'a> { pub struct Lexer<'a> { source: &'a [u8], + window: [char; 3], + chars: Chars<'a>, start: Location, - end: Location, + current: Location, } impl<'a> Lexer<'a> { - fn new(source: &'a str) -> Self { + pub(crate) fn new(source: &'a str) -> Self { + let mut chars = source.chars(); + let window = [ + chars.next().unwrap_or('\0'), + chars.next().unwrap_or('\0'), + chars.next().unwrap_or('\0'), + ]; + Self { source: source.as_bytes(), + window, + 
chars, start: Default::default(), - end: Default::default(), + current: Default::default(), + } + } +} + +impl<'a> Lexer<'a> { + fn pos(&self) -> usize { + self.current.index + } + + fn peek(&self) -> char { + self.window[0] + } + + fn eof(&self) -> bool { + self.peek() == '\0' + } + + fn advance(&mut self) -> char { + let current = self.window[0]; + self.window = [ + self.window[1], + self.window[2], + self.chars.next().unwrap_or('\0'), + ]; + self.current.advance(current.len_utf8(), current == '\n'); + current + } + + fn advance_with(&mut self, with: TokenType) -> TokenType { + self.advance(); + with + } + + fn advance_by(&mut self, amount: usize) { + for _ in 0..amount { + self.advance(); + } + } + + fn advance_by_with(&mut self, amount: usize, with: TokenType) -> TokenType { + self.advance_by(amount); + with + } + + fn advance_while(&mut self, predicate: impl Fn([char; 3]) -> bool) { + while !self.eof() && predicate(self.window) { + self.advance(); } } } +impl<'a> Lexer<'a> { + fn lex_number(&mut self) -> TokenType { + let mut value = self.advance().to_string(); + + while self.peek().is_ascii_digit() { + value.push(self.advance()); + } + + if self.peek() == '.' { + value.push(self.advance()); + + while self.peek().is_ascii_digit() { + value.push(self.advance()); + } + + TokenType::Float(value.parse::<f64>().expect("Expected float")) + } else { + TokenType::Integer(value.parse::<i128>().expect("Expected integer")) + } + } + + fn lex_string(&mut self) -> TokenType { + let mut value = String::new(); + + self.advance(); + loop { + match self.window { + ['\\', '"', ..] => { + self.advance_by(2); + value.push('"'); + } + ['\\', 't', ..] => { + self.advance_by(2); + value.push('\t'); + } + ['\\', 'n', ..] => { + self.advance_by(2); + value.push('\n'); + } + ['"', ..] 
=> {
+ self.advance();
+ break;
+ }
+ _ => {
+ value.push(self.advance());
+ continue;
+ }
+ }
+ }
+
+ TokenType::String(value)
+ }
+}
+
impl<'a> Iterator for Lexer<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
- unimplemented!()
+ // Skipping whitespace
+ self.advance_while(|it| it[0].is_whitespace());
+ self.start = self.current;
+
+ // If we're at the end of the file return nothing
+ if self.eof() {
+ return None;
+ }
+
+ // Figuring out the token type
+ let tt = match self.window {
+ ['#', '#', ..] => {
+ self.advance_while(|it| it[0] != '\n');
+ TokenType::DocComment
+ }
+
+ ['#', ..] => {
+ self.advance_while(|it| it[0] != '\n');
+ TokenType::Comment
+ }
+
+ // Blocks
+ ['(', ..] => self.advance_with(TokenType::OpeningParen),
+ [')', ..] => self.advance_with(TokenType::ClosingParen),
+ ['[', ..] => self.advance_with(TokenType::OpeningBracket),
+ [']', ..] => self.advance_with(TokenType::ClosingBracket),
+ ['{', ..] => self.advance_with(TokenType::OpeningBrace),
+ ['}', ..] => self.advance_with(TokenType::ClosingBrace),
+
+ // Operators
+ ['-', '>', ..] => self.advance_by_with(2, TokenType::Arrow),
+ ['=', '>', ..] => self.advance_by_with(2, TokenType::FatArrow),
+
+ ['+', '+', '='] => self.advance_by_with(3, TokenType::PlusPlusEq),
+ ['*', '*', '='] => self.advance_by_with(3, TokenType::StarStarEq),
+ ['+', '+', ..] => self.advance_by_with(2, TokenType::PlusPlus),
+ ['*', '*', ..] => self.advance_by_with(2, TokenType::StarStar),
+
+ ['+', '=', ..] => self.advance_by_with(2, TokenType::PlusEq),
+ ['-', '=', ..] => self.advance_by_with(2, TokenType::MinusEq),
+ ['*', '=', ..] => self.advance_by_with(2, TokenType::StarEq),
+ ['/', '=', ..] => self.advance_by_with(2, TokenType::SlashEq),
+ ['%', '=', ..] => self.advance_by_with(2, TokenType::PercEq),
+ ['~', '=', ..] => self.advance_by_with(2, TokenType::TildeEq),
+
+ ['+', ..] => self.advance_with(TokenType::Plus),
+ ['-', ..] => self.advance_with(TokenType::Minus),
+ ['*', ..]
=> self.advance_with(TokenType::Star), + ['/', ..] => self.advance_with(TokenType::Slash), // TODO: Check for regex literals + ['%', ..] => self.advance_with(TokenType::Perc), + ['~', ..] => self.advance_with(TokenType::Tilde), + + ['&', '&', ..] => self.advance_by_with(2, TokenType::AmpAmp), + ['&', ..] => self.advance_with(TokenType::Amp), + + ['|', '|', ..] => self.advance_by_with(2, TokenType::PipePipe), + ['|', ..] => self.advance_with(TokenType::Pipe), + + ['=', '=', ..] => self.advance_by_with(2, TokenType::EqEq), + ['!', '=', ..] => self.advance_by_with(2, TokenType::BangEq), + ['!', '!', ..] => self.advance_by_with(2, TokenType::BangBang), + ['=', ..] => self.advance_with(TokenType::Eq), + ['!', ..] => self.advance_with(TokenType::Bang), + + ['<', '<', '<'] => self.advance_by_with(3, TokenType::LtLtLt), + ['<', '<', ..] => self.advance_by_with(2, TokenType::LtLt), + ['<', '=', ..] => self.advance_by_with(2, TokenType::LtEq), + ['<', ..] => self.advance_with(TokenType::Lt), + + ['>', '>', '>'] => self.advance_by_with(3, TokenType::GtGtGt), + ['>', '>', ..] => self.advance_by_with(2, TokenType::GtGt), + ['>', '=', ..] => self.advance_by_with(2, TokenType::GtEq), + ['>', ..] => self.advance_with(TokenType::Gt), + + [',', ..] => self.advance_with(TokenType::Comma), + + ['.', '.', ..] => self.advance_by_with(2, TokenType::DotDot), + ['.', ..] => self.advance_with(TokenType::Dot), + ['?', '?', ..] => self.advance_by_with(2, TokenType::QuestionQuestion), + ['?', '.', ..] => self.advance_by_with(2, TokenType::QuestionDot), + ['?', ..] => self.advance_with(TokenType::Question), + + [';', ..] => self.advance_with(TokenType::SemiColon), + [':', ':', ..] => self.advance_by_with(2, TokenType::ColonColon), + [':', ..] => self.advance_with(TokenType::Colon), + + // Literals + ['0'..='9', ..] => self.lex_number(), + ['"', ..] => self.lex_string(), + + ['a'..='z' | 'A'..='Z' | '_', ..] 
=> {
+ let mut value = String::new();
+ while matches!(self.peek(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
+ value.push(self.advance());
+ }
+
+ match value.as_str() {
+ "val" => TokenType::Val,
+ "var" => TokenType::Var,
+ "fn" => TokenType::Fn,
+ "if" => TokenType::If,
+ "else" => TokenType::Else,
+ "while" => TokenType::While,
+ "for" => TokenType::For,
+ "in" => TokenType::In,
+ "loop" => TokenType::Loop,
+ "break" => TokenType::Break,
+ "continue" => TokenType::Continue,
+ "as" => TokenType::As,
+ "true" => TokenType::Boolean(true),
+ "false" => TokenType::Boolean(false),
+ _ => TokenType::Identifier(value),
+ }
+ }
+
+ _ => panic!("Error while parsing"),
+ };
+
+ let lexeme = unsafe {
+ // At this point it is already known that the string is valid UTF-8, might
+ // as well not check again
+ std::str::from_utf8_unchecked(&self.source[self.start.index..self.pos()])
+ };
+
+ let token = Token {
+ tt,
+ lexeme,
+ start: self.start,
+ end: self.current,
+ };
+
+ Some(token)
}
}
#[cfg(test)]
mod tests {
+ use itertools::Itertools;
+
+ use super::{Lexer, TokenType};
+
#[test]
- fn basic_test_a() {
- //
+ fn lex_operators() {
+ let source = "+ ++ - * ** / % ~ += ++= -= *= **= /= %= ~= & && | || = == ! !! != < << <<< \
+ <= > >> >>> >= , ? ?. ?? . ..
: :: ; -> =>"; + let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); + + assert_eq!(&tokens, &[ + TokenType::Plus, + TokenType::PlusPlus, + TokenType::Minus, + TokenType::Star, + TokenType::StarStar, + TokenType::Slash, + TokenType::Perc, + TokenType::Tilde, + TokenType::PlusEq, + TokenType::PlusPlusEq, + TokenType::MinusEq, + TokenType::StarEq, + TokenType::StarStarEq, + TokenType::SlashEq, + TokenType::PercEq, + TokenType::TildeEq, + TokenType::Amp, + TokenType::AmpAmp, + TokenType::Pipe, + TokenType::PipePipe, + TokenType::Eq, + TokenType::EqEq, + TokenType::Bang, + TokenType::BangBang, + TokenType::BangEq, + TokenType::Lt, + TokenType::LtLt, + TokenType::LtLtLt, + TokenType::LtEq, + TokenType::Gt, + TokenType::GtGt, + TokenType::GtGtGt, + TokenType::GtEq, + TokenType::Comma, + TokenType::Question, + TokenType::QuestionDot, + TokenType::QuestionQuestion, + TokenType::Dot, + TokenType::DotDot, + TokenType::Colon, + TokenType::ColonColon, + TokenType::SemiColon, + TokenType::Arrow, + TokenType::FatArrow, + ]); + } + + #[test] + fn lex_keywords() { + let source = "val var fn if else while for in loop break continue as true false"; + let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); + + assert_eq!(&tokens, &[ + TokenType::Val, + TokenType::Var, + TokenType::Fn, + TokenType::If, + TokenType::Else, + TokenType::While, + TokenType::For, + TokenType::In, + TokenType::Loop, + TokenType::Break, + TokenType::Continue, + TokenType::As, + TokenType::Boolean(true), + TokenType::Boolean(false), + ]); + } + + #[test] + fn lex_literals_a() { + let source = "iden \"foo\" \"bar\" \"baz\" \"\\\"\" \"\\n\" \"\\t\" 93 3252 238 -382 -832 \ + 83 -25 52.9 83.7 12.4 35.2 3.3"; + let tokens = Lexer::new(source).map(|it| it.tt).collect_vec(); + + assert_eq!(&tokens, &[ + TokenType::Identifier("iden".to_owned()), + TokenType::String("foo".to_owned()), + TokenType::String("bar".to_owned()), + TokenType::String("baz".to_owned()), + TokenType::String("\"".to_owned()), + 
TokenType::String("\n".to_owned()), + TokenType::String("\t".to_owned()), + TokenType::Integer(93), + TokenType::Integer(3252), + TokenType::Integer(238), + TokenType::Minus, + TokenType::Integer(382), + TokenType::Minus, + TokenType::Integer(832), + TokenType::Integer(83), + TokenType::Minus, + TokenType::Integer(25), + TokenType::Float(52.9), + TokenType::Float(83.7), + TokenType::Float(12.4), + TokenType::Float(35.2), + TokenType::Float(3.3), + ]); } } diff --git a/crates/sloth/src/main.rs b/crates/sloth/src/main.rs index 89ce7f9..6502f19 100644 --- a/crates/sloth/src/main.rs +++ b/crates/sloth/src/main.rs @@ -1,4 +1,3 @@ -#![feature(test, let_chains)] #![warn( clippy::wildcard_imports, clippy::string_add, @@ -8,10 +7,12 @@ )] pub mod lexer; +pub mod parser; use std::{env, fs}; use itertools::Itertools; +use lexer::Lexer; fn main() { let args = env::args().collect_vec(); @@ -23,10 +24,15 @@ fn main() { } let source_path = &args[1]; - let Ok(_source) = fs::read_to_string(source_path) else { + let Ok(source) = fs::read_to_string(source_path) else { println!("Error while reading '{source_path}'"); return; }; + let lexer = Lexer::new(&source); + for token in lexer { + println!("{token:?}"); + } + // TODO: } diff --git a/crates/sloth/src/parser/ast.rs b/crates/sloth/src/parser/ast.rs new file mode 100644 index 0000000..139597f --- /dev/null +++ b/crates/sloth/src/parser/ast.rs @@ -0,0 +1,2 @@ + + diff --git a/crates/sloth/src/parser/mod.rs b/crates/sloth/src/parser/mod.rs new file mode 100644 index 0000000..851c0bc --- /dev/null +++ b/crates/sloth/src/parser/mod.rs @@ -0,0 +1 @@ +pub mod ast; diff --git a/crates/sloth_bytecode/Cargo.toml b/crates/sloth_bytecode/Cargo.toml index a302c81..981b6ee 100644 --- a/crates/sloth_bytecode/Cargo.toml +++ b/crates/sloth_bytecode/Cargo.toml @@ -2,3 +2,8 @@ name = "sloth_bytecode" version = "0.1.0" edition = "2021" + +[dependencies] +sloth_bytecode_macros = { path = "./macros" } + +byteorder = "1.4.3" diff --git 
a/crates/sloth_bytecode/macros/Cargo.toml b/crates/sloth_bytecode/macros/Cargo.toml new file mode 100644 index 0000000..c75bc58 --- /dev/null +++ b/crates/sloth_bytecode/macros/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "sloth_bytecode_macros" +version = "0.1.0" +edition = "2021" + +[dependencies] +proc-macro2 = "1.0.54" +quote = "1.0.26" +syn = "2.0.12" + +[lib] +proc-macro = true + diff --git a/crates/sloth_bytecode/macros/src/lib.rs b/crates/sloth_bytecode/macros/src/lib.rs new file mode 100644 index 0000000..e07a027 --- /dev/null +++ b/crates/sloth_bytecode/macros/src/lib.rs @@ -0,0 +1,153 @@ +use proc_macro2::{Ident, TokenStream}; +use quote::{format_ident, quote}; +use syn::parse::Parse; +use syn::punctuated::Punctuated; +use syn::{bracketed, parse_macro_input, LitInt, LitStr, Token}; + +struct DslInstructionInput { + opcode: LitInt, + name: Ident, + args: Punctuated<Ident, Token![,]>, + description: LitStr, +} + +impl Parse for DslInstructionInput { + fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> { + let args_content; + Ok(Self { + opcode: input.parse()?, + name: input.parse()?, + args: { + bracketed!(args_content in input); + args_content.parse_terminated(Ident::parse, Token![,])? + }, + description: input.parse()?, + }) + } +} + +struct DslInstructionsInput { + name: Ident, + instructions: Punctuated<DslInstructionInput, Token![,]>, +} + +impl Parse for DslInstructionsInput { + fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> { + Ok(Self { + name: input.parse()?, + instructions: { + input.parse::<Token![;]>()?; + input.parse_terminated(DslInstructionInput::parse, Token![,])? + }, + }) + } +} + +fn into_enum_field(instruction: &DslInstructionInput) -> TokenStream { + let DslInstructionInput { + opcode, + name, + args, + description, + } = instruction; + + let args = args.iter(); + + quote! 
{ + #[doc = #description] + #name ( #( #args ),* ) = #opcode + } +} + +fn into_bytecode_parser(instruction: &DslInstructionInput) -> TokenStream { + let DslInstructionInput { + opcode, + name, + args, + description: _, + } = instruction; + + let args = args.iter().map(|arg| { + let read_ident = format_ident!("read_{}", arg); + + let _chunk_codes = arg; + + quote! { + { + let a: #arg = (chunk.code[*offset] << 56) + (chunk) + cursor . #read_ident ::<byteorder::LittleEndian>().unwrap() + } + } + }); + + quote! { + #opcode => { + Self:: #name ( + #( #args ),* + ) + } + } +} + +#[proc_macro] +pub fn instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = parse_macro_input!(input as DslInstructionsInput); + + // Getting values to construct the enum + let enum_name = input.name; + let enum_fields = input + .instructions + .iter() + .map(into_enum_field) + .collect::<Vec<_>>(); + + // Getting the values to parse bytecode + let bytecode_parsers = input + .instructions + .iter() + .map(into_bytecode_parser) + .collect::<Vec<_>>(); + + // Building out the expanded code + let expanded = quote! 
{ + #[repr(u8)] + #[derive(Clone, Debug)] + enum #enum_name { + #( #enum_fields ),* + } + + impl #enum_name { + fn disassemble(chunk: &Chunk, offset: &mut usize) -> #enum_name { + let opcode = chunk.code[*offset]; + *offset += 1; + + let instruction = match opcode { + #( #bytecode_parsers ),* + _ => panic!("Unknown bytecode encountered"), + }; + + instruction + } + + fn assemble(chunk: &mut Chunk) { + // + } + } + + // impl #enum_name { + // fn from_bytecode(cursor: &mut Cursor<Vec<u8>>) -> Self { + // let bytecode = cursor.read_u8().unwrap(); + // + // let instruction = match bytecode { + // #( #bytecode_parsers ),* + // _ => panic!("Unknown bytecode encountered"), + // }; + // + // instruction + // } + // } + }; + + // Returning the proc_macro version of TokenStream + expanded.into() +} diff --git a/crates/sloth_bytecode/src/lib.rs b/crates/sloth_bytecode/src/lib.rs index f814f86..dbf53ae 100644 --- a/crates/sloth_bytecode/src/lib.rs +++ b/crates/sloth_bytecode/src/lib.rs @@ -1,4 +1,3 @@ -#![feature(macro_metavar_expr)] #![allow(dead_code)] #![warn( clippy::wildcard_imports, @@ -8,56 +7,164 @@ unused_lifetimes )] -macro_rules! 
instructions { - ( $( $opcode:literal $name:ident [ $( $v_type:ident ),* ] $doc:literal ),* ) => { - #[repr(u8)] - enum Instruction { - $( - #[doc = $doc] - $name ( $( $v_type ),* ) = $opcode - ),* - } +use std::io::Cursor; - impl Instruction { - fn opcode(&self) -> u8 { - match self { - $( - Self::$name ( $( _ ${ignore(v_type)} ),* ) => $opcode - ),* - } - } - - fn from_bytecode(bytecode: &[u8]) -> Option<Self> { - if bytecode.is_empty() { - return None; - } - - let opcode = bytecode[0]; - let instruction = match opcode { - $( - $opcode => { - // TODO: Get the actual values - Some(Self::$name ( $( 0 ${ignore(v_type)} ),* )) - } - ),*, - _ => None, - }; - - instruction - } - } - } +use byteorder::ReadBytesExt; +// use sloth_bytecode_macros::instructions; + +pub struct Chunk { + pub code: Vec<u8>, + pub constants: Vec<u64>, +} + +// instructions! { +// Instructions; +// +// 0x00 Constant [u64] "Push a constant value onto the stack", +// +// 0x01 Pop [] "Pop a value from the stack", +// 0x02 Dup [] "Duplicate a value on the stack", +// +// 0x10 Add [] "Add the last 2 values on the stack", +// 0x11 Sub [] "Subtract the last 2 values on the stack", +// 0x12 Mul [] "Multiply the last 2 values on the stack", +// 0x13 Div [] "Divide the last 2 values on the stack", +// 0x14 Mod [] "Modulo the last 2 values on the stack" +// } + +// impl Instructions { +// fn disassemble(chunk: &Chunk, offset: &mut usize) { +// // +// } +// +// fn assemble(chunk: &mut Chunk) { +// // +// } +// } + +// #[test] +// fn test() { +// let mut cursor = Cursor::new(vec![0, 1, 0, 0, 1, 0, 0, 0, 0]); +// let instruction = Instructions::from_bytecode(&mut cursor); +// println!("{instruction:?}"); +// assert!(1 == 0); +// } + +// macro_rules! 
instructions {
+// ( $( $opcode:literal $name:ident [ $( $v_type:ident ),* ] $doc:literal
+// ),* ) => { #[repr(u8)]
+// enum Instruction {
+// $(
+// #[doc = $doc]
+// $name ( $( $v_type ),* ) = $opcode
+// ),*
+// }
+//
+// impl Instruction {
+// fn opcode(&self) -> u8 {
+// match self {
+// $(
+// Self::$name ( $( _ ${ignore(v_type)} ),* ) => $opcode
+// ),*
+// }
+// }
+//
+// fn from_bytecode(bytecode: &[u8]) -> Option<Self> {
+// if bytecode.is_empty() {
+// return None;
+// }
+//
+// let opcode = bytecode[0];
+// let instruction = match opcode {
+// $(
+// $opcode => {
+// // TODO: Get the actual values
+// Some(Self::$name ( $( 0 ${ignore(v_type)} ),* ))
+// }
+// ),*,
+// _ => None,
+// };
+//
+// instruction
+// }
+// }
+// }
+// }
+
+// instructions! {
+// Instructions;
+//
+// 0x00 Constant [u64] "Push a constant value onto the stack",
+//
+// 0x01 Pop [] "Pop a value from the stack",
+// 0x02 Dup [] "Duplicate a value on the stack",
+//
+// 0x10 Add [] "Add the last 2 values on the stack",
+// 0x11 Sub [] "Subtract the last 2 values on the stack",
+// 0x12 Mul [] "Multiply the last 2 values on the stack",
+// 0x13 Div [] "Divide the last 2 values on the stack",
+// 0x14 Mod [] "Modulo the last 2 values on the stack"
+// }
+
+pub enum Error {
+ UnknownOpcode(u8),
+ InvalidArguments,
+ Eof,
}
-instructions!
{ - 0x00 Constant [u64] "Push a constant value onto the stack", +pub enum Instruction { + Constant(u64), - 0x01 Pop [] "Pop a value from the stack", - 0x02 Dup [] "Duplicate a value on the stack", + Pop(), + Dup(), - 0x10 Add [] "Add the last 2 values on the stack", - 0x11 Sub [] "Subtract the last 2 values on the stack", - 0x12 Mul [] "Multiply the last 2 values on the stack", - 0x13 Div [] "Divide the last 2 values on the stack", - 0x14 Mod [] "Modulo the last 2 values on the stack" + Add(), + Sub(), + Mul(), + Div(), + Mod(), } + +// fn parse_bytecode(pos: usize, bc: &[u8]) -> Result<Bytecode, BytecodeError> { +// let Some(opcode) = bc.get(pos) else { +// return Err(BytecodeError::Eof); +// }; +// +// let instruction = match opcode { +// 0x00 => { +// // let arg0: [u8; 8] = bc.get(1..1+size_of::<u64>()).unwrap(); +// let arg0 = u64::from_ne_bytes(arg0); +// } +// _ => return Err(BytecodeError::UnknownOpcode(opcode)), +// } +// +// todo!() +// } + +fn parse_bytecode(cursor: &mut Cursor<&[u8]>) -> Result<Instruction, Error> { + let Ok(opcode) = cursor.read_u8() else { + return Err(Error::Eof); + }; + + let instruction = match opcode { + 0x00 => { + let arg0 = cursor + .read_u64::<byteorder::LittleEndian>() + .map_err(|_| Error::InvalidArguments)?; + + Instruction::Constant(arg0) + } + _ => return Err(Error::UnknownOpcode(opcode)), + }; + + Ok(instruction) +} + +// impl<T: Iterator<Item = u8>> TryFrom<T> for Bytecode { +// type Error = BytecodeError; +// +// fn try_from(value: T) -> Result<Self, Self::Error> { +// todo!() +// // +// } +// } |
