aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock27
-rw-r--r--Cargo.toml1
-rw-r--r--crates/sloth/src/lexer.rs425
-rw-r--r--crates/sloth/src/main.rs10
-rw-r--r--crates/sloth/src/parser/ast.rs2
-rw-r--r--crates/sloth/src/parser/mod.rs1
-rw-r--r--crates/sloth_bytecode/Cargo.toml5
-rw-r--r--crates/sloth_bytecode/macros/Cargo.toml13
-rw-r--r--crates/sloth_bytecode/macros/src/lib.rs153
-rw-r--r--crates/sloth_bytecode/src/lib.rs203
10 files changed, 759 insertions, 81 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 3bd0eb7..157650a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,6 +3,12 @@
version = 3
[[package]]
+name = "byteorder"
+version = "1.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
+
+[[package]]
name = "either"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -19,9 +25,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
-version = "1.0.53"
+version = "1.0.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba466839c78239c09faf015484e5cc04860f88242cff4d03eb038f04b4699b73"
+checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534"
dependencies = [
"unicode-ident",
]
@@ -46,6 +52,19 @@ dependencies = [
[[package]]
name = "sloth_bytecode"
version = "0.1.0"
+dependencies = [
+ "byteorder",
+ "sloth_bytecode_macros",
+]
+
+[[package]]
+name = "sloth_bytecode_macros"
+version = "0.1.0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
[[package]]
name = "sloth_vm"
@@ -53,9 +72,9 @@ version = "0.1.0"
[[package]]
name = "syn"
-version = "2.0.8"
+version = "2.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bcc02725fd69ab9f26eab07fad303e2497fad6fb9eba4f96c4d1687bdf704ad9"
+checksum = "79d9531f94112cfc3e4c8f5f02cb2b58f72c97b7efd85f70203cc6d8efda5927"
dependencies = [
"proc-macro2",
"quote",
diff --git a/Cargo.toml b/Cargo.toml
index bc46f62..e4a78d0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,6 +2,7 @@
members = [
"crates/sloth",
"crates/sloth_bytecode",
+ "crates/sloth_bytecode/macros",
"crates/sloth_vm",
]
diff --git a/crates/sloth/src/lexer.rs b/crates/sloth/src/lexer.rs
index 8631eef..2d3b25b 100644
--- a/crates/sloth/src/lexer.rs
+++ b/crates/sloth/src/lexer.rs
@@ -1,5 +1,10 @@
#![allow(dead_code)]
+//! TODO: Lexing Regex Literals
+//! TODO: Lexing Character Literals
+
+use std::str::Chars;
+
use thiserror::Error;
#[derive(Debug, Error)]
@@ -8,7 +13,7 @@ pub enum LexerError {
UnexpectedToken,
}
-#[derive(Debug, Clone, Eq, PartialEq)]
+#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
// Meta
DocComment,
@@ -39,6 +44,7 @@ pub enum TokenType {
StarStarEq, // **=
SlashEq, // /=
PercEq, // %=
+ TildeEq, // ~=
Amp, // &
AmpAmp, // &&
@@ -51,12 +57,14 @@ pub enum TokenType {
BangBang, // !!
BangEq, // !=
- Lt, // <
- LtLt, // <<
- LtEq, // <=
- Gt, // >
- GtGt, // >>
- GtEq, // >=
+ Lt, // <
+ LtLt, // <<
+ LtLtLt, // <<<
+ LtEq, // <=
+ Gt, // >
+ GtGt, // >>
+ GtGtGt, // >>>
+ GtEq, // >=
Comma,
@@ -70,7 +78,8 @@ pub enum TokenType {
ColonColon, // ::
SemiColon, // ;
- Arrow, // ->
+ Arrow, // ->
+ FatArrow, // =>
// Keywords
Val,
@@ -91,23 +100,34 @@ pub enum TokenType {
As,
- // Misc
- Literal(Literal),
-}
+ // Literals
+ Integer(i128),
+ Float(f64),
+ Boolean(bool),
+ Character(char),
+ String(String),
+ Regex(String),
-#[derive(Debug, Clone, Eq, PartialEq)]
-pub enum Literal {
- Numeric,
- Boolean,
- Character,
- String,
- Regex,
+ Identifier(String),
}
-#[derive(Debug, Default)]
+#[derive(Debug, Default, Clone, Copy)]
pub struct Location {
- row: u32,
- column: u32,
+ index: usize,
+ pub row: u32,
+ pub col: u32,
+}
+
+impl Location {
+ fn advance(&mut self, len: usize, newline: bool) {
+ if newline {
+ self.row += 1;
+ self.col = 0;
+ } else {
+ self.col += 1;
+ }
+ self.index += len;
+ }
}
#[derive(Debug)]
@@ -121,33 +141,384 @@ pub struct Token<'a> {
pub struct Lexer<'a> {
source: &'a [u8],
+ window: [char; 3],
+ chars: Chars<'a>,
start: Location,
- end: Location,
+ current: Location,
}
impl<'a> Lexer<'a> {
- fn new(source: &'a str) -> Self {
+ pub(crate) fn new(source: &'a str) -> Self {
+ let mut chars = source.chars();
+ let window = [
+ chars.next().unwrap_or('\0'),
+ chars.next().unwrap_or('\0'),
+ chars.next().unwrap_or('\0'),
+ ];
+
Self {
source: source.as_bytes(),
+ window,
+ chars,
start: Default::default(),
- end: Default::default(),
+ current: Default::default(),
+ }
+ }
+}
+
+impl<'a> Lexer<'a> {
+ fn pos(&self) -> usize {
+ self.current.index
+ }
+
+ fn peek(&self) -> char {
+ self.window[0]
+ }
+
+ fn eof(&self) -> bool {
+ self.peek() == '\0'
+ }
+
+ fn advance(&mut self) -> char {
+ let current = self.window[0];
+ self.window = [
+ self.window[1],
+ self.window[2],
+ self.chars.next().unwrap_or('\0'),
+ ];
+ self.current.advance(current.len_utf8(), current == '\n');
+ current
+ }
+
+ fn advance_with(&mut self, with: TokenType) -> TokenType {
+ self.advance();
+ with
+ }
+
+ fn advance_by(&mut self, amount: usize) {
+ for _ in 0..amount {
+ self.advance();
+ }
+ }
+
+ fn advance_by_with(&mut self, amount: usize, with: TokenType) -> TokenType {
+ self.advance_by(amount);
+ with
+ }
+
+ fn advance_while(&mut self, predicate: impl Fn([char; 3]) -> bool) {
+ while !self.eof() && predicate(self.window) {
+ self.advance();
}
}
}
+impl<'a> Lexer<'a> {
+ fn lex_number(&mut self) -> TokenType {
+ let mut value = self.advance().to_string();
+
+ while self.peek().is_ascii_digit() {
+ value.push(self.advance());
+ }
+
+ if self.peek() == '.' {
+ value.push(self.advance());
+
+ while self.peek().is_ascii_digit() {
+ value.push(self.advance());
+ }
+
+ TokenType::Float(value.parse::<f64>().expect("Expected float"))
+ } else {
+ TokenType::Integer(value.parse::<i128>().expect("Expected integer"))
+ }
+ }
+
+ fn lex_string(&mut self) -> TokenType {
+ let mut value = String::new();
+
+ self.advance();
+ loop {
+ match self.window {
+ ['\\', '"', ..] => {
+ self.advance_by(2);
+ value.push('"');
+ }
+ ['\\', 't', ..] => {
+ self.advance_by(2);
+ value.push('\t');
+ }
+ ['\\', 'n', ..] => {
+ self.advance_by(2);
+ value.push('\n');
+ }
+ ['"', ..] => {
+ self.advance();
+ break;
+ }
+ _ => {
+ value.push(self.advance());
+ continue;
+ }
+ }
+ }
+
+ TokenType::String(value)
+ }
+}
+
impl<'a> Iterator for Lexer<'a> {
type Item = Token<'a>;
fn next(&mut self) -> Option<Self::Item> {
- unimplemented!()
+ // Skipping whitespace
+ self.advance_while(|it| it[0].is_whitespace());
+ self.start = self.current;
+
+        // If we're at the end of the file return nothing
+ if self.eof() {
+ return None;
+ }
+
+ // Figuring out the token type
+ let tt = match self.window {
+ ['#', '#', ..] => {
+ self.advance_while(|it| it[0] != '\n');
+ TokenType::DocComment
+ }
+
+ ['#', ..] => {
+ self.advance_while(|it| it[0] != '\n');
+ TokenType::Comment
+ }
+
+ // Blocks
+ ['(', ..] => self.advance_with(TokenType::OpeningParen),
+ [')', ..] => self.advance_with(TokenType::ClosingParen),
+ ['[', ..] => self.advance_with(TokenType::OpeningBracket),
+ [']', ..] => self.advance_with(TokenType::ClosingBracket),
+ ['{', ..] => self.advance_with(TokenType::OpeningBrace),
+ ['}', ..] => self.advance_with(TokenType::ClosingBrace),
+
+ // Operators
+ ['-', '>', ..] => self.advance_by_with(2, TokenType::Arrow),
+ ['=', '>', ..] => self.advance_by_with(2, TokenType::FatArrow),
+
+ ['+', '+', '='] => self.advance_by_with(3, TokenType::PlusPlusEq),
+ ['*', '*', '='] => self.advance_by_with(3, TokenType::StarStarEq),
+ ['+', '+', ..] => self.advance_by_with(2, TokenType::PlusPlus),
+ ['*', '*', ..] => self.advance_by_with(2, TokenType::StarStar),
+
+ ['+', '=', ..] => self.advance_by_with(2, TokenType::PlusEq),
+ ['-', '=', ..] => self.advance_by_with(2, TokenType::MinusEq),
+ ['*', '=', ..] => self.advance_by_with(2, TokenType::StarEq),
+ ['/', '=', ..] => self.advance_by_with(2, TokenType::SlashEq),
+ ['%', '=', ..] => self.advance_by_with(2, TokenType::PercEq),
+ ['~', '=', ..] => self.advance_by_with(2, TokenType::TildeEq),
+
+ ['+', ..] => self.advance_with(TokenType::Plus),
+ ['-', ..] => self.advance_with(TokenType::Minus),
+ ['*', ..] => self.advance_with(TokenType::Star),
+ ['/', ..] => self.advance_with(TokenType::Slash), // TODO: Check for regex literals
+ ['%', ..] => self.advance_with(TokenType::Perc),
+ ['~', ..] => self.advance_with(TokenType::Tilde),
+
+ ['&', '&', ..] => self.advance_by_with(2, TokenType::AmpAmp),
+ ['&', ..] => self.advance_with(TokenType::Amp),
+
+ ['|', '|', ..] => self.advance_by_with(2, TokenType::PipePipe),
+ ['|', ..] => self.advance_with(TokenType::Pipe),
+
+ ['=', '=', ..] => self.advance_by_with(2, TokenType::EqEq),
+ ['!', '=', ..] => self.advance_by_with(2, TokenType::BangEq),
+ ['!', '!', ..] => self.advance_by_with(2, TokenType::BangBang),
+ ['=', ..] => self.advance_with(TokenType::Eq),
+ ['!', ..] => self.advance_with(TokenType::Bang),
+
+ ['<', '<', '<'] => self.advance_by_with(3, TokenType::LtLtLt),
+ ['<', '<', ..] => self.advance_by_with(2, TokenType::LtLt),
+ ['<', '=', ..] => self.advance_by_with(2, TokenType::LtEq),
+ ['<', ..] => self.advance_with(TokenType::Lt),
+
+ ['>', '>', '>'] => self.advance_by_with(3, TokenType::GtGtGt),
+ ['>', '>', ..] => self.advance_by_with(2, TokenType::GtGt),
+ ['>', '=', ..] => self.advance_by_with(2, TokenType::GtEq),
+ ['>', ..] => self.advance_with(TokenType::Gt),
+
+ [',', ..] => self.advance_with(TokenType::Comma),
+
+ ['.', '.', ..] => self.advance_by_with(2, TokenType::DotDot),
+ ['.', ..] => self.advance_with(TokenType::Dot),
+ ['?', '?', ..] => self.advance_by_with(2, TokenType::QuestionQuestion),
+ ['?', '.', ..] => self.advance_by_with(2, TokenType::QuestionDot),
+ ['?', ..] => self.advance_with(TokenType::Question),
+
+ [';', ..] => self.advance_with(TokenType::SemiColon),
+ [':', ':', ..] => self.advance_by_with(2, TokenType::ColonColon),
+ [':', ..] => self.advance_with(TokenType::Colon),
+
+ // Literals
+ ['0'..='9', ..] => self.lex_number(),
+ ['"', ..] => self.lex_string(),
+
+ ['a'..='z' | 'A'..='Z' | '_', ..] => {
+ let mut value = String::new();
+ while matches!(self.peek(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
+ value.push(self.advance());
+ }
+
+ match value.as_str() {
+ "val" => TokenType::Val,
+ "var" => TokenType::Var,
+ "fn" => TokenType::Fn,
+ "if" => TokenType::If,
+ "else" => TokenType::Else,
+ "while" => TokenType::While,
+ "for" => TokenType::For,
+ "in" => TokenType::In,
+ "loop" => TokenType::Loop,
+ "break" => TokenType::Break,
+ "continue" => TokenType::Continue,
+ "as" => TokenType::As,
+ "true" => TokenType::Boolean(true),
+ "false" => TokenType::Boolean(false),
+ _ => TokenType::Identifier(value),
+ }
+ }
+
+ _ => panic!("Error while parsing"),
+ };
+
+ let lexeme = unsafe {
+        // At this point it is already known that the string is valid UTF-8, might
+        // as well not check again
+ std::str::from_utf8_unchecked(&self.source[self.start.index..self.pos()])
+ };
+
+ let token = Token {
+ tt,
+ lexeme,
+ start: self.start,
+ end: self.current,
+ };
+
+ Some(token)
}
}
#[cfg(test)]
mod tests {
+ use itertools::Itertools;
+
+ use super::{Lexer, TokenType};
+
#[test]
- fn basic_test_a() {
- //
+ fn lex_operators() {
+ let source = "+ ++ - * ** / % ~ += ++= -= *= **= /= %= ~= & && | || = == ! !! != < << <<< \
+ <= > >> >>> >= , ? ?. ?? . .. : :: ; -> =>";
+ let tokens = Lexer::new(source).map(|it| it.tt).collect_vec();
+
+ assert_eq!(&tokens, &[
+ TokenType::Plus,
+ TokenType::PlusPlus,
+ TokenType::Minus,
+ TokenType::Star,
+ TokenType::StarStar,
+ TokenType::Slash,
+ TokenType::Perc,
+ TokenType::Tilde,
+ TokenType::PlusEq,
+ TokenType::PlusPlusEq,
+ TokenType::MinusEq,
+ TokenType::StarEq,
+ TokenType::StarStarEq,
+ TokenType::SlashEq,
+ TokenType::PercEq,
+ TokenType::TildeEq,
+ TokenType::Amp,
+ TokenType::AmpAmp,
+ TokenType::Pipe,
+ TokenType::PipePipe,
+ TokenType::Eq,
+ TokenType::EqEq,
+ TokenType::Bang,
+ TokenType::BangBang,
+ TokenType::BangEq,
+ TokenType::Lt,
+ TokenType::LtLt,
+ TokenType::LtLtLt,
+ TokenType::LtEq,
+ TokenType::Gt,
+ TokenType::GtGt,
+ TokenType::GtGtGt,
+ TokenType::GtEq,
+ TokenType::Comma,
+ TokenType::Question,
+ TokenType::QuestionDot,
+ TokenType::QuestionQuestion,
+ TokenType::Dot,
+ TokenType::DotDot,
+ TokenType::Colon,
+ TokenType::ColonColon,
+ TokenType::SemiColon,
+ TokenType::Arrow,
+ TokenType::FatArrow,
+ ]);
+ }
+
+ #[test]
+ fn lex_keywords() {
+ let source = "val var fn if else while for in loop break continue as true false";
+ let tokens = Lexer::new(source).map(|it| it.tt).collect_vec();
+
+ assert_eq!(&tokens, &[
+ TokenType::Val,
+ TokenType::Var,
+ TokenType::Fn,
+ TokenType::If,
+ TokenType::Else,
+ TokenType::While,
+ TokenType::For,
+ TokenType::In,
+ TokenType::Loop,
+ TokenType::Break,
+ TokenType::Continue,
+ TokenType::As,
+ TokenType::Boolean(true),
+ TokenType::Boolean(false),
+ ]);
+ }
+
+ #[test]
+ fn lex_literals_a() {
+ let source = "iden \"foo\" \"bar\" \"baz\" \"\\\"\" \"\\n\" \"\\t\" 93 3252 238 -382 -832 \
+ 83 -25 52.9 83.7 12.4 35.2 3.3";
+ let tokens = Lexer::new(source).map(|it| it.tt).collect_vec();
+
+ assert_eq!(&tokens, &[
+ TokenType::Identifier("iden".to_owned()),
+ TokenType::String("foo".to_owned()),
+ TokenType::String("bar".to_owned()),
+ TokenType::String("baz".to_owned()),
+ TokenType::String("\"".to_owned()),
+ TokenType::String("\n".to_owned()),
+ TokenType::String("\t".to_owned()),
+ TokenType::Integer(93),
+ TokenType::Integer(3252),
+ TokenType::Integer(238),
+ TokenType::Minus,
+ TokenType::Integer(382),
+ TokenType::Minus,
+ TokenType::Integer(832),
+ TokenType::Integer(83),
+ TokenType::Minus,
+ TokenType::Integer(25),
+ TokenType::Float(52.9),
+ TokenType::Float(83.7),
+ TokenType::Float(12.4),
+ TokenType::Float(35.2),
+ TokenType::Float(3.3),
+ ]);
}
}
diff --git a/crates/sloth/src/main.rs b/crates/sloth/src/main.rs
index 89ce7f9..6502f19 100644
--- a/crates/sloth/src/main.rs
+++ b/crates/sloth/src/main.rs
@@ -1,4 +1,3 @@
-#![feature(test, let_chains)]
#![warn(
clippy::wildcard_imports,
clippy::string_add,
@@ -8,10 +7,12 @@
)]
pub mod lexer;
+pub mod parser;
use std::{env, fs};
use itertools::Itertools;
+use lexer::Lexer;
fn main() {
let args = env::args().collect_vec();
@@ -23,10 +24,15 @@ fn main() {
}
let source_path = &args[1];
- let Ok(_source) = fs::read_to_string(source_path) else {
+ let Ok(source) = fs::read_to_string(source_path) else {
println!("Error while reading '{source_path}'");
return;
};
+ let lexer = Lexer::new(&source);
+ for token in lexer {
+ println!("{token:?}");
+ }
+
// TODO:
}
diff --git a/crates/sloth/src/parser/ast.rs b/crates/sloth/src/parser/ast.rs
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/crates/sloth/src/parser/ast.rs
@@ -0,0 +1,2 @@
+
+
diff --git a/crates/sloth/src/parser/mod.rs b/crates/sloth/src/parser/mod.rs
new file mode 100644
index 0000000..851c0bc
--- /dev/null
+++ b/crates/sloth/src/parser/mod.rs
@@ -0,0 +1 @@
+pub mod ast;
diff --git a/crates/sloth_bytecode/Cargo.toml b/crates/sloth_bytecode/Cargo.toml
index a302c81..981b6ee 100644
--- a/crates/sloth_bytecode/Cargo.toml
+++ b/crates/sloth_bytecode/Cargo.toml
@@ -2,3 +2,8 @@
name = "sloth_bytecode"
version = "0.1.0"
edition = "2021"
+
+[dependencies]
+sloth_bytecode_macros = { path = "./macros" }
+
+byteorder = "1.4.3"
diff --git a/crates/sloth_bytecode/macros/Cargo.toml b/crates/sloth_bytecode/macros/Cargo.toml
new file mode 100644
index 0000000..c75bc58
--- /dev/null
+++ b/crates/sloth_bytecode/macros/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "sloth_bytecode_macros"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+proc-macro2 = "1.0.54"
+quote = "1.0.26"
+syn = "2.0.12"
+
+[lib]
+proc-macro = true
+
diff --git a/crates/sloth_bytecode/macros/src/lib.rs b/crates/sloth_bytecode/macros/src/lib.rs
new file mode 100644
index 0000000..e07a027
--- /dev/null
+++ b/crates/sloth_bytecode/macros/src/lib.rs
@@ -0,0 +1,153 @@
+use proc_macro2::{Ident, TokenStream};
+use quote::{format_ident, quote};
+use syn::parse::Parse;
+use syn::punctuated::Punctuated;
+use syn::{bracketed, parse_macro_input, LitInt, LitStr, Token};
+
+struct DslInstructionInput {
+ opcode: LitInt,
+ name: Ident,
+ args: Punctuated<Ident, Token![,]>,
+ description: LitStr,
+}
+
+impl Parse for DslInstructionInput {
+ fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
+ let args_content;
+ Ok(Self {
+ opcode: input.parse()?,
+ name: input.parse()?,
+ args: {
+ bracketed!(args_content in input);
+ args_content.parse_terminated(Ident::parse, Token![,])?
+ },
+ description: input.parse()?,
+ })
+ }
+}
+
+struct DslInstructionsInput {
+ name: Ident,
+ instructions: Punctuated<DslInstructionInput, Token![,]>,
+}
+
+impl Parse for DslInstructionsInput {
+ fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
+ Ok(Self {
+ name: input.parse()?,
+ instructions: {
+ input.parse::<Token![;]>()?;
+ input.parse_terminated(DslInstructionInput::parse, Token![,])?
+ },
+ })
+ }
+}
+
+fn into_enum_field(instruction: &DslInstructionInput) -> TokenStream {
+ let DslInstructionInput {
+ opcode,
+ name,
+ args,
+ description,
+ } = instruction;
+
+ let args = args.iter();
+
+ quote! {
+ #[doc = #description]
+ #name ( #( #args ),* ) = #opcode
+ }
+}
+
+fn into_bytecode_parser(instruction: &DslInstructionInput) -> TokenStream {
+ let DslInstructionInput {
+ opcode,
+ name,
+ args,
+ description: _,
+ } = instruction;
+
+ let args = args.iter().map(|arg| {
+ let read_ident = format_ident!("read_{}", arg);
+
+ let _chunk_codes = arg;
+
+ quote! {
+ {
+ let a: #arg = (chunk.code[*offset] << 56) + (chunk)
+ cursor . #read_ident ::<byteorder::LittleEndian>().unwrap()
+ }
+ }
+ });
+
+ quote! {
+ #opcode => {
+ Self:: #name (
+ #( #args ),*
+ )
+ }
+ }
+}
+
+#[proc_macro]
+pub fn instructions(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
+ let input = parse_macro_input!(input as DslInstructionsInput);
+
+ // Getting values to construct the enum
+ let enum_name = input.name;
+ let enum_fields = input
+ .instructions
+ .iter()
+ .map(into_enum_field)
+ .collect::<Vec<_>>();
+
+ // Getting the values to parse bytecode
+ let bytecode_parsers = input
+ .instructions
+ .iter()
+ .map(into_bytecode_parser)
+ .collect::<Vec<_>>();
+
+ // Building out the expanded code
+ let expanded = quote! {
+ #[repr(u8)]
+ #[derive(Clone, Debug)]
+ enum #enum_name {
+ #( #enum_fields ),*
+ }
+
+ impl #enum_name {
+ fn disassemble(chunk: &Chunk, offset: &mut usize) -> #enum_name {
+ let opcode = chunk.code[*offset];
+ *offset += 1;
+
+ let instruction = match opcode {
+ #( #bytecode_parsers ),*
+ _ => panic!("Unknown bytecode encountered"),
+ };
+
+ instruction
+ }
+
+ fn assemble(chunk: &mut Chunk) {
+ //
+ }
+ }
+
+ // impl #enum_name {
+ // fn from_bytecode(cursor: &mut Cursor<Vec<u8>>) -> Self {
+ // let bytecode = cursor.read_u8().unwrap();
+ //
+ // let instruction = match bytecode {
+ // #( #bytecode_parsers ),*
+ // _ => panic!("Unknown bytecode encountered"),
+ // };
+ //
+ // instruction
+ // }
+ // }
+ };
+
+ // Returning the proc_macro version of TokenStream
+ expanded.into()
+}
diff --git a/crates/sloth_bytecode/src/lib.rs b/crates/sloth_bytecode/src/lib.rs
index f814f86..dbf53ae 100644
--- a/crates/sloth_bytecode/src/lib.rs
+++ b/crates/sloth_bytecode/src/lib.rs
@@ -1,4 +1,3 @@
-#![feature(macro_metavar_expr)]
#![allow(dead_code)]
#![warn(
clippy::wildcard_imports,
@@ -8,56 +7,164 @@
unused_lifetimes
)]
-macro_rules! instructions {
- ( $( $opcode:literal $name:ident [ $( $v_type:ident ),* ] $doc:literal ),* ) => {
- #[repr(u8)]
- enum Instruction {
- $(
- #[doc = $doc]
- $name ( $( $v_type ),* ) = $opcode
- ),*
- }
+use std::io::Cursor;
- impl Instruction {
- fn opcode(&self) -> u8 {
- match self {
- $(
- Self::$name ( $( _ ${ignore(v_type)} ),* ) => $opcode
- ),*
- }
- }
-
- fn from_bytecode(bytecode: &[u8]) -> Option<Self> {
- if bytecode.is_empty() {
- return None;
- }
-
- let opcode = bytecode[0];
- let instruction = match opcode {
- $(
- $opcode => {
- // TODO: Get the actual values
- Some(Self::$name ( $( 0 ${ignore(v_type)} ),* ))
- }
- ),*,
- _ => None,
- };
-
- instruction
- }
- }
- }
+use byteorder::ReadBytesExt;
+// use sloth_bytecode_macros::instructions;
+
+pub struct Chunk {
+ pub code: Vec<u8>,
+ pub constants: Vec<u64>,
+}
+
+// instructions! {
+// Instructions;
+//
+// 0x00 Constant [u64] "Push a constant value onto the stack",
+//
+// 0x01 Pop [] "Pop a value from the stack",
+// 0x02 Dup [] "Duplicate a value on the stack",
+//
+// 0x10 Add [] "Add the last 2 values on the stack",
+// 0x11 Sub [] "Subtract the last 2 values on the stack",
+// 0x12 Mul [] "Multiply the last 2 values on the stack",
+// 0x13 Div [] "Divide the last 2 values on the stack",
+// 0x14 Mod [] "Modulo the last 2 values on the stack"
+// }
+
+// impl Instructions {
+// fn disassemble(chunk: &Chunk, offset: &mut usize) {
+// //
+// }
+//
+// fn assemble(chunk: &mut Chunk) {
+// //
+// }
+// }
+
+// #[test]
+// fn test() {
+// let mut cursor = Cursor::new(vec![0, 1, 0, 0, 1, 0, 0, 0, 0]);
+// let instruction = Instructions::from_bytecode(&mut cursor);
+// println!("{instruction:?}");
+// assert!(1 == 0);
+// }
+
+// macro_rules! instructions {
+// ( $( $opcode:literal $name:ident [ $( $v_type:ident ),* ] $doc:literal
+// ),* ) => { #[repr(u8)]
+// enum Instruction {
+// $(
+// #[doc = $doc]
+// $name ( $( $v_type ),* ) = $opcode
+// ),*
+// }
+//
+// impl Instruction {
+// fn opcode(&self) -> u8 {
+// match self {
+// $(
+// Self::$name ( $( _ ${ignore(v_type)} ),* ) => $opcode
+// ),*
+// }
+// }
+//
+// fn from_bytecode(bytecode: &[u8]) -> Option<Self> {
+//             if bytecode.is_empty() {
+// return None;
+// }
+//
+// let opcode = bytecode[0];
+// let instruction = match opcode {
+// $(
+// $opcode => {
+// // TODO: Get the actual values
+// Some(Self::$name ( $( 0 ${ignore(v_type)} ),* ))
+// }
+// ),*,
+// _ => None,
+// };
+//
+// instruction
+// }
+// }
+// }
+// }
+
+// instructions! {
+// Instructions;
+//
+// 0x00 Constant [u64] "Push a constant value onto the stack",
+//
+// 0x01 Pop [] "Pop a value from the stack",
+// 0x02 Dup [] "Duplicate a value on the stack",
+//
+// 0x10 Add [] "Add the last 2 values on the stack",
+// 0x11 Sub [] "Subtract the last 2 values on the stack",
+// 0x12 Mul [] "Multiply the last 2 values on the stack",
+// 0x13 Div [] "Divide the last 2 values on the stack",
+// 0x14 Mod [] "Modulo the last 2 values on the stack"
+// }
+
+pub enum Error {
+ UnknownOpcode(u8),
+ InvalidArguments,
+ Eof,
}
-instructions! {
- 0x00 Constant [u64] "Push a constant value onto the stack",
+pub enum Instruction {
+ Constant(u64),
- 0x01 Pop [] "Pop a value from the stack",
- 0x02 Dup [] "Duplicate a value on the stack",
+ Pop(),
+ Dup(),
- 0x10 Add [] "Add the last 2 values on the stack",
- 0x11 Sub [] "Subtract the last 2 values on the stack",
- 0x12 Mul [] "Multiply the last 2 values on the stack",
- 0x13 Div [] "Divide the last 2 values on the stack",
- 0x14 Mod [] "Modulo the last 2 values on the stack"
+ Add(),
+ Sub(),
+ Mul(),
+ Div(),
+ Mod(),
}
+
+// fn parse_bytecode(pos: usize, bc: &[u8]) -> Result<Bytecode, BytecodeError> {
+// let Some(opcode) = bc.get(pos) else {
+// return Err(BytecodeError::Eof);
+// };
+//
+// let instruction = match opcode {
+// 0x00 => {
+// // let arg0: [u8; 8] = bc.get(1..1+size_of::<u64>()).unwrap();
+// let arg0 = u64::from_ne_bytes(arg0);
+// }
+// _ => return Err(BytecodeError::UnknownOpcode(opcode)),
+// }
+//
+// todo!()
+// }
+
+fn parse_bytecode(cursor: &mut Cursor<&[u8]>) -> Result<Instruction, Error> {
+ let Ok(opcode) = cursor.read_u8() else {
+ return Err(Error::Eof);
+ };
+
+ let instruction = match opcode {
+ 0x00 => {
+ let arg0 = cursor
+ .read_u64::<byteorder::LittleEndian>()
+ .map_err(|_| Error::InvalidArguments)?;
+
+ Instruction::Constant(arg0)
+ }
+ _ => return Err(Error::UnknownOpcode(opcode)),
+ };
+
+ Ok(instruction)
+}
+
+// impl<T: Iterator<Item = u8>> TryFrom<T> for Bytecode {
+// type Error = BytecodeError;
+//
+// fn try_from(value: T) -> Result<Self, Self::Error> {
+// todo!()
+// //
+// }
+// }