From 3c531d4bbc754cc1dac6a050b30c3851eb7a2223 Mon Sep 17 00:00:00 2001 From: Nic Gaffney Date: Thu, 11 Jul 2024 22:07:03 -0500 Subject: chore(everything): Changed up some small stuff, comments and the like. totally not me forgetting what i put in this commit noooo... --- .gitignore | 3 +- build.zig | 8 ++++ examples/test1.gft | 2 +- src/main.zig | 13 ++++--- src/tokenize.zig | 111 ++++++++++++++++++++++++++++++++++------------------- 5 files changed, 90 insertions(+), 47 deletions(-) diff --git a/.gitignore b/.gitignore index 093eb95..d7e212f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,2 @@ -calico-out/ -zig-out/ +*-out/ .zig-cache/ diff --git a/build.zig b/build.zig index 052d68b..5627e43 100644 --- a/build.zig +++ b/build.zig @@ -31,8 +31,16 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); + const token_unit_tests = b.addTest(.{ + .root_source_file = b.path("src/tokenize.zig"), + .target = target, + .optimize = optimize, + }); + const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); + const run_token_unit_tests = b.addRunArtifact(token_unit_tests); const test_step = b.step("test", "Run unit tests"); test_step.dependOn(&run_exe_unit_tests.step); + test_step.dependOn(&run_token_unit_tests.step); } diff --git a/examples/test1.gft b/examples/test1.gft index 16f566f..11388cd 100644 --- a/examples/test1.gft +++ b/examples/test1.gft @@ -1 +1 @@ -return 69; +exit 69; diff --git a/src/main.zig b/src/main.zig index 99b8130..c24f8a8 100644 --- a/src/main.zig +++ b/src/main.zig @@ -22,20 +22,23 @@ pub fn main() !void { std.fs.cwd().makeDir("calico-out") catch |err| if (err != error.PathAlreadyExists) return err; + // Setup native code writer const outFileName = try getFileName(gpa.allocator(), out_name, "asm"); defer gpa.allocator().free(outFileName); const outfile = try std.fs.cwd().createFile(outFileName, .{}); const outWriter = outfile.writer(); defer outfile.close(); - // Logic here to compile language + // Turn the input file into a string 
const all = try inputFile.readToEndAlloc(gpa.allocator(), 2048); defer gpa.allocator().free(all); + // Tokenize var tokenizer = tok.Tokenizer.init(gpa.allocator(), all); defer tokenizer.deinit(); - var tokIter = tok.TokenIterator{ .tokens = try tokenizer.tokenize() }; + var tokIter = tok.Iterator(tok.Token).init((try tokenizer.tokenize()).items); + // Parse tokens try outWriter.print("global _start:\n", .{}); while (tokIter.next()) |t| { switch (t) { @@ -57,15 +60,14 @@ pub fn main() !void { , .{num.?.intLit}); gpa.allocator().free(t.ret); }, + // No other commands else => {}, } } // Run nasm and ld to build the executable // TODO: switch to qbe or llvm (preferabbly qbe) - const nasmFile = try getFileName(gpa.allocator(), out_name, "asm"); - defer gpa.allocator().free(nasmFile); - const nasmargv = [_][]const u8{ "nasm", "-felf64", nasmFile }; + const nasmargv = [_][]const u8{ "nasm", "-felf64", outFileName }; const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = gpa.allocator() }); defer gpa.allocator().free(nasmproc.stdout); defer gpa.allocator().free(nasmproc.stderr); @@ -80,6 +82,7 @@ pub fn main() !void { defer gpa.allocator().free(ldproc.stderr); } +/// Build a file name from out_name and the fileType extension inline fn getFileName(allocator: std.mem.Allocator, out_name: []const u8, fileType: []const u8) ![]const u8 { var hasDot: []const u8 = "."; if (fileType.len == 0) hasDot = ""; diff --git a/src/tokenize.zig b/src/tokenize.zig index 6225814..37080b4 100644 --- a/src/tokenize.zig +++ b/src/tokenize.zig @@ -2,74 +2,75 @@ const std = @import("std"); const TokenError = error{UnknownToken}; -const Token = union(enum) { +pub const Token = union(enum) { ret: []const u8, intLit: i32, + binaryOp: u8, semiCol, nil, }; -pub const TokenIterator = struct { - tokens: std.ArrayList(Token), - index: usize = 0, +/// Creates an iterator type over a slice of typ +pub fn Iterator(comptime typ: type) type { + return struct { + items: []const typ, + index: usize = 0, - pub fn 
next(self: *TokenIterator) ?Token { - defer self.index = self.index + 1; - if (self.index >= self.tokens.items.len) return null; - return self.tokens.items[self.index]; - } -}; - -pub const StringIterator = struct { - string: []const u8, - index: usize = 0, - - pub fn init(string: []const u8) StringIterator { - return StringIterator{ .string = string }; - } + /// Initialize iterator with a slice + pub fn init(items: []const typ) Iterator(typ) { + return Iterator(typ){ .items = items }; + } - pub fn peek(self: StringIterator) ?u8 { - if (self.index >= self.string.len) return null; - return self.string[self.index]; - } + /// Get current item + pub fn peek(self: Iterator(typ)) ?typ { + if (self.index >= self.items.len) return null; + return self.items[self.index]; + } - pub fn consume(self: *StringIterator) ?u8 { - defer self.index += 1; - return self.peek(); - } + /// Get current item and iterate index + pub fn consume(self: *Iterator(typ)) ?typ { + defer self.index += 1; + return self.peek(); + } + /// Get current item and iterate index + pub const next = consume; - pub fn skip(self: *StringIterator) void { - self.index += 1; - } -}; + /// Skip over current item + pub fn skip(self: *Iterator(typ)) void { + self.index += 1; + } + }; +} +/// Tokenizes a string of source code pub const Tokenizer = struct { - src: StringIterator, + src: Iterator(u8), allocator: std.mem.Allocator, toks: std.ArrayList(Token), + /// Initializes a tokenizer over a string of source code + /// Deinitialize with Tokenizer.deinit() pub fn init(allocator: std.mem.Allocator, src: []const u8) Tokenizer { return Tokenizer{ - .src = StringIterator.init(src), + .src = Iterator(u8).init(src), .allocator = allocator, .toks = std.ArrayList(Token).init(allocator), }; } + /// Releases allocated memory pub fn deinit(self: *Tokenizer) void { self.toks.deinit(); } + /// Returns an ArrayList of tokens pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) { var str = std.ArrayList(u8).init(self.allocator); defer 
str.deinit(); while (self.src.peek()) |char| { switch (char) { - ' ', '\n', '\t' => { - self.src.skip(); - continue; - }, + ' ', '\n', '\t' => self.src.skip(), '0'...'9' => { while (std.ascii.isDigit(self.src.peek().?)) try str.append(self.src.consume().?); @@ -91,12 +92,44 @@ pub const Tokenizer = struct { self.src.skip(); try self.toks.append(.semiCol); }, - '+', '-', '*', '/' => { - // Process operator - }, + '+', '-', '*', '/' => try self.toks.append(.{ .binaryOp = self.src.consume().? }), else => {}, } } return self.toks; } }; + +test "Tokenize" { + std.testing.log_level = std.log.Level.info; + const expect = std.testing.expect; + const testSource: []const u8 = "exit 120 + 150 - 260 * 12 / 5;"; + var toks = Tokenizer.init(std.testing.allocator, testSource); + defer toks.deinit(); + const arrtoks = try toks.tokenize(); + const expected = &[_]Token{ + .{ .ret = "exit" }, + .{ .intLit = 120 }, + .{ .binaryOp = '+' }, + .{ .intLit = 150 }, + .{ .binaryOp = '-' }, + .{ .intLit = 260 }, + .{ .binaryOp = '*' }, + .{ .intLit = 12 }, + .{ .binaryOp = '/' }, + .{ .intLit = 5 }, + .semiCol, + }; + for (arrtoks.items, expected) |act, exp| { + switch (act) { + .ret => |v| { + try expect(std.mem.eql(u8, v, exp.ret)); + std.testing.allocator.free(v); + }, + .intLit => |v| try expect(v == exp.intLit), + .semiCol => |v| try expect(v == exp.semiCol), + .binaryOp => |v| try expect(v == exp.binaryOp), + else => {}, + } + } +} -- cgit v1.2.3