From c955f2fb933865a50e791e2be91adbcb34bbadc9 Mon Sep 17 00:00:00 2001 From: Nic Gaffney Date: Fri, 28 Jun 2024 00:54:59 -0500 Subject: feat(tokenizer): Move tokenizer into a struct Moved the tokenizer into a struct and created a string iterator to make tokenizing easier --- src/main.zig | 45 ++++++++++++++----- src/tokenize.zig | 132 +++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 118 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/main.zig b/src/main.zig index 18239b1..99b8130 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4,19 +4,27 @@ const tok = @import("tokenize.zig"); const gftCompilerError = error{NoInputFile}; pub fn main() !void { - if (std.os.argv.len != 2) return gftCompilerError.NoInputFile; + if (std.os.argv.len < 2) return gftCompilerError.NoInputFile; + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; defer _ = gpa.deinit(); + var args = std.process.args(); _ = args.skip(); const inputFileName = args.next(); + + var out_name: []const u8 = "out"; + if (std.os.argv.len == 3) out_name = args.next().?; + const inputFile = try std.fs.cwd().openFile(inputFileName.?, .{}); defer inputFile.close(); - std.fs.cwd().makeDir("out") catch |err| { + std.fs.cwd().makeDir("calico-out") catch |err| if (err != error.PathAlreadyExists) return err; - }; - const outfile = try std.fs.cwd().createFile("out/out.asm", .{}); + + const outFileName = try getFileName(gpa.allocator(), out_name, "asm"); + defer gpa.allocator().free(outFileName); + const outfile = try std.fs.cwd().createFile(outFileName, .{}); const outWriter = outfile.writer(); defer outfile.close(); @@ -24,9 +32,10 @@ pub fn main() !void { const all = try inputFile.readToEndAlloc(gpa.allocator(), 2048); defer gpa.allocator().free(all); - const toks = try tok.tokenize(gpa.allocator(), all); - defer gpa.allocator().free(toks); - var tokIter = tok.TokenIterator{ .tokens = toks }; + var tokenizer = tok.Tokenizer.init(gpa.allocator(), all); + defer tokenizer.deinit(); + 
var tokIter = tok.TokenIterator{ .tokens = try tokenizer.tokenize() }; + try outWriter.print("global _start:\n", .{}); while (tokIter.next()) |t| { switch (t) { @@ -52,13 +61,27 @@ pub fn main() !void { } } - const nasmargv = [_][]const u8{ "nasm", "-felf64", "out/out.asm" }; - const nasmproc = try std.ChildProcess.run(.{ .argv = &nasmargv, .allocator = gpa.allocator() }); + // Run nasm and ld to build the executable + // TODO: switch to qbe or llvm (preferably qbe) + const nasmFile = try getFileName(gpa.allocator(), out_name, "asm"); + defer gpa.allocator().free(nasmFile); + const nasmargv = [_][]const u8{ "nasm", "-felf64", nasmFile }; + const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = gpa.allocator() }); defer gpa.allocator().free(nasmproc.stdout); defer gpa.allocator().free(nasmproc.stderr); - const ldargv = [_][]const u8{ "ld", "-o", "out/out", "out/out.o" }; - const ldproc = try std.ChildProcess.run(.{ .argv = &ldargv, .allocator = gpa.allocator() }); + const ldFile = try getFileName(gpa.allocator(), out_name, "o"); + defer gpa.allocator().free(ldFile); + const binFile = try getFileName(gpa.allocator(), out_name, ""); + defer gpa.allocator().free(binFile); + const ldargv = [_][]const u8{ "ld", "-o", binFile, ldFile }; + const ldproc = try std.process.Child.run(.{ .argv = &ldargv, .allocator = gpa.allocator() }); defer gpa.allocator().free(ldproc.stdout); defer gpa.allocator().free(ldproc.stderr); } + +inline fn getFileName(allocator: std.mem.Allocator, out_name: []const u8, fileType: []const u8) ![]const u8 { + var hasDot: []const u8 = "."; + if (fileType.len == 0) hasDot = ""; + return try std.fmt.allocPrint(allocator, "calico-out/{s}{s}{s}", .{ out_name, hasDot, fileType }); +} diff --git a/src/tokenize.zig b/src/tokenize.zig index b5d5d23..6225814 100644 --- a/src/tokenize.zig +++ b/src/tokenize.zig @@ -5,62 +5,98 @@ const TokenError = error{UnknownToken}; const Token = union(enum) { ret: []const u8, intLit: i32, - semiCol: 
u8, - nil: void, + semiCol, + nil, }; pub const TokenIterator = struct { - tokens: []const Token, + tokens: std.ArrayList(Token), index: usize = 0, pub fn next(self: *TokenIterator) ?Token { - defer self.*.index = self.*.index + 1; - if (self.*.index >= self.*.tokens.len) return null; - return self.*.tokens[self.*.index]; + defer self.index = self.index + 1; + if (self.index >= self.tokens.items.len) return null; + return self.tokens.items[self.index]; } }; -pub fn tokenize(allocator: std.mem.Allocator, buff: []const u8) ![]const Token { - var toks = std.ArrayList(Token).init(allocator); - defer toks.deinit(); - var str = std.ArrayList(u8).init(allocator); - defer str.deinit(); - - var i: u32 = 0; - while (i < buff.len) { - switch (buff[i]) { - ' ', '\n', '\t' => { - i = i + 1; - continue; - }, - '0'...'9' => { - while (std.ascii.isDigit(buff[i])) { - try str.append(buff[i]); - i = i + 1; - } - const num: i32 = try std.fmt.parseInt(i32, str.items, 10); - try toks.append(.{ .intLit = num }); - str.deinit(); - str = std.ArrayList(u8).init(allocator); - }, - 'a'...'z', 'A'...'Z' => { - while (std.ascii.isAlphanumeric(buff[i])) { - try str.append(buff[i]); - i = i + 1; - } - try toks.append(.{ .ret = try str.toOwnedSlice() }); - str.deinit(); - str = std.ArrayList(u8).init(allocator); - }, - ';' => { - i = i + 1; - try toks.append(.{ .semiCol = ';' }); - }, - '+', '-', '*', '/' => { - // Process operator - }, - else => {}, +pub const StringIterator = struct { + string: []const u8, + index: usize = 0, + + pub fn init(string: []const u8) StringIterator { + return StringIterator{ .string = string }; + } + + pub fn peek(self: StringIterator) ?u8 { + if (self.index >= self.string.len) return null; + return self.string[self.index]; + } + + pub fn consume(self: *StringIterator) ?u8 { + defer self.index += 1; + return self.peek(); + } + + pub fn skip(self: *StringIterator) void { + self.index += 1; + } +}; + +pub const Tokenizer = struct { + src: StringIterator, + allocator: 
std.mem.Allocator, + toks: std.ArrayList(Token), + + pub fn init(allocator: std.mem.Allocator, src: []const u8) Tokenizer { + return Tokenizer{ + .src = StringIterator.init(src), + .allocator = allocator, + .toks = std.ArrayList(Token).init(allocator), + }; + } + + pub fn deinit(self: *Tokenizer) void { + self.toks.deinit(); + } + + pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) { + var str = std.ArrayList(u8).init(self.allocator); + defer str.deinit(); + + while (self.src.peek()) |char| { + switch (char) { + ' ', '\n', '\t' => { + self.src.skip(); + continue; + }, + '0'...'9' => { + while (std.ascii.isDigit(self.src.peek().?)) + try str.append(self.src.consume().?); + + const num: i32 = try std.fmt.parseInt(i32, str.items, 10); + try self.toks.append(.{ .intLit = num }); + str.deinit(); + str = std.ArrayList(u8).init(self.allocator); + }, + 'a'...'z', 'A'...'Z' => { + while (std.ascii.isAlphanumeric(self.src.peek().?)) + try str.append(self.src.consume().?); + + try self.toks.append(.{ .ret = try str.toOwnedSlice() }); + str.deinit(); + str = std.ArrayList(u8).init(self.allocator); + }, + ';' => { + self.src.skip(); + try self.toks.append(.semiCol); + }, + '+', '-', '*', '/' => { + // Process operator + }, + else => {}, + } } + return self.toks; } - return toks.toOwnedSlice(); -} +}; -- cgit v1.2.3