summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNic Gaffney <gaffney_nic@protonmail.com>2024-06-28 00:54:59 -0500
committerNic Gaffney <gaffney_nic@protonmail.com>2024-06-28 00:54:59 -0500
commitc955f2fb933865a50e791e2be91adbcb34bbadc9 (patch)
treeefd6def99007c8936721931d02e42c77357f3fd7
parent36e990c5bdfffb145b7255b8159d3ac879344996 (diff)
downloadcalico-c955f2fb933865a50e791e2be91adbcb34bbadc9.tar.gz
feat(tokenizer): Move tokenizer into a struct
Moved the tokenizer into a struct and created a string iterator to make tokenizing easier
-rw-r--r--.gitignore4
-rw-r--r--build.zig4
-rw-r--r--build.zig.zon64
-rw-r--r--src/main.zig45
-rw-r--r--src/tokenize.zig132
5 files changed, 125 insertions, 124 deletions
diff --git a/.gitignore b/.gitignore
index 9ae02d6..093eb95 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
-out/
+calico-out/
zig-out/
-zig-cache/
+.zig-cache/
diff --git a/build.zig b/build.zig
index f04131f..052d68b 100644
--- a/build.zig
+++ b/build.zig
@@ -6,7 +6,7 @@ pub fn build(b: *std.Build) void {
const optimize = b.standardOptimizeOption(.{});
const exe = b.addExecutable(.{
- .name = "compiler",
+ .name = "calico",
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,
@@ -22,7 +22,7 @@ pub fn build(b: *std.Build) void {
run_cmd.addArgs(args);
}
- const run_step = b.step("run", "Run the app");
+ const run_step = b.step("run", "Run the compiler");
run_step.dependOn(&run_cmd.step);
const exe_unit_tests = b.addTest(.{
diff --git a/build.zig.zon b/build.zig.zon
index 39f7607..cf62921 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -1,67 +1,9 @@
.{
- .name = "compiler",
- // This is a [Semantic Version](https://semver.org/).
- // In a future version of Zig it will be used for package deduplication.
- .version = "0.0.0",
+ .name = "calico",
+ .version = "0.0.1",
- // This field is optional.
- // This is currently advisory only; Zig does not yet do anything
- // with this value.
- //.minimum_zig_version = "0.11.0",
-
- // This field is optional.
- // Each dependency must either provide a `url` and `hash`, or a `path`.
- // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
- // Once all dependencies are fetched, `zig build` no longer requires
- // internet connectivity.
- .dependencies = .{
- // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
- //.example = .{
- // // When updating this field to a new URL, be sure to delete the corresponding
- // // `hash`, otherwise you are communicating that you expect to find the old hash at
- // // the new URL.
- // .url = "https://example.com/foo.tar.gz",
- //
- // // This is computed from the file contents of the directory of files that is
- // // obtained after fetching `url` and applying the inclusion rules given by
- // // `paths`.
- // //
- // // This field is the source of truth; packages do not come from a `url`; they
- // // come from a `hash`. `url` is just one of many possible mirrors for how to
- // // obtain a package matching this `hash`.
- // //
- // // Uses the [multihash](https://multiformats.io/multihash/) format.
- // .hash = "...",
- //
- // // When this is provided, the package is found in a directory relative to the
- // // build root. In this case the package's hash is irrelevant and therefore not
- // // computed. This field and `url` are mutually exclusive.
- // .path = "foo",
-
- // // When this is set to `true`, a package is declared to be lazily
- // // fetched. This makes the dependency only get fetched if it is
- // // actually used.
- // .lazy = false,
- //},
- },
-
- // Specifies the set of files and directories that are included in this package.
- // Only files and directories listed here are included in the `hash` that
- // is computed for this package.
- // Paths are relative to the build root. Use the empty string (`""`) to refer to
- // the build root itself.
- // A directory listed here means that all files within, recursively, are included.
+ .dependencies = .{},
.paths = .{
- // This makes *all* files, recursively, included in this package. It is generally
- // better to explicitly list the files and directories instead, to insure that
- // fetching from tarballs, file system paths, and version control all result
- // in the same contents hash.
"",
- // For example...
- //"build.zig",
- //"build.zig.zon",
- //"src",
- //"LICENSE",
- //"README.md",
},
}
diff --git a/src/main.zig b/src/main.zig
index 18239b1..99b8130 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -4,19 +4,27 @@ const tok = @import("tokenize.zig");
const gftCompilerError = error{NoInputFile};
pub fn main() !void {
- if (std.os.argv.len != 2) return gftCompilerError.NoInputFile;
+ if (std.os.argv.len < 2) return gftCompilerError.NoInputFile;
+
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
+
var args = std.process.args();
_ = args.skip();
const inputFileName = args.next();
+
+ var out_name: []const u8 = "out";
+ if (std.os.argv.len == 3) out_name = args.next().?;
+
const inputFile = try std.fs.cwd().openFile(inputFileName.?, .{});
defer inputFile.close();
- std.fs.cwd().makeDir("out") catch |err| {
+ std.fs.cwd().makeDir("calico-out") catch |err|
if (err != error.PathAlreadyExists) return err;
- };
- const outfile = try std.fs.cwd().createFile("out/out.asm", .{});
+
+ const outFileName = try getFileName(gpa.allocator(), out_name, "asm");
+ defer gpa.allocator().free(outFileName);
+ const outfile = try std.fs.cwd().createFile(outFileName, .{});
const outWriter = outfile.writer();
defer outfile.close();
@@ -24,9 +32,10 @@ pub fn main() !void {
const all = try inputFile.readToEndAlloc(gpa.allocator(), 2048);
defer gpa.allocator().free(all);
- const toks = try tok.tokenize(gpa.allocator(), all);
- defer gpa.allocator().free(toks);
- var tokIter = tok.TokenIterator{ .tokens = toks };
+ var tokenizer = tok.Tokenizer.init(gpa.allocator(), all);
+ defer tokenizer.deinit();
+ var tokIter = tok.TokenIterator{ .tokens = try tokenizer.tokenize() };
+
try outWriter.print("global _start:\n", .{});
while (tokIter.next()) |t| {
switch (t) {
@@ -52,13 +61,27 @@ pub fn main() !void {
}
}
- const nasmargv = [_][]const u8{ "nasm", "-felf64", "out/out.asm" };
- const nasmproc = try std.ChildProcess.run(.{ .argv = &nasmargv, .allocator = gpa.allocator() });
+ // Run nasm and ld to build the executable
+    // TODO: switch to qbe or llvm (preferably qbe)
+ const nasmFile = try getFileName(gpa.allocator(), out_name, "asm");
+ defer gpa.allocator().free(nasmFile);
+ const nasmargv = [_][]const u8{ "nasm", "-felf64", nasmFile };
+ const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = gpa.allocator() });
defer gpa.allocator().free(nasmproc.stdout);
defer gpa.allocator().free(nasmproc.stderr);
- const ldargv = [_][]const u8{ "ld", "-o", "out/out", "out/out.o" };
- const ldproc = try std.ChildProcess.run(.{ .argv = &ldargv, .allocator = gpa.allocator() });
+ const ldFile = try getFileName(gpa.allocator(), out_name, "o");
+ defer gpa.allocator().free(ldFile);
+ const binFile = try getFileName(gpa.allocator(), out_name, "");
+ defer gpa.allocator().free(binFile);
+ const ldargv = [_][]const u8{ "ld", "-o", binFile, ldFile };
+ const ldproc = try std.process.Child.run(.{ .argv = &ldargv, .allocator = gpa.allocator() });
defer gpa.allocator().free(ldproc.stdout);
defer gpa.allocator().free(ldproc.stderr);
}
+
+inline fn getFileName(allocator: std.mem.Allocator, out_name: []const u8, fileType: []const u8) ![]const u8 {
+ var hasDot: []const u8 = ".";
+ if (fileType.len == 0) hasDot = "";
+ return try std.fmt.allocPrint(allocator, "calico-out/{s}{s}{s}", .{ out_name, hasDot, fileType });
+}
diff --git a/src/tokenize.zig b/src/tokenize.zig
index b5d5d23..6225814 100644
--- a/src/tokenize.zig
+++ b/src/tokenize.zig
@@ -5,62 +5,98 @@ const TokenError = error{UnknownToken};
const Token = union(enum) {
ret: []const u8,
intLit: i32,
- semiCol: u8,
- nil: void,
+ semiCol,
+ nil,
};
pub const TokenIterator = struct {
- tokens: []const Token,
+ tokens: std.ArrayList(Token),
index: usize = 0,
pub fn next(self: *TokenIterator) ?Token {
- defer self.*.index = self.*.index + 1;
- if (self.*.index >= self.*.tokens.len) return null;
- return self.*.tokens[self.*.index];
+ defer self.index = self.index + 1;
+ if (self.index >= self.tokens.items.len) return null;
+ return self.tokens.items[self.index];
}
};
-pub fn tokenize(allocator: std.mem.Allocator, buff: []const u8) ![]const Token {
- var toks = std.ArrayList(Token).init(allocator);
- defer toks.deinit();
- var str = std.ArrayList(u8).init(allocator);
- defer str.deinit();
-
- var i: u32 = 0;
- while (i < buff.len) {
- switch (buff[i]) {
- ' ', '\n', '\t' => {
- i = i + 1;
- continue;
- },
- '0'...'9' => {
- while (std.ascii.isDigit(buff[i])) {
- try str.append(buff[i]);
- i = i + 1;
- }
- const num: i32 = try std.fmt.parseInt(i32, str.items, 10);
- try toks.append(.{ .intLit = num });
- str.deinit();
- str = std.ArrayList(u8).init(allocator);
- },
- 'a'...'z', 'A'...'Z' => {
- while (std.ascii.isAlphanumeric(buff[i])) {
- try str.append(buff[i]);
- i = i + 1;
- }
- try toks.append(.{ .ret = try str.toOwnedSlice() });
- str.deinit();
- str = std.ArrayList(u8).init(allocator);
- },
- ';' => {
- i = i + 1;
- try toks.append(.{ .semiCol = ';' });
- },
- '+', '-', '*', '/' => {
- // Process operator
- },
- else => {},
+pub const StringIterator = struct {
+ string: []const u8,
+ index: usize = 0,
+
+ pub fn init(string: []const u8) StringIterator {
+ return StringIterator{ .string = string };
+ }
+
+ pub fn peek(self: StringIterator) ?u8 {
+ if (self.index >= self.string.len) return null;
+ return self.string[self.index];
+ }
+
+ pub fn consume(self: *StringIterator) ?u8 {
+ defer self.index += 1;
+ return self.peek();
+ }
+
+ pub fn skip(self: *StringIterator) void {
+ self.index += 1;
+ }
+};
+
+pub const Tokenizer = struct {
+ src: StringIterator,
+ allocator: std.mem.Allocator,
+ toks: std.ArrayList(Token),
+
+ pub fn init(allocator: std.mem.Allocator, src: []const u8) Tokenizer {
+ return Tokenizer{
+ .src = StringIterator.init(src),
+ .allocator = allocator,
+ .toks = std.ArrayList(Token).init(allocator),
+ };
+ }
+
+ pub fn deinit(self: *Tokenizer) void {
+ self.toks.deinit();
+ }
+
+ pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) {
+ var str = std.ArrayList(u8).init(self.allocator);
+ defer str.deinit();
+
+ while (self.src.peek()) |char| {
+ switch (char) {
+ ' ', '\n', '\t' => {
+ self.src.skip();
+ continue;
+ },
+ '0'...'9' => {
+ while (std.ascii.isDigit(self.src.peek().?))
+ try str.append(self.src.consume().?);
+
+ const num: i32 = try std.fmt.parseInt(i32, str.items, 10);
+ try self.toks.append(.{ .intLit = num });
+ str.deinit();
+ str = std.ArrayList(u8).init(self.allocator);
+ },
+ 'a'...'z', 'A'...'Z' => {
+ while (std.ascii.isAlphanumeric(self.src.peek().?))
+ try str.append(self.src.consume().?);
+
+ try self.toks.append(.{ .ret = try str.toOwnedSlice() });
+ str.deinit();
+ str = std.ArrayList(u8).init(self.allocator);
+ },
+ ';' => {
+ self.src.skip();
+ try self.toks.append(.semiCol);
+ },
+ '+', '-', '*', '/' => {
+ // Process operator
+ },
+ else => {},
+ }
}
+ return self.toks;
}
- return toks.toOwnedSlice();
-}
+};