diff options
| author | Nic Gaffney <gaffney_nic@protonmail.com> | 2024-06-28 00:54:59 -0500 | 
|---|---|---|
| committer | Nic Gaffney <gaffney_nic@protonmail.com> | 2024-06-28 00:54:59 -0500 | 
| commit | c955f2fb933865a50e791e2be91adbcb34bbadc9 (patch) | |
| tree | efd6def99007c8936721931d02e42c77357f3fd7 | |
| parent | 36e990c5bdfffb145b7255b8159d3ac879344996 (diff) | |
| download | calico-c955f2fb933865a50e791e2be91adbcb34bbadc9.tar.gz | |
feat(tokenizer): Move tokenizer into a struct
Moved the tokenizer into a struct and created a string iterator to
make tokenizing easier
| -rw-r--r-- | .gitignore | 4 |
| -rw-r--r-- | build.zig | 4 |
| -rw-r--r-- | build.zig.zon | 64 |
| -rw-r--r-- | src/main.zig | 45 |
| -rw-r--r-- | src/tokenize.zig | 132 |
5 files changed, 125 insertions, 124 deletions
| @@ -1,3 +1,3 @@ -out/ +calico-out/  zig-out/ -zig-cache/ +.zig-cache/ @@ -6,7 +6,7 @@ pub fn build(b: *std.Build) void {      const optimize = b.standardOptimizeOption(.{});      const exe = b.addExecutable(.{ -        .name = "compiler", +        .name = "calico",          .root_source_file = b.path("src/main.zig"),          .target = target,          .optimize = optimize, @@ -22,7 +22,7 @@ pub fn build(b: *std.Build) void {          run_cmd.addArgs(args);      } -    const run_step = b.step("run", "Run the app"); +    const run_step = b.step("run", "Run the compiler");      run_step.dependOn(&run_cmd.step);      const exe_unit_tests = b.addTest(.{ diff --git a/build.zig.zon b/build.zig.zon index 39f7607..cf62921 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,67 +1,9 @@  .{ -    .name = "compiler", -    // This is a [Semantic Version](https://semver.org/). -    // In a future version of Zig it will be used for package deduplication. -    .version = "0.0.0", +    .name = "calico", +    .version = "0.0.1", -    // This field is optional. -    // This is currently advisory only; Zig does not yet do anything -    // with this value. -    //.minimum_zig_version = "0.11.0", - -    // This field is optional. -    // Each dependency must either provide a `url` and `hash`, or a `path`. -    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively. -    // Once all dependencies are fetched, `zig build` no longer requires -    // internet connectivity. -    .dependencies = .{ -        // See `zig fetch --save <url>` for a command-line interface for adding dependencies. -        //.example = .{ -        //    // When updating this field to a new URL, be sure to delete the corresponding -        //    // `hash`, otherwise you are communicating that you expect to find the old hash at -        //    // the new URL. 
-        //    .url = "https://example.com/foo.tar.gz", -        // -        //    // This is computed from the file contents of the directory of files that is -        //    // obtained after fetching `url` and applying the inclusion rules given by -        //    // `paths`. -        //    // -        //    // This field is the source of truth; packages do not come from a `url`; they -        //    // come from a `hash`. `url` is just one of many possible mirrors for how to -        //    // obtain a package matching this `hash`. -        //    // -        //    // Uses the [multihash](https://multiformats.io/multihash/) format. -        //    .hash = "...", -        // -        //    // When this is provided, the package is found in a directory relative to the -        //    // build root. In this case the package's hash is irrelevant and therefore not -        //    // computed. This field and `url` are mutually exclusive. -        //    .path = "foo", - -        //    // When this is set to `true`, a package is declared to be lazily -        //    // fetched. This makes the dependency only get fetched if it is -        //    // actually used. -        //    .lazy = false, -        //}, -    }, - -    // Specifies the set of files and directories that are included in this package. -    // Only files and directories listed here are included in the `hash` that -    // is computed for this package. -    // Paths are relative to the build root. Use the empty string (`""`) to refer to -    // the build root itself. -    // A directory listed here means that all files within, recursively, are included. +    .dependencies = .{},      .paths = .{ -        // This makes *all* files, recursively, included in this package. It is generally -        // better to explicitly list the files and directories instead, to insure that -        // fetching from tarballs, file system paths, and version control all result -        // in the same contents hash.          
"", -        // For example... -        //"build.zig", -        //"build.zig.zon", -        //"src", -        //"LICENSE", -        //"README.md",      },  } diff --git a/src/main.zig b/src/main.zig index 18239b1..99b8130 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4,19 +4,27 @@ const tok = @import("tokenize.zig");  const gftCompilerError = error{NoInputFile};  pub fn main() !void { -    if (std.os.argv.len != 2) return gftCompilerError.NoInputFile; +    if (std.os.argv.len < 2) return gftCompilerError.NoInputFile; +      var gpa = std.heap.GeneralPurposeAllocator(.{}){};      defer _ = gpa.deinit(); +      var args = std.process.args();      _ = args.skip();      const inputFileName = args.next(); + +    var out_name: []const u8 = "out"; +    if (std.os.argv.len == 3) out_name = args.next().?; +      const inputFile = try std.fs.cwd().openFile(inputFileName.?, .{});      defer inputFile.close(); -    std.fs.cwd().makeDir("out") catch |err| { +    std.fs.cwd().makeDir("calico-out") catch |err|          if (err != error.PathAlreadyExists) return err; -    }; -    const outfile = try std.fs.cwd().createFile("out/out.asm", .{}); + +    const outFileName = try getFileName(gpa.allocator(), out_name, "asm"); +    defer gpa.allocator().free(outFileName); +    const outfile = try std.fs.cwd().createFile(outFileName, .{});      const outWriter = outfile.writer();      defer outfile.close(); @@ -24,9 +32,10 @@ pub fn main() !void {      const all = try inputFile.readToEndAlloc(gpa.allocator(), 2048);      defer gpa.allocator().free(all); -    const toks = try tok.tokenize(gpa.allocator(), all); -    defer gpa.allocator().free(toks); -    var tokIter = tok.TokenIterator{ .tokens = toks }; +    var tokenizer = tok.Tokenizer.init(gpa.allocator(), all); +    defer tokenizer.deinit(); +    var tokIter = tok.TokenIterator{ .tokens = try tokenizer.tokenize() }; +      try outWriter.print("global _start:\n", .{});      while (tokIter.next()) |t| {          switch (t) { @@ 
-52,13 +61,27 @@ pub fn main() !void {          }      } -    const nasmargv = [_][]const u8{ "nasm", "-felf64", "out/out.asm" }; -    const nasmproc = try std.ChildProcess.run(.{ .argv = &nasmargv, .allocator = gpa.allocator() }); +    // Run nasm and ld to build the executable +    // TODO: switch to qbe or llvm (preferabbly qbe) +    const nasmFile = try getFileName(gpa.allocator(), out_name, "asm"); +    defer gpa.allocator().free(nasmFile); +    const nasmargv = [_][]const u8{ "nasm", "-felf64", nasmFile }; +    const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = gpa.allocator() });      defer gpa.allocator().free(nasmproc.stdout);      defer gpa.allocator().free(nasmproc.stderr); -    const ldargv = [_][]const u8{ "ld", "-o", "out/out", "out/out.o" }; -    const ldproc = try std.ChildProcess.run(.{ .argv = &ldargv, .allocator = gpa.allocator() }); +    const ldFile = try getFileName(gpa.allocator(), out_name, "o"); +    defer gpa.allocator().free(ldFile); +    const binFile = try getFileName(gpa.allocator(), out_name, ""); +    defer gpa.allocator().free(binFile); +    const ldargv = [_][]const u8{ "ld", "-o", binFile, ldFile }; +    const ldproc = try std.process.Child.run(.{ .argv = &ldargv, .allocator = gpa.allocator() });      defer gpa.allocator().free(ldproc.stdout);      defer gpa.allocator().free(ldproc.stderr);  } + +inline fn getFileName(allocator: std.mem.Allocator, out_name: []const u8, fileType: []const u8) ![]const u8 { +    var hasDot: []const u8 = "."; +    if (fileType.len == 0) hasDot = ""; +    return try std.fmt.allocPrint(allocator, "calico-out/{s}{s}{s}", .{ out_name, hasDot, fileType }); +} diff --git a/src/tokenize.zig b/src/tokenize.zig index b5d5d23..6225814 100644 --- a/src/tokenize.zig +++ b/src/tokenize.zig @@ -5,62 +5,98 @@ const TokenError = error{UnknownToken};  const Token = union(enum) {      ret: []const u8,      intLit: i32, -    semiCol: u8, -    nil: void, +    semiCol, +    nil,  };  pub const 
TokenIterator = struct { -    tokens: []const Token, +    tokens: std.ArrayList(Token),      index: usize = 0,      pub fn next(self: *TokenIterator) ?Token { -        defer self.*.index = self.*.index + 1; -        if (self.*.index >= self.*.tokens.len) return null; -        return self.*.tokens[self.*.index]; +        defer self.index = self.index + 1; +        if (self.index >= self.tokens.items.len) return null; +        return self.tokens.items[self.index];      }  }; -pub fn tokenize(allocator: std.mem.Allocator, buff: []const u8) ![]const Token { -    var toks = std.ArrayList(Token).init(allocator); -    defer toks.deinit(); -    var str = std.ArrayList(u8).init(allocator); -    defer str.deinit(); - -    var i: u32 = 0; -    while (i < buff.len) { -        switch (buff[i]) { -            ' ', '\n', '\t' => { -                i = i + 1; -                continue; -            }, -            '0'...'9' => { -                while (std.ascii.isDigit(buff[i])) { -                    try str.append(buff[i]); -                    i = i + 1; -                } -                const num: i32 = try std.fmt.parseInt(i32, str.items, 10); -                try toks.append(.{ .intLit = num }); -                str.deinit(); -                str = std.ArrayList(u8).init(allocator); -            }, -            'a'...'z', 'A'...'Z' => { -                while (std.ascii.isAlphanumeric(buff[i])) { -                    try str.append(buff[i]); -                    i = i + 1; -                } -                try toks.append(.{ .ret = try str.toOwnedSlice() }); -                str.deinit(); -                str = std.ArrayList(u8).init(allocator); -            }, -            ';' => { -                i = i + 1; -                try toks.append(.{ .semiCol = ';' }); -            }, -            '+', '-', '*', '/' => { -                // Process operator -            }, -            else => {}, +pub const StringIterator = struct { +    string: []const u8, +    index: 
usize = 0, + +    pub fn init(string: []const u8) StringIterator { +        return StringIterator{ .string = string }; +    } + +    pub fn peek(self: StringIterator) ?u8 { +        if (self.index >= self.string.len) return null; +        return self.string[self.index]; +    } + +    pub fn consume(self: *StringIterator) ?u8 { +        defer self.index += 1; +        return self.peek(); +    } + +    pub fn skip(self: *StringIterator) void { +        self.index += 1; +    } +}; + +pub const Tokenizer = struct { +    src: StringIterator, +    allocator: std.mem.Allocator, +    toks: std.ArrayList(Token), + +    pub fn init(allocator: std.mem.Allocator, src: []const u8) Tokenizer { +        return Tokenizer{ +            .src = StringIterator.init(src), +            .allocator = allocator, +            .toks = std.ArrayList(Token).init(allocator), +        }; +    } + +    pub fn deinit(self: *Tokenizer) void { +        self.toks.deinit(); +    } + +    pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) { +        var str = std.ArrayList(u8).init(self.allocator); +        defer str.deinit(); + +        while (self.src.peek()) |char| { +            switch (char) { +                ' ', '\n', '\t' => { +                    self.src.skip(); +                    continue; +                }, +                '0'...'9' => { +                    while (std.ascii.isDigit(self.src.peek().?)) +                        try str.append(self.src.consume().?); + +                    const num: i32 = try std.fmt.parseInt(i32, str.items, 10); +                    try self.toks.append(.{ .intLit = num }); +                    str.deinit(); +                    str = std.ArrayList(u8).init(self.allocator); +                }, +                'a'...'z', 'A'...'Z' => { +                    while (std.ascii.isAlphanumeric(self.src.peek().?)) +                        try str.append(self.src.consume().?); + +                    try self.toks.append(.{ .ret = try str.toOwnedSlice() }); 
+                    str.deinit(); +                    str = std.ArrayList(u8).init(self.allocator); +                }, +                ';' => { +                    self.src.skip(); +                    try self.toks.append(.semiCol); +                }, +                '+', '-', '*', '/' => { +                    // Process operator +                }, +                else => {}, +            }          } +        return self.toks;      } -    return toks.toOwnedSlice(); -} +}; | 
