diff options
| -rw-r--r-- | .gitignore | 3 | ||||
| -rw-r--r-- | build.zig | 8 | ||||
| -rw-r--r-- | examples/test1.gft | 2 | ||||
| -rw-r--r-- | src/main.zig | 13 | ||||
| -rw-r--r-- | src/tokenize.zig | 111 | 
5 files changed, 90 insertions, 47 deletions
| @@ -1,3 +1,2 @@ -calico-out/ -zig-out/ +*-out/  .zig-cache/ @@ -31,8 +31,16 @@ pub fn build(b: *std.Build) void {          .optimize = optimize,      }); +    const token_unit_tests = b.addTest(.{ +        .root_source_file = b.path("src/tokenize.zig"), +        .target = target, +        .optimize = optimize, +    }); +      const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); +    const run_token_unit_tests = b.addRunArtifact(token_unit_tests);      const test_step = b.step("test", "Run unit tests");      test_step.dependOn(&run_exe_unit_tests.step); +    test_step.dependOn(&run_token_unit_tests.step);  } diff --git a/examples/test1.gft b/examples/test1.gft index 16f566f..11388cd 100644 --- a/examples/test1.gft +++ b/examples/test1.gft @@ -1 +1 @@ -return 69; +exit 69; diff --git a/src/main.zig b/src/main.zig index 99b8130..c24f8a8 100644 --- a/src/main.zig +++ b/src/main.zig @@ -22,20 +22,23 @@ pub fn main() !void {      std.fs.cwd().makeDir("calico-out") catch |err|          if (err != error.PathAlreadyExists) return err; +    // Setup native code writer      const outFileName = try getFileName(gpa.allocator(), out_name, "asm");      defer gpa.allocator().free(outFileName);      const outfile = try std.fs.cwd().createFile(outFileName, .{});      const outWriter = outfile.writer();      defer outfile.close(); -    // Logic here to compile language +    // Turn the input file into a string      const all = try inputFile.readToEndAlloc(gpa.allocator(), 2048);      defer gpa.allocator().free(all); +    // Tokenize      var tokenizer = tok.Tokenizer.init(gpa.allocator(), all);      defer tokenizer.deinit(); -    var tokIter = tok.TokenIterator{ .tokens = try tokenizer.tokenize() }; +    var tokIter = tok.Iterator(tok.Token).init((try tokenizer.tokenize()).items); +    // Parse tokens      try outWriter.print("global _start:\n", .{});      while (tokIter.next()) |t| {          switch (t) { @@ -57,15 +60,14 @@ pub fn main() !void {                  , .{num.?.intLit});                  gpa.allocator().free(t.ret);              }, +            // No other commands              else => {},          }      }      // Run nasm and ld to build the executable      // TODO: switch to qbe or llvm (preferabbly qbe) -    const nasmFile = try getFileName(gpa.allocator(), out_name, "asm"); -    defer gpa.allocator().free(nasmFile); -    const nasmargv = [_][]const u8{ "nasm", "-felf64", nasmFile }; +    const nasmargv = [_][]const u8{ "nasm", "-felf64", outFileName };      const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = gpa.allocator() });      defer gpa.allocator().free(nasmproc.stdout);      defer gpa.allocator().free(nasmproc.stderr); @@ -80,6 +82,7 @@ pub fn main() !void {      defer gpa.allocator().free(ldproc.stderr);  } +/// Get file extension based on filename  inline fn getFileName(allocator: std.mem.Allocator, out_name: []const u8, fileType: []const u8) ![]const u8 {      var hasDot: []const u8 = ".";      if (fileType.len == 0) hasDot = ""; diff --git a/src/tokenize.zig b/src/tokenize.zig index 6225814..37080b4 100644 --- a/src/tokenize.zig +++ b/src/tokenize.zig @@ -2,74 +2,75 @@ const std = @import("std");  const TokenError = error{UnknownToken}; -const Token = union(enum) { +pub const Token = union(enum) {      ret: []const u8,      intLit: i32, +    binaryOp: u8,      semiCol,      nil,  }; -pub const TokenIterator = struct { -    tokens: std.ArrayList(Token), -    index: usize = 0, +/// Creates a tokenizer over a slice of typ +pub fn Iterator(comptime typ: type) type { +    return struct { +        items: []const typ, +        index: usize = 0, -    pub fn next(self: *TokenIterator) ?Token { -        defer self.index = self.index + 1; -        if (self.index >= self.tokens.items.len) return null; -        return self.tokens.items[self.index]; -    } -}; - -pub const StringIterator = struct { -    string: []const u8, -    index: usize = 0, - -    pub fn init(string: []const u8) StringIterator { -        return StringIterator{ .string = string }; -    } +        /// Initialize tokenizer with a slice +        pub fn init(items: []const typ) Iterator(typ) { +            return Iterator(typ){ .items = items }; +        } -    pub fn peek(self: StringIterator) ?u8 { -        if (self.index >= self.string.len) return null; -        return self.string[self.index]; -    } +        /// Get current item +        pub fn peek(self: Iterator(typ)) ?typ { +            if (self.index >= self.items.len) return null; +            return self.items[self.index]; +        } -    pub fn consume(self: *StringIterator) ?u8 { -        defer self.index += 1; -        return self.peek(); -    } +        /// Get current item and iterate index +        pub fn consume(self: *Iterator(typ)) ?typ { +            defer self.index += 1; +            return self.peek(); +        } +        /// Get current item and iterate index +        pub const next = consume; -    pub fn skip(self: *StringIterator) void { -        self.index += 1; -    } -}; +        /// Skip over current item +        pub fn skip(self: *Iterator(typ)) void { +            self.index += 1; +        } +    }; +} +/// Tokenizes a string of source code  pub const Tokenizer = struct { -    src: StringIterator, +    src: Iterator(u8),      allocator: std.mem.Allocator,      toks: std.ArrayList(Token), +    /// Initializes a string of source code +    /// Deinitialize with Tokenizer.deinit()      pub fn init(allocator: std.mem.Allocator, src: []const u8) Tokenizer {          return Tokenizer{ -            .src = StringIterator.init(src), +            .src = Iterator(u8).init(src),              .allocator = allocator,              .toks = std.ArrayList(Token).init(allocator),          };      } +    /// Releases allocated memory      pub fn deinit(self: *Tokenizer) void {          self.toks.deinit();      } +    /// Returns an ArrayList of tokens      pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) {          var str = std.ArrayList(u8).init(self.allocator);          defer str.deinit();          while (self.src.peek()) |char| {              switch (char) { -                ' ', '\n', '\t' => { -                    self.src.skip(); -                    continue; -                }, +                ' ', '\n', '\t' => self.src.skip(),                  '0'...'9' => {                      while (std.ascii.isDigit(self.src.peek().?))                          try str.append(self.src.consume().?); @@ -91,12 +92,44 @@ pub const Tokenizer = struct {                      self.src.skip();                      try self.toks.append(.semiCol);                  }, -                '+', '-', '*', '/' => { -                    // Process operator -                }, +                '+', '-', '*', '/' => try self.toks.append(.{ .binaryOp = self.src.consume().? }),                  else => {},              }          }          return self.toks;      }  }; + +test "Tokenize" { +    std.testing.log_level = std.log.Level.info; +    const expect = std.testing.expect; +    const testSource: []const u8 = "exit 120 + 150 - 260 * 12 / 5;"; +    var toks = Tokenizer.init(std.testing.allocator, testSource); +    defer toks.deinit(); +    const arrtoks = try toks.tokenize(); +    const expected = &[_]Token{ +        .{ .ret = "exit" }, +        .{ .intLit = 120 }, +        .{ .binaryOp = '+' }, +        .{ .intLit = 150 }, +        .{ .binaryOp = '-' }, +        .{ .intLit = 260 }, +        .{ .binaryOp = '*' }, +        .{ .intLit = 12 }, +        .{ .binaryOp = '/' }, +        .{ .intLit = 5 }, +        .semiCol, +    }; +    for (arrtoks.items, expected) |act, exp| { +        switch (act) { +            .ret => |v| { +                try expect(std.mem.eql(u8, v, exp.ret)); +                std.testing.allocator.free(v); +            }, +            .intLit => |v| try expect(v == exp.intLit), +            .semiCol => |v| try expect(v == exp.semiCol), +            .binaryOp => |v| try expect(v == exp.binaryOp), +            else => {}, +        } +    } +} | 
