diff options
| author | Nic Gaffney <gaffney_nic@protonmail.com> | 2024-08-06 02:24:04 -0500 | 
|---|---|---|
| committer | Nic Gaffney <gaffney_nic@protonmail.com> | 2024-08-06 02:24:04 -0500 | 
| commit | 6a7467264b506e5812b2d9a8cd55fa307f2b5a10 (patch) | |
| tree | 97c136f9f65a575d940f647b7c2130b3454fa88d | |
| parent | f8542ab4c65d64e7713063460d750901e7b0362d (diff) | |
| download | calico-6a7467264b506e5812b2d9a8cd55fa307f2b5a10.tar.gz | |
ok so i messed up
| -rw-r--r-- | .gitmodules | 4 | ||||
| -rw-r--r-- | build.zig | 23 | ||||
| -rw-r--r-- | build.zig.zon | 7 | ||||
| -rw-r--r-- | examples/test1.gft | 7 | ||||
| m--------- | lib/llvm-zig (renamed from vendors/llvm-zig) | 0 | ||||
| -rw-r--r-- | src/codegen.zig | 188 | ||||
| -rw-r--r-- | src/main.zig | 28 | ||||
| -rw-r--r-- | src/parser.zig | 196 | ||||
| -rw-r--r-- | src/symtable.zig | 141 | ||||
| -rw-r--r-- | src/tokenize.zig | 84 | 
10 files changed, 529 insertions, 149 deletions
| diff --git a/.gitmodules b/.gitmodules index 410faa0..a957321 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "vendors/llvm-zig"] -	path = vendors/llvm-zig +[submodule "lib/llvm-zig"] +	path = lib/llvm-zig  	url = https://github.com/dwclake/llvm-zig.git @@ -1,6 +1,6 @@  const std = @import("std"); -pub fn build(b: *std.Build) void { +pub fn build(b: *std.Build) !void {      const target = b.standardTargetOptions(.{});      const optimize = b.standardOptimizeOption(.{}); @@ -12,8 +12,27 @@ pub fn build(b: *std.Build) void {          .optimize = optimize,      }); +    const lib = b.addStaticLibrary(.{ +        .name = "llvm", +        .root_source_file = b.path("lib/llvm-zig/src/llvm.zig"), +        .target = target, +        .optimize = optimize, +    }); +    lib.defineCMacro("_FILE_OFFSET_BITS", "64"); +    lib.defineCMacro("__STDC_CONSTANT_MACROS", null); +    lib.defineCMacro("__STDC_FORMAT_MACROS", null); +    lib.defineCMacro("__STDC_LIMIT_MACROS", null); +    lib.linkSystemLibrary("z"); +    lib.linkLibC(); +    lib.linkSystemLibrary("LLVM-17"); +      b.installArtifact(exe); +    b.installArtifact(lib); + +    _ = try b.modules.put("llvm", &lib.root_module); +    exe.root_module.addImport("llvm", b.modules.get("llvm").?); +      const run_cmd = b.addRunArtifact(exe);      run_cmd.step.dependOn(b.getInstallStep()); @@ -49,6 +68,8 @@ pub fn build(b: *std.Build) void {          .optimize = optimize,      }); +    codegen_unit_tests.root_module.addImport("llvm", &lib.root_module); +      const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);      const run_token_unit_tests = b.addRunArtifact(token_unit_tests);      const run_parse_unit_tests = b.addRunArtifact(parse_unit_tests); diff --git a/build.zig.zon b/build.zig.zon index cf62921..77ba4d7 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -2,7 +2,12 @@      .name = "calico",      .version = "0.0.1", -    .dependencies = .{}, +    .dependencies = .{ +        .@"llvm-zig" = .{ + 
           .url = "lib/llvm-zig/", +            .hash = "12209e03a708a6d54551a1fc9fc9b94aa379bfe13e48087cf69529c5fd98ae443aa1", +        }, +    },      .paths = .{          "",      }, diff --git a/examples/test1.gft b/examples/test1.gft index 3ae0040..1f4ac3b 100644 --- a/examples/test1.gft +++ b/examples/test1.gft @@ -1,4 +1,3 @@ -const x = 1; -var y = 4; -y = 7; -exit y; +fn main() -> i32 { +    return 7; +} diff --git a/vendors/llvm-zig b/lib/llvm-zig -Subproject 28880cd947da96bcb8213c630714baf87cf7de9 +Subproject 28880cd947da96bcb8213c630714baf87cf7de9 diff --git a/src/codegen.zig b/src/codegen.zig index a1df34f..e2889a6 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -1,46 +1,72 @@  const std = @import("std");  const parse = @import("parser.zig"); +const symb = @import("symtable.zig"); +const llvm = @import("llvm"); +const analysis = llvm.analysis; +const core = llvm.core; +const target = llvm.target; +const types = llvm.types; + +const CodegenError = error{ +    OutOfMemory, +}; + +fn toLLVMtype(typ: parse.TypeIdent, sym: *symb.SymbolTable) types.LLVMTypeRef { +    if (sym.getType(typ)) |t| { +        return switch (t) { +            .Integer => core.LLVMInt32Type(), +            .Void => core.LLVMVoidType(), +            else => core.LLVMVoidType(), +        }; +    } +    return core.LLVMVoidType(); +}  pub const Generator = struct { -    root: []const parse.NodeStmt, +    root: parse.NodeStmt,      allocator: std.mem.Allocator, -    code: std.ArrayList(u8), +    builder: types.LLVMBuilderRef, +    context: types.LLVMContextRef, +    module: types.LLVMModuleRef, +    currentFunc: ?types.LLVMValueRef, +    currentFuncIsVoid: bool, + +    pub fn init(allocator: std.mem.Allocator, root: parse.NodeStmt) Generator { +        _ = target.LLVMInitializeNativeTarget(); +        _ = target.LLVMInitializeNativeAsmPrinter(); +        _ = target.LLVMInitializeNativeAsmParser(); + +        const context = core.LLVMContextCreate(); +        const builder = 
core.LLVMCreateBuilderInContext(context); +        const module = core.LLVMModuleCreateWithNameInContext("_calico_start", context); -    pub fn init(allocator: std.mem.Allocator, stmts: []const parse.NodeStmt) Generator {          return .{ -            .root = stmts, +            .root = root,              .allocator = allocator, -            .code = std.ArrayList(u8).init(allocator), +            .builder = builder, +            .context = context, +            .module = module, +            .currentFunc = null, +            .currentFuncIsVoid = false,          };      }      pub fn deinit(self: *Generator) void { -        self.code.deinit(); +        // Shutdown LLVM +        defer core.LLVMShutdown(); +        defer core.LLVMDisposeModule(self.module); +        defer core.LLVMDisposeBuilder(self.builder); + +        //self.code.deinit();      }      fn genExit(self: *Generator, exit: parse.NodeExit) !void { -        const expr = exit.expr; -        const newCode = -            switch (expr.kind) { -            .intLit => |intlit| try std.fmt.allocPrint(self.allocator, -                \\  mov rax, 60 -                \\  mov rdi, {d} -                \\  syscall -                \\ -            , .{ -                intlit.intlit.intLit, -            }), -            .ident => |ident| try std.fmt.allocPrint(self.allocator, -                \\  mov rax, 60 -                \\  mov rdi, [{s}] -                \\  syscall -                \\ -            , .{ -                ident.ident.ident, -            }), -        }; -        try self.code.appendSlice(newCode); -        self.allocator.free(newCode); +        const expr = exit; +        const val = core.LLVMConstInt(core.LLVMInt32Type(), switch (expr.kind) { +            .intLit => |i| @intCast(i.intLit), +            .ident => unreachable, +        }, 0); +        _ = core.LLVMBuildRet(self.builder, val);      }      fn genVar(self: *Generator, value: parse.NodeVar) !void { @@ -49,7 +75,7 @@ pub const 
Generator = struct {              \\  {s}: dw {d}              \\          , .{ value.ident.ident, switch (value.expr.kind) { -            .intLit => |intlit| intlit.intlit.intLit, +            .intLit => |intlit| intlit.intLit,              else => return error.NotImplemented,          } });          defer self.allocator.free(str); @@ -62,7 +88,7 @@ pub const Generator = struct {              \\  {s}: dw {d}              \\          , .{ value.ident.ident, switch (value.expr.kind) { -            .intLit => |intlit| intlit.intlit.intLit, +            .intLit => |intlit| intlit.intLit,              else => return error.NotImplemented,          } });          defer self.allocator.free(str); @@ -77,7 +103,7 @@ pub const Generator = struct {                  \\  mov [{s}], rax                  \\              , .{ -                intlit.intlit.intLit, +                intlit.intLit,                  assign.ident.ident,              }),              .ident => |ident| try std.fmt.allocPrint(self.allocator, @@ -85,7 +111,7 @@ pub const Generator = struct {                  \\  mov [{s}], rax                  \\              , .{ -                ident.ident.ident, +                ident.ident,                  assign.ident.ident,              }),          }; @@ -93,46 +119,92 @@ pub const Generator = struct {          self.allocator.free(newCode);      } +    fn genBlock(self: *Generator, block: []const parse.NodeStmt) CodegenError!void { +        for (block) |stmt| try self.genStmt(stmt); +    } + +    fn genFunc(self: *Generator, stmt: parse.NodeStmt) !void { +        const fun = stmt.kind.function; +        const table = stmt.symtable; +        const block = fun.block; +        const codeSlice = block.kind.block; +        const funcName: [*:0]const u8 = try self.allocator.dupeZ(u8, fun.ident.ident); + +        const retType = toLLVMtype(fun.retType.?, table); +        var params = [0]types.LLVMTypeRef{}; +        const funcType = core.LLVMFunctionType(retType, 
@ptrCast(&params), 0, 0); +        const func = core.LLVMAddFunction(self.module, funcName, funcType); +        self.currentFunc = func; +        self.currentFuncIsVoid = switch (table.getType(fun.retType.?).?) { +            .Void => true, +            else => false, +        }; + +        const function: types.LLVMValueRef = self.currentFunc.?; +        const codeBlock = core.LLVMAppendBasicBlockInContext(self.context, function, "entry"); +        core.LLVMPositionBuilderAtEnd(self.builder, codeBlock); +        const bodyTable = block.symtable; +        _ = bodyTable; +        //TODO: codegen for args + +        try self.genBlock(codeSlice); +        _ = if (self.currentFuncIsVoid) core.LLVMBuildRetVoid(self.builder); +    } + +    fn genStmt(self: *Generator, stmt: parse.NodeStmt) !void { +        try switch (stmt.kind) { +            .exit => |expr| self.genExit(expr), +            .function => self.genFunc(stmt), +            else => {}, +        }; +    } +      pub fn generate(self: *Generator) ![]const u8 { -        try self.code.appendSlice( -            \\section .text -            \\  global _start -            \\_start: -            \\ -        ); -        for (self.root) |stmt| { -            switch (stmt.kind) { -                .exit => |exit| try self.genExit(exit), -                .defValue => |defValue| try self.genValue(defValue), -                .defVar => |defVar| try self.genVar(defVar), -                .assignVar => |assign| try self.genAssign(assign), -            } -        } -        return self.code.items; +        try switch (self.root.kind) { +            .block => |b| { +                for (b) |stmt| +                    try self.genStmt(stmt); +            }, +            else => error.InvalidTop, +        }; +        const string: []const u8 = std.mem.span(core.LLVMPrintModuleToString(self.module)); +        return string;      }  };  test "Codegen exit" {      const tok = @import("tokenize.zig");      const expect = 
std.testing.expect; -    const src = "exit 120;"; +    const main = @import("main.zig"); + +    const src = +        \\fn main() -> i32 { +        \\    return 7; +        \\} +    ; +    const expected = +        \\; ModuleID = '_calico_start' +        \\source_filename = "_calico_start" +        \\ +        \\define i32 @main() { +        \\entry: +        \\  ret i32 7 +        \\} +        \\ +    ;      var tokenizer = tok.Tokenizer.init(std.testing.allocator, src);      defer tokenizer.deinit();      const toks = try tokenizer.tokenize(); -    var parser = parse.Parser.init(std.testing.allocator, toks); +    var symbTable: *symb.SymbolTable = try main.initSymbolTable(std.testing.allocator); +    defer symbTable.deinit(); +    var parser = parse.Parser.init(std.testing.allocator, toks, symbTable);      defer parser.deinit();      const parseTree = try parser.parse(); +    var pop = symb.Populator.init(std.testing.allocator); +    var treeNode = parseTree.asNode(); +    try pop.populateSymtable(&treeNode);      var gen = Generator.init(std.testing.allocator, parseTree);      defer gen.deinit();      const actual = try gen.generate(); -    const expected = -        \\section .text -        \\  global _start -        \\_start: -        \\  mov rax, 60 -        \\  mov rdi, 120 -        \\  syscall -        \\ -    ;      try expect(std.mem.eql(u8, actual, expected));  } diff --git a/src/main.zig b/src/main.zig index 53421f4..248640b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -31,7 +31,7 @@ pub fn main() !void {          if (err != error.PathAlreadyExists) return err;      // Setup native code writer -    const outFileName = try getFileName(allocator, out_name, "asm"); +    const outFileName = try getFileName(allocator, out_name, "ll");      defer allocator.free(outFileName);      const outfile = try std.fs.cwd().createFile(outFileName, .{});      const outWriter = outfile.writer(); @@ -47,11 +47,15 @@ pub fn main() !void {      const tokens = try 
tokenizer.tokenize();      // Parse -    var symbTable = try symb.SymbolTable.init(allocator); +    var symbTable = try initSymbolTable(allocator);      defer symbTable.deinit(); -    var parser = parse.Parser.init(allocator, tokens, &symbTable); + +    var parser = parse.Parser.init(allocator, tokens, symbTable);      defer parser.deinit();      const tree = try parser.parse(); +    var pop = symb.Populator.init(allocator); +    var treeNode = tree.asNode(); +    try pop.populateSymtable(&treeNode);      // Codegen      var generator = gen.Generator.init(allocator, tree); @@ -59,18 +63,9 @@ pub fn main() !void {      const code = try generator.generate();      try outWriter.writeAll(code); -    // Run nasm and ld to build the executable -    // TODO: switch to qbe or llvm (preferabbly qbe) -    const nasmargv = [_][]const u8{ "nasm", "-felf64", outFileName }; -    const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = allocator }); -    defer allocator.free(nasmproc.stdout); -    defer allocator.free(nasmproc.stderr); - -    const ldFile = try getFileName(allocator, out_name, "o"); -    defer allocator.free(ldFile);      const binFile = try getFileName(allocator, out_name, "");      defer allocator.free(binFile); -    const ldargv = [_][]const u8{ "ld", "-o", binFile, ldFile }; +    const ldargv = [_][]const u8{ "clang", "-o", binFile, outFileName };      const ldproc = try std.process.Child.run(.{ .argv = &ldargv, .allocator = allocator });      defer allocator.free(ldproc.stdout);      defer allocator.free(ldproc.stderr); @@ -82,3 +77,10 @@ inline fn getFileName(allocator: std.mem.Allocator, out_name: []const u8, fileTy      if (fileType.len == 0) hasDot = "";      return try std.fmt.allocPrint(allocator, "calico-out/{s}{s}{s}", .{ out_name, hasDot, fileType });  } + +pub fn initSymbolTable(allocator: std.mem.Allocator) !*symb.SymbolTable { +    var table = try symb.SymbolTable.init(allocator); +    const intSymb: symb.SymbType = 
symb.SymbType.Integer; +    if (!try table.insert("i32", intSymb.toSymb())) return error.FailedToInsert; +    return table; +} diff --git a/src/parser.zig b/src/parser.zig index feaa226..fa0acec 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -11,18 +11,22 @@ const ParsingError = error{      ExpectedEqual,      ExpectedIdentifier,      InvalidStatement, +    UnknownIdentifier, +    UnknownToken, +    UnexpectedEOF, +    ExpectedToken, +    OutOfMemory,  };  pub const Node = union(enum) {      Expr: NodeExpr,      Stmt: NodeStmt, -      pub fn children(self: Node, allocator: std.mem.Allocator) ![]Node {          var childrenArray = std.ArrayList(Node).init(allocator);          defer childrenArray.deinit();          switch (self) { -            .Expr => |expr| try childrenArray.appendSlice(try expr.children()), -            .Stmt => |stmt| try childrenArray.appendSlice(try stmt.children()), +            .Expr => |expr| try childrenArray.appendSlice(try expr.children(allocator)), +            .Stmt => |stmt| try childrenArray.appendSlice(try stmt.children(allocator)),          }          return try childrenArray.toOwnedSlice();      } @@ -32,7 +36,7 @@ pub const NodeExpr = struct {      id: u32,      kind: ExprKind,      symtable: *symb.SymbolTable, -    typ: ?symb.SymbType, +    typ: ?TypeIdent,      isConst: bool,      pub fn asNode(self: NodeExpr) Node { @@ -49,6 +53,16 @@ pub const NodeExpr = struct {      }  }; +pub fn map(comptime T: type, comptime F: type, slice: []const F, func: fn (F) T) []const T { +    var list: [64]T = undefined; +    var max: usize = 0; +    for (slice, 0..) 
|item, i| { +        list[i] = func(item); +        max = i + 1; +    } +    return list[0..max]; +} +  pub const NodeStmt = struct {      id: u32,      kind: StmtKind, @@ -62,11 +76,15 @@ pub const NodeStmt = struct {          var childrenArray = std.ArrayList(Node).init(allocator);          defer childrenArray.deinit();          switch (self.kind) { -            .exit => |exit| try childrenArray.append(exit.expr.asNode()), +            .exit => |exit| try childrenArray.append(exit.asNode()),              .defValue => |value| try childrenArray.append(value.expr.asNode()),              .defVar => |variable| try childrenArray.append(variable.expr.asNode()),              .assignVar => |assign| try childrenArray.append(assign.expr.asNode()), -            else => {}, +            .block => |block| { +                const blockChildren = map(Node, NodeStmt, block, NodeStmt.asNode); +                for (blockChildren) |child| try childrenArray.append(child); +            }, +            .function => |fun| try childrenArray.append(fun.block.*.asNode()),          }          return try childrenArray.toOwnedSlice();      } @@ -96,42 +114,113 @@ pub const Parser = struct {      }      pub fn deinit(self: *Parser) void { +        for (self.nodes.items) |node| { +            switch (node.kind) { +                .block => |blk| self.allocator.free(blk), +                .function => |fun| { +                    self.allocator.free(fun.block.kind.block); +                    self.allocator.destroy(fun.block); +                }, +                else => {}, +            } +        }          self.nodes.deinit();      }      fn parseExpr(self: *Parser) !NodeExpr { -        const kind = try switch (self.tokens.peek().?) { -            .intLit => ExprKind{ -                .intLit = NodeIntlit{ -                    .intlit = (try self.tokens.consume(.intLit)).?, +        var typ: ?TypeIdent = null; +        const kind = try blk: { +            try switch (self.tokens.peek().?) 
{ +                .intLit => { +                    typ = TypeIdent{ +                        .ident = "i32", +                        .list = false, +                    }; +                    break :blk ExprKind{ .intLit = (try self.tokens.consume(.intLit)).? };                  }, -            }, -            .ident => ExprKind{ -                .ident = NodeIdent{ -                    .ident = (try self.tokens.consume(.ident)).?, +                .ident => { +                    const ident = (try self.tokens.consume(.ident)).?; +                    const symbType = if (self.top.get(ident.ident)) |sym| +                        sym.Type +                    else +                        return ParsingError.UnknownIdentifier; +                    typ = TypeIdent{ +                        .ident = symbType.toString(), +                        .list = false, +                    }; +                    break :blk ExprKind{ .ident = ident };                  }, -            }, -            else => ParsingError.InvalidExpression, +                else => break :blk ParsingError.InvalidExpression, +            };          };          return NodeExpr{              .id = self.reserveId(),              .kind = kind,              .isConst = kind.isConstant(), -            .typ = null, +            .typ = typ,              .symtable = self.top,          };      } -    fn parseStmt(self: *Parser) !NodeStmt { +    fn parseStmt(self: *Parser) ParsingError!NodeStmt {          return switch (self.tokens.peek().?) 
{              .exit => try self.parseExit(),              .constant => try self.parseConstant(),              .variable => try self.parseVariable(),              .ident => try self.parseAssign(), +            .fun => try self.parseFunc(),              else => ParsingError.InvalidStatement,          };      } +    fn parseFunc(self: *Parser) ParsingError!NodeStmt { +        var typ: ?TypeIdent = null; +        _ = try self.tokens.consume(.fun); +        const ident = (try self.tokens.consume(.ident)).?; +        _ = try self.tokens.consume(.openParen); +        //TODO: Argument Parsing +        _ = try self.tokens.consume(.closeParen); +        if (tok.checkType(self.tokens.peek().?, .arrow)) { +            self.tokens.skip(); +            typ = TypeIdent{ .ident = (try self.tokens.consume(.ident)).?.ident, .list = false }; +        } + +        const block = try self.allocator.create(NodeStmt); +        block.* = try self.parseBlock(); + +        const kind = StmtKind{ +            .function = .{ +                .ident = ident, +                .args = &[_]TypeIdent{}, +                .retType = typ, +                .block = block, +            }, +        }; + +        return NodeStmt{ +            .id = self.reserveId(), +            .kind = kind, +            .symtable = self.top, +        }; +    } + +    fn parseBlock(self: *Parser) !NodeStmt { +        _ = try self.tokens.consume(.openBrace); +        var stmtArr = std.ArrayList(NodeStmt).init(self.allocator); +        while (!tok.checkType(self.tokens.peek().?, .closeBrace)) +            try stmtArr.append(try self.parseStmt()); +        _ = try self.tokens.consume(.closeBrace); +        const kind = StmtKind{ +            .block = try stmtArr.toOwnedSlice(), +        }; + +        return NodeStmt{ +            .id = self.reserveId(), +            .kind = kind, +            .symtable = try self.top.makeChild(), +        }; +    } +      fn parseAssign(self: *Parser) !NodeStmt {          const ident = 
(try self.tokens.consume(.ident)).?;          _ = try self.tokens.consume(.equal); @@ -150,11 +239,11 @@ pub const Parser = struct {          };      } -    fn parseExit(self: *Parser) !NodeStmt { +    fn parseExit(self: *Parser) ParsingError!NodeStmt {          _ = try self.tokens.consume(.exit);          const expr = try self.parseExpr();          _ = try self.tokens.consume(.semiCol); -        const kind = StmtKind{ .exit = NodeExit{ .expr = expr } }; +        const kind = StmtKind{ .exit = expr };          return NodeStmt{              .symtable = self.top,              .kind = kind, @@ -200,14 +289,30 @@ pub const Parser = struct {          };      } -    pub fn parse(self: *Parser) ![]const NodeStmt { +    pub fn parse(self: *Parser) !NodeStmt {          while (self.tokens.peek()) |_|              try self.nodes.append(try self.parseStmt()); -        return self.nodes.items; +        return NodeStmt{ +            .id = self.reserveId(), +            .kind = StmtKind{ .block = self.nodes.items }, +            .symtable = self.top, +        };      }  }; +pub const TypeIdent = struct { +    ident: []const u8, +    list: bool, +}; + +pub const NodeFunction = struct { +    ident: Token, +    args: []const TypeIdent, +    retType: ?TypeIdent, +    block: *NodeStmt, +}; +  pub const NodeAssign = struct {      ident: Token,      expr: NodeExpr, @@ -223,23 +328,18 @@ pub const NodeVar = struct {      expr: NodeExpr,  }; -pub const NodeExit = struct { -    expr: NodeExpr, -}; - -pub const NodeIntlit = struct { -    intlit: Token, -}; - -pub const NodeIdent = struct { -    ident: Token, -}; +pub const NodeExit = NodeExpr; +pub const NodeIntlit = Token; +pub const NodeIdent = Token; +pub const NodeBlock = []const NodeStmt;  pub const StmtKind = union(enum) { +    function: NodeFunction,      exit: NodeExit,      defValue: NodeValue,      defVar: NodeVar,      assignVar: NodeAssign, +    block: NodeBlock,  };  pub const ExprKind = union(enum) { @@ -256,24 +356,36 @@ pub 
const ExprKind = union(enum) {  test "Parser" {      const expect = std.testing.expect; -    const src = "exit 120;"; +    const src = "return 120;";      var tokenizer = tok.Tokenizer.init(std.testing.allocator, src);      defer tokenizer.deinit();      const toks = try tokenizer.tokenize(); -    var parser = Parser.init(std.testing.allocator, toks); + +    var symbTable = try symb.SymbolTable.init(std.testing.allocator); +    defer symbTable.deinit(); + +    var parser = Parser.init(std.testing.allocator, toks, symbTable);      defer parser.deinit();      const parseTree = try parser.parse(); -    const exp: []const NodeStmt = &[_]NodeStmt{NodeStmt{ -        .exit = NodeExit{ -            .expr = NodeExpr{ -                .intLit = NodeIntlit{ -                    .intlit = Token{ -                        .intLit = 120, +    const children = try parseTree.children(std.testing.allocator); +    defer std.testing.allocator.free(children); +    const exp: []const Node = &[_]Node{Node{ +        .Stmt = NodeStmt{ +            .id = 2, +            .symtable = symbTable, +            .kind = StmtKind{ +                .exit = NodeExpr{ +                    .id = 1, +                    .kind = ExprKind{ +                        .intLit = Token{ .intLit = 120 },                      }, +                    .symtable = symbTable, +                    .typ = TypeIdent{ .list = false, .ident = "i32" }, +                    .isConst = true,                  },              },          },      }}; -    for (parseTree, exp) |stmt, expStmt| +    for (children, exp) |stmt, expStmt|          try expect(std.meta.eql(stmt, expStmt));  } diff --git a/src/symtable.zig b/src/symtable.zig index 0ff8fc8..72eecc7 100644 --- a/src/symtable.zig +++ b/src/symtable.zig @@ -6,7 +6,7 @@ const Scope = struct {      symbs: std.StringHashMap(Symbol),  }; -const Symbol = union(enum) { +pub const Symbol = union(enum) {      Type: SymbType,      Value: SymbValue,  }; @@ -15,12 +15,28 @@ pub const 
SymbType = union(enum) {      Void,      Integer,      String, +    Function: struct { +        input: []const SymbType, +        output: *SymbType, +    }, +    pub fn toSymb(self: SymbType) Symbol { +        return Symbol{ .Type = self }; +    } +    pub fn toString(self: SymbType) []const u8 { +        return switch (self) { +            .Integer => "i32", +            else => "void", +        }; +    }  }; -const SymbValue = struct { +pub const SymbValue = struct {      typ: SymbType,      id: u32,      mut: bool, +    pub fn toSymb(self: SymbValue) Symbol { +        return Symbol{ .Value = self }; +    }  };  pub const SymbolTable = struct { @@ -41,19 +57,35 @@ pub const SymbolTable = struct {      pub fn deinit(self: *SymbolTable) void {          if (self.scope) |scope| { +            var iter = scope.symbs.iterator(); +            while (iter.next()) |entry| { +                switch (entry.value_ptr.*) { +                    .Type => |t| switch (t) { +                        .Function => |f| { +                            self.allocator.destroy(f.output); +                            self.allocator.free(f.input); +                        }, +                        else => {}, +                    }, +                    else => {}, +                } +            }              scope.symbs.deinit();              self.allocator.destroy(scope);          } +        self.allocator.destroy(self);      } -    pub fn makeChild(self: *SymbolTable) SymbolTable { +    pub fn makeChild(self: *SymbolTable) !*SymbolTable {          const scope = try self.allocator.create(Scope); -        scope.par = self; +        scope.par = self.scope;          scope.symbs = std.StringHashMap(Symbol).init(self.allocator); -        return SymbolTable{ +        const stable: *SymbolTable = try self.allocator.create(SymbolTable); +        stable.* = .{              .scope = scope,              .allocator = self.allocator,          }; +        return stable;      }      pub fn 
parent(self: SymbolTable) ?*SymbolTable { @@ -92,20 +124,30 @@ pub const SymbolTable = struct {          return null;      } -    pub fn insert(self: *SymbolTable, ident: []const u8, symbol: Symbol) bool { +    pub fn getType(self: *SymbolTable, typ: pars.TypeIdent) ?SymbType { +        if (self.get(typ.ident)) |symb| return symb.Type; +        return null; +    } + +    pub fn insert(self: *SymbolTable, ident: []const u8, symbol: Symbol) !bool {          if (self.scope) |scope| { -            if (scope.symbs.getEntry(ident)) return false; -            scope.symbs.put(ident, symbol); +            if (scope.symbs.getEntry(ident)) |_| return false; +            try scope.symbs.put(ident, symbol);              return true;          }          return false;      }  }; -const Populator = struct { +pub const Populator = struct {      id: u32,      allocator: std.mem.Allocator, +    fn reserveId(self: *Populator) u32 { +        defer self.id += 1; +        return self.id; +    } +      pub fn init(allocator: std.mem.Allocator) Populator {          return .{              .id = 0, @@ -113,37 +155,88 @@ const Populator = struct {          };      } -    fn populateSymtable(self: *Populator, node: *pars.Node) void { -        switch (node) { +    pub fn populateSymtable(self: *Populator, node: *const pars.Node) !void { +        switch (node.*) {              .Stmt => |stmt| {                  const table: *SymbolTable = stmt.symtable; -                switch (stmt.kind) { +                try switch (stmt.kind) {                      .defVar => |variable| { -                        const symbol = self.buildValueSymb( +                        const symbol: Symbol = try self.buildValueSymb(                              table, -                            if (variable.expr.typ) |typ| typ else .Integer, +                            if (variable.expr.typ) |typ| typ else pars.TypeIdent{ .ident = "i32", .list = false },                              true,                          ); - 
                       table.insert(variable.ident, symbol); +                        if (!try table.insert(variable.ident.ident, symbol)) return error.FailedToInsert;                      },                      .defValue => |value| { -                        const symbol = self.buildValueSymb( +                        const symbol: Symbol = try self.buildValueSymb(                              table, -                            if (value.expr.typ) |typ| typ else .Integer, -                            true, +                            if (value.expr.typ) |typ| typ else pars.TypeIdent{ .ident = "i32", .list = false }, +                            false,                          ); -                        table.insert(value.ident, symbol); +                        if (!try table.insert(value.ident.ident, symbol)) return error.FailedToInsert;                      }, -                } +                    .block => { +                        const children = try stmt.children(self.allocator); +                        defer self.allocator.free(children); +                        for (children) |child| try self.populateSymtable(&child); +                    }, +                    .function => |fun| { +                        const symbol: Symbol = try self.buildFunctionSymb( +                            table, +                            fun.args, +                            fun.retType, +                        ); +                        if (!try table.insert(fun.ident.ident, symbol)) return error.FailedToInsert; +                    }, + +                    .exit => {}, +                    else => error.Unimplemented, +                };              },              else => { -                for (node.children(self.allocator)) |child| -                    populateSymtable(&child); +                for (try node.children(self.allocator)) |child| +                    try self.populateSymtable(&child);              },          }      } -    fn 
buildValueSymb(self: *Populator, table: *SymbolTable, typ: SymbType, mutable: bool) Symbol { -        const newTyp = table.getType(typ); +    fn buildFunctionSymb( +        self: *Populator, +        table: *SymbolTable, +        args: []const pars.TypeIdent, +        retType: ?pars.TypeIdent, +    ) !Symbol { +        var inputArr = std.ArrayList(SymbType).init(self.allocator); +        for (args) |arg| try inputArr.append(table.getType(arg) orelse SymbType.Void); +        const input = try inputArr.toOwnedSlice(); + +        const output = try self.allocator.create(SymbType); +        output.* = if (retType) |typ| table.getType(typ).? else SymbType.Void; + +        return Symbol{ +            .Value = SymbValue{ +                .mut = true, +                .id = self.reserveId(), +                .typ = SymbType{ +                    .Function = .{ +                        .input = input, +                        .output = output, +                    }, +                }, +            }, +        }; +    } + +    fn buildValueSymb(self: *Populator, table: *SymbolTable, typ: pars.TypeIdent, mutable: bool) !Symbol { +        if (table.getType(typ)) |newTyp| { +            const value = SymbValue{ +                .typ = newTyp, +                .id = self.reserveId(), +                .mut = mutable, +            }; +            return value.toSymb(); +        } +        return error.UnknownType;      }  }; diff --git a/src/tokenize.zig b/src/tokenize.zig index ad263c2..770f483 100644 --- a/src/tokenize.zig +++ b/src/tokenize.zig @@ -1,36 +1,57 @@  const std = @import("std"); -const TokenizeError = error{ +pub const TokenizeError = error{      UnknownToken,      UnexpectedEOF, +    ExpectedToken,  };  pub const TokenType = enum { +    // Runtime Values      ident,      intLit, +    // Keywords      constant,      variable,      exit, +    fun, +    // Operators      plus,      minus,      star,      slash,      semiCol,      equal, +    // Symbols +    
openBrace, +    closeBrace, +    openParen, +    closeParen, +    arrow,  };  pub const Token = union(TokenType) { +    //RuntimeVar      ident: []const u8,      intLit: i32, +    // Keywords      constant,      variable,      exit, +    fun, +    // Operators      plus,      minus,      star,      slash,      semiCol,      equal, +    // Symbols +    openBrace, +    closeBrace, +    openParen, +    closeParen, +    arrow,      pub fn fromChar(char: u8) !Token {          return switch (char) { @@ -40,15 +61,20 @@ pub const Token = union(TokenType) {              '/' => .slash,              ';' => .semiCol,              '=' => .equal, +            '{' => .openBrace, +            '}' => .closeBrace, +            '(' => .openParen, +            ')' => .closeParen,              else => TokenizeError.UnknownToken,          };      }      pub fn fromStr(str: []const u8) Token {          const eql = std.mem.eql; -        if (eql(u8, str, "exit")) return .exit; +        if (eql(u8, str, "return")) return .exit;          if (eql(u8, str, "const")) return .constant;          if (eql(u8, str, "var")) return .variable; +        if (eql(u8, str, "fn")) return .fun;          return Token{ .ident = str };      }  }; @@ -91,7 +117,7 @@ pub fn Iterator(comptime typ: type) type {          pub fn consume(self: *Iterator(typ), comptime expected: TokenType) !?typ {              if (typ != Token) return error.TokenIteratorOnly;              if (!checkType(self.peek().?, expected)) -                return error.ExpectedToken; +                return TokenizeError.ExpectedToken;              return self.next();          } @@ -134,6 +160,15 @@ pub const Tokenizer = struct {          while (self.src.peek()) |char| {              try switch (char) { +                '-' => { +                    self.src.skip(); +                    if (self.src.peek().? 
!= '>') { +                        try self.toks.append(.minus); +                        continue; +                    } +                    self.src.skip(); +                    try self.toks.append(.arrow); +                },                  ' ', '\n', '\t' => self.src.skip(),                  '0'...'9' => {                      while (std.ascii.isDigit(self.src.peek().?)) @@ -161,7 +196,7 @@ pub const Tokenizer = struct {  test "Tokenize Expression" {      const expect = std.testing.expect; -    const testSource: []const u8 = "exit 120 + 150 - 260 * 12 / 5 + variable;"; +    const testSource: []const u8 = "return 120 + 150 - 260 * 12 / 5 + variable;";      var tokenizer = Tokenizer.init(std.testing.allocator, testSource);      defer tokenizer.deinit();      const tokens = try tokenizer.tokenize(); @@ -219,6 +254,7 @@ test "Tokenize variable" {          }      }  } +  test "Tokenize constant" {      const expect = std.testing.expect;      const testSource: []const u8 = "const five = 5;"; @@ -243,3 +279,43 @@ test "Tokenize constant" {          }      }  } + +test "Tokenize Function" { +    const expect = std.testing.expect; +    const testSource: []const u8 = +        \\fn main() -> i32 { +        \\  return 7; +        \\} +    ; +    var tokenizer = Tokenizer.init(std.testing.allocator, testSource); +    defer tokenizer.deinit(); +    const tokens = try tokenizer.tokenize(); +    const expected = &[_]Token{ +        .fun, +        .{ .ident = "main" }, +        .openParen, +        .closeParen, +        .arrow, +        .{ .ident = "i32" }, +        .openBrace, +        .exit, +        .{ .intLit = 7 }, +        .semiCol, +        .closeBrace, +    }; +    for (tokens, expected) |act, exp| { +        switch (act) { +            .ident => |v| try expect(std.mem.eql(u8, exp.ident, v)), +            .fun => |v| try expect(v == exp.fun), +            .arrow => |v| try expect(v == exp.arrow), +            .intLit => |v| try expect(v == exp.intLit), +            
.exit => |v| try expect(v == exp.exit), +            .closeParen => |v| try expect(v == exp.closeParen), +            .openParen => |v| try expect(v == exp.openParen), +            .openBrace => |v| try expect(v == exp.openBrace), +            .closeBrace => |v| try expect(v == exp.closeBrace), +            .semiCol => |v| try expect(v == exp.semiCol), +            else => try expect(1 == 0), +        } +    } +} | 
