diff options
| author | Nic Gaffney <gaffney_nic@protonmail.com> | 2024-08-06 02:24:04 -0500 |
|---|---|---|
| committer | Nic Gaffney <gaffney_nic@protonmail.com> | 2024-08-06 02:24:04 -0500 |
| commit | 6a7467264b506e5812b2d9a8cd55fa307f2b5a10 (patch) | |
| tree | 97c136f9f65a575d940f647b7c2130b3454fa88d | |
| parent | f8542ab4c65d64e7713063460d750901e7b0362d (diff) | |
| download | calico-6a7467264b506e5812b2d9a8cd55fa307f2b5a10.tar.gz | |
ok so i messed up
| -rw-r--r-- | .gitmodules | 4 | ||||
| -rw-r--r-- | build.zig | 23 | ||||
| -rw-r--r-- | build.zig.zon | 7 | ||||
| -rw-r--r-- | examples/test1.gft | 7 | ||||
| m--------- | lib/llvm-zig (renamed from vendors/llvm-zig) | 0 | ||||
| -rw-r--r-- | src/codegen.zig | 188 | ||||
| -rw-r--r-- | src/main.zig | 28 | ||||
| -rw-r--r-- | src/parser.zig | 196 | ||||
| -rw-r--r-- | src/symtable.zig | 141 | ||||
| -rw-r--r-- | src/tokenize.zig | 84 |
10 files changed, 529 insertions, 149 deletions
diff --git a/.gitmodules b/.gitmodules index 410faa0..a957321 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "vendors/llvm-zig"] - path = vendors/llvm-zig +[submodule "lib/llvm-zig"] + path = lib/llvm-zig url = https://github.com/dwclake/llvm-zig.git @@ -1,6 +1,6 @@ const std = @import("std"); -pub fn build(b: *std.Build) void { +pub fn build(b: *std.Build) !void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); @@ -12,8 +12,27 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); + const lib = b.addStaticLibrary(.{ + .name = "llvm", + .root_source_file = b.path("lib/llvm-zig/src/llvm.zig"), + .target = target, + .optimize = optimize, + }); + lib.defineCMacro("_FILE_OFFSET_BITS", "64"); + lib.defineCMacro("__STDC_CONSTANT_MACROS", null); + lib.defineCMacro("__STDC_FORMAT_MACROS", null); + lib.defineCMacro("__STDC_LIMIT_MACROS", null); + lib.linkSystemLibrary("z"); + lib.linkLibC(); + lib.linkSystemLibrary("LLVM-17"); + b.installArtifact(exe); + b.installArtifact(lib); + + _ = try b.modules.put("llvm", &lib.root_module); + exe.root_module.addImport("llvm", b.modules.get("llvm").?); + const run_cmd = b.addRunArtifact(exe); run_cmd.step.dependOn(b.getInstallStep()); @@ -49,6 +68,8 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); + codegen_unit_tests.root_module.addImport("llvm", &lib.root_module); + const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); const run_token_unit_tests = b.addRunArtifact(token_unit_tests); const run_parse_unit_tests = b.addRunArtifact(parse_unit_tests); diff --git a/build.zig.zon b/build.zig.zon index cf62921..77ba4d7 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -2,7 +2,12 @@ .name = "calico", .version = "0.0.1", - .dependencies = .{}, + .dependencies = .{ + .@"llvm-zig" = .{ + .url = "lib/llvm-zig/", + .hash = "12209e03a708a6d54551a1fc9fc9b94aa379bfe13e48087cf69529c5fd98ae443aa1", + }, + }, .paths = .{ "", }, diff --git 
a/examples/test1.gft b/examples/test1.gft index 3ae0040..1f4ac3b 100644 --- a/examples/test1.gft +++ b/examples/test1.gft @@ -1,4 +1,3 @@ -const x = 1; -var y = 4; -y = 7; -exit y; +fn main() -> i32 { + return 7; +} diff --git a/vendors/llvm-zig b/lib/llvm-zig -Subproject 28880cd947da96bcb8213c630714baf87cf7de9 +Subproject 28880cd947da96bcb8213c630714baf87cf7de9 diff --git a/src/codegen.zig b/src/codegen.zig index a1df34f..e2889a6 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -1,46 +1,72 @@ const std = @import("std"); const parse = @import("parser.zig"); +const symb = @import("symtable.zig"); +const llvm = @import("llvm"); +const analysis = llvm.analysis; +const core = llvm.core; +const target = llvm.target; +const types = llvm.types; + +const CodegenError = error{ + OutOfMemory, +}; + +fn toLLVMtype(typ: parse.TypeIdent, sym: *symb.SymbolTable) types.LLVMTypeRef { + if (sym.getType(typ)) |t| { + return switch (t) { + .Integer => core.LLVMInt32Type(), + .Void => core.LLVMVoidType(), + else => core.LLVMVoidType(), + }; + } + return core.LLVMVoidType(); +} pub const Generator = struct { - root: []const parse.NodeStmt, + root: parse.NodeStmt, allocator: std.mem.Allocator, - code: std.ArrayList(u8), + builder: types.LLVMBuilderRef, + context: types.LLVMContextRef, + module: types.LLVMModuleRef, + currentFunc: ?types.LLVMValueRef, + currentFuncIsVoid: bool, + + pub fn init(allocator: std.mem.Allocator, root: parse.NodeStmt) Generator { + _ = target.LLVMInitializeNativeTarget(); + _ = target.LLVMInitializeNativeAsmPrinter(); + _ = target.LLVMInitializeNativeAsmParser(); + + const context = core.LLVMContextCreate(); + const builder = core.LLVMCreateBuilderInContext(context); + const module = core.LLVMModuleCreateWithNameInContext("_calico_start", context); - pub fn init(allocator: std.mem.Allocator, stmts: []const parse.NodeStmt) Generator { return .{ - .root = stmts, + .root = root, .allocator = allocator, - .code = std.ArrayList(u8).init(allocator), + .builder = 
builder, + .context = context, + .module = module, + .currentFunc = null, + .currentFuncIsVoid = false, }; } pub fn deinit(self: *Generator) void { - self.code.deinit(); + // Shutdown LLVM + defer core.LLVMShutdown(); + defer core.LLVMDisposeModule(self.module); + defer core.LLVMDisposeBuilder(self.builder); + + //self.code.deinit(); } fn genExit(self: *Generator, exit: parse.NodeExit) !void { - const expr = exit.expr; - const newCode = - switch (expr.kind) { - .intLit => |intlit| try std.fmt.allocPrint(self.allocator, - \\ mov rax, 60 - \\ mov rdi, {d} - \\ syscall - \\ - , .{ - intlit.intlit.intLit, - }), - .ident => |ident| try std.fmt.allocPrint(self.allocator, - \\ mov rax, 60 - \\ mov rdi, [{s}] - \\ syscall - \\ - , .{ - ident.ident.ident, - }), - }; - try self.code.appendSlice(newCode); - self.allocator.free(newCode); + const expr = exit; + const val = core.LLVMConstInt(core.LLVMInt32Type(), switch (expr.kind) { + .intLit => |i| @intCast(i.intLit), + .ident => unreachable, + }, 0); + _ = core.LLVMBuildRet(self.builder, val); } fn genVar(self: *Generator, value: parse.NodeVar) !void { @@ -49,7 +75,7 @@ pub const Generator = struct { \\ {s}: dw {d} \\ , .{ value.ident.ident, switch (value.expr.kind) { - .intLit => |intlit| intlit.intlit.intLit, + .intLit => |intlit| intlit.intLit, else => return error.NotImplemented, } }); defer self.allocator.free(str); @@ -62,7 +88,7 @@ pub const Generator = struct { \\ {s}: dw {d} \\ , .{ value.ident.ident, switch (value.expr.kind) { - .intLit => |intlit| intlit.intlit.intLit, + .intLit => |intlit| intlit.intLit, else => return error.NotImplemented, } }); defer self.allocator.free(str); @@ -77,7 +103,7 @@ pub const Generator = struct { \\ mov [{s}], rax \\ , .{ - intlit.intlit.intLit, + intlit.intLit, assign.ident.ident, }), .ident => |ident| try std.fmt.allocPrint(self.allocator, @@ -85,7 +111,7 @@ pub const Generator = struct { \\ mov [{s}], rax \\ , .{ - ident.ident.ident, + ident.ident, assign.ident.ident, }), }; @@ 
-93,46 +119,92 @@ pub const Generator = struct { self.allocator.free(newCode); } + fn genBlock(self: *Generator, block: []const parse.NodeStmt) CodegenError!void { + for (block) |stmt| try self.genStmt(stmt); + } + + fn genFunc(self: *Generator, stmt: parse.NodeStmt) !void { + const fun = stmt.kind.function; + const table = stmt.symtable; + const block = fun.block; + const codeSlice = block.kind.block; + const funcName: [*:0]const u8 = try self.allocator.dupeZ(u8, fun.ident.ident); + + const retType = toLLVMtype(fun.retType.?, table); + var params = [0]types.LLVMTypeRef{}; + const funcType = core.LLVMFunctionType(retType, @ptrCast(&params), 0, 0); + const func = core.LLVMAddFunction(self.module, funcName, funcType); + self.currentFunc = func; + self.currentFuncIsVoid = switch (table.getType(fun.retType.?).?) { + .Void => true, + else => false, + }; + + const function: types.LLVMValueRef = self.currentFunc.?; + const codeBlock = core.LLVMAppendBasicBlockInContext(self.context, function, "entry"); + core.LLVMPositionBuilderAtEnd(self.builder, codeBlock); + const bodyTable = block.symtable; + _ = bodyTable; + //TODO: codegen for args + + try self.genBlock(codeSlice); + _ = if (self.currentFuncIsVoid) core.LLVMBuildRetVoid(self.builder); + } + + fn genStmt(self: *Generator, stmt: parse.NodeStmt) !void { + try switch (stmt.kind) { + .exit => |expr| self.genExit(expr), + .function => self.genFunc(stmt), + else => {}, + }; + } + pub fn generate(self: *Generator) ![]const u8 { - try self.code.appendSlice( - \\section .text - \\ global _start - \\_start: - \\ - ); - for (self.root) |stmt| { - switch (stmt.kind) { - .exit => |exit| try self.genExit(exit), - .defValue => |defValue| try self.genValue(defValue), - .defVar => |defVar| try self.genVar(defVar), - .assignVar => |assign| try self.genAssign(assign), - } - } - return self.code.items; + try switch (self.root.kind) { + .block => |b| { + for (b) |stmt| + try self.genStmt(stmt); + }, + else => error.InvalidTop, + }; + const 
string: []const u8 = std.mem.span(core.LLVMPrintModuleToString(self.module)); + return string; } }; test "Codegen exit" { const tok = @import("tokenize.zig"); const expect = std.testing.expect; - const src = "exit 120;"; + const main = @import("main.zig"); + + const src = + \\fn main() -> i32 { + \\ return 7; + \\} + ; + const expected = + \\; ModuleID = '_calico_start' + \\source_filename = "_calico_start" + \\ + \\define i32 @main() { + \\entry: + \\ ret i32 7 + \\} + \\ + ; var tokenizer = tok.Tokenizer.init(std.testing.allocator, src); defer tokenizer.deinit(); const toks = try tokenizer.tokenize(); - var parser = parse.Parser.init(std.testing.allocator, toks); + var symbTable: *symb.SymbolTable = try main.initSymbolTable(std.testing.allocator); + defer symbTable.deinit(); + var parser = parse.Parser.init(std.testing.allocator, toks, symbTable); defer parser.deinit(); const parseTree = try parser.parse(); + var pop = symb.Populator.init(std.testing.allocator); + var treeNode = parseTree.asNode(); + try pop.populateSymtable(&treeNode); var gen = Generator.init(std.testing.allocator, parseTree); defer gen.deinit(); const actual = try gen.generate(); - const expected = - \\section .text - \\ global _start - \\_start: - \\ mov rax, 60 - \\ mov rdi, 120 - \\ syscall - \\ - ; try expect(std.mem.eql(u8, actual, expected)); } diff --git a/src/main.zig b/src/main.zig index 53421f4..248640b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -31,7 +31,7 @@ pub fn main() !void { if (err != error.PathAlreadyExists) return err; // Setup native code writer - const outFileName = try getFileName(allocator, out_name, "asm"); + const outFileName = try getFileName(allocator, out_name, "ll"); defer allocator.free(outFileName); const outfile = try std.fs.cwd().createFile(outFileName, .{}); const outWriter = outfile.writer(); @@ -47,11 +47,15 @@ pub fn main() !void { const tokens = try tokenizer.tokenize(); // Parse - var symbTable = try symb.SymbolTable.init(allocator); + var symbTable 
= try initSymbolTable(allocator); defer symbTable.deinit(); - var parser = parse.Parser.init(allocator, tokens, &symbTable); + + var parser = parse.Parser.init(allocator, tokens, symbTable); defer parser.deinit(); const tree = try parser.parse(); + var pop = symb.Populator.init(allocator); + var treeNode = tree.asNode(); + try pop.populateSymtable(&treeNode); // Codegen var generator = gen.Generator.init(allocator, tree); @@ -59,18 +63,9 @@ pub fn main() !void { const code = try generator.generate(); try outWriter.writeAll(code); - // Run nasm and ld to build the executable - // TODO: switch to qbe or llvm (preferabbly qbe) - const nasmargv = [_][]const u8{ "nasm", "-felf64", outFileName }; - const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = allocator }); - defer allocator.free(nasmproc.stdout); - defer allocator.free(nasmproc.stderr); - - const ldFile = try getFileName(allocator, out_name, "o"); - defer allocator.free(ldFile); const binFile = try getFileName(allocator, out_name, ""); defer allocator.free(binFile); - const ldargv = [_][]const u8{ "ld", "-o", binFile, ldFile }; + const ldargv = [_][]const u8{ "clang", "-o", binFile, outFileName }; const ldproc = try std.process.Child.run(.{ .argv = &ldargv, .allocator = allocator }); defer allocator.free(ldproc.stdout); defer allocator.free(ldproc.stderr); @@ -82,3 +77,10 @@ inline fn getFileName(allocator: std.mem.Allocator, out_name: []const u8, fileTy if (fileType.len == 0) hasDot = ""; return try std.fmt.allocPrint(allocator, "calico-out/{s}{s}{s}", .{ out_name, hasDot, fileType }); } + +pub fn initSymbolTable(allocator: std.mem.Allocator) !*symb.SymbolTable { + var table = try symb.SymbolTable.init(allocator); + const intSymb: symb.SymbType = symb.SymbType.Integer; + if (!try table.insert("i32", intSymb.toSymb())) return error.FailedToInsert; + return table; +} diff --git a/src/parser.zig b/src/parser.zig index feaa226..fa0acec 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ 
-11,18 +11,22 @@ const ParsingError = error{ ExpectedEqual, ExpectedIdentifier, InvalidStatement, + UnknownIdentifier, + UnknownToken, + UnexpectedEOF, + ExpectedToken, + OutOfMemory, }; pub const Node = union(enum) { Expr: NodeExpr, Stmt: NodeStmt, - pub fn children(self: Node, allocator: std.mem.Allocator) ![]Node { var childrenArray = std.ArrayList(Node).init(allocator); defer childrenArray.deinit(); switch (self) { - .Expr => |expr| try childrenArray.appendSlice(try expr.children()), - .Stmt => |stmt| try childrenArray.appendSlice(try stmt.children()), + .Expr => |expr| try childrenArray.appendSlice(try expr.children(allocator)), + .Stmt => |stmt| try childrenArray.appendSlice(try stmt.children(allocator)), } return try childrenArray.toOwnedSlice(); } @@ -32,7 +36,7 @@ pub const NodeExpr = struct { id: u32, kind: ExprKind, symtable: *symb.SymbolTable, - typ: ?symb.SymbType, + typ: ?TypeIdent, isConst: bool, pub fn asNode(self: NodeExpr) Node { @@ -49,6 +53,16 @@ pub const NodeExpr = struct { } }; +pub fn map(comptime T: type, comptime F: type, slice: []const F, func: fn (F) T) []const T { + var list: [64]T = undefined; + var max: usize = 0; + for (slice, 0..) 
|item, i| { + list[i] = func(item); + max = i + 1; + } + return list[0..max]; +} + pub const NodeStmt = struct { id: u32, kind: StmtKind, @@ -62,11 +76,15 @@ pub const NodeStmt = struct { var childrenArray = std.ArrayList(Node).init(allocator); defer childrenArray.deinit(); switch (self.kind) { - .exit => |exit| try childrenArray.append(exit.expr.asNode()), + .exit => |exit| try childrenArray.append(exit.asNode()), .defValue => |value| try childrenArray.append(value.expr.asNode()), .defVar => |variable| try childrenArray.append(variable.expr.asNode()), .assignVar => |assign| try childrenArray.append(assign.expr.asNode()), - else => {}, + .block => |block| { + const blockChildren = map(Node, NodeStmt, block, NodeStmt.asNode); + for (blockChildren) |child| try childrenArray.append(child); + }, + .function => |fun| try childrenArray.append(fun.block.*.asNode()), } return try childrenArray.toOwnedSlice(); } @@ -96,42 +114,113 @@ pub const Parser = struct { } pub fn deinit(self: *Parser) void { + for (self.nodes.items) |node| { + switch (node.kind) { + .block => |blk| self.allocator.free(blk), + .function => |fun| { + self.allocator.free(fun.block.kind.block); + self.allocator.destroy(fun.block); + }, + else => {}, + } + } self.nodes.deinit(); } fn parseExpr(self: *Parser) !NodeExpr { - const kind = try switch (self.tokens.peek().?) { - .intLit => ExprKind{ - .intLit = NodeIntlit{ - .intlit = (try self.tokens.consume(.intLit)).?, + var typ: ?TypeIdent = null; + const kind = try blk: { + try switch (self.tokens.peek().?) { + .intLit => { + typ = TypeIdent{ + .ident = "i32", + .list = false, + }; + break :blk ExprKind{ .intLit = (try self.tokens.consume(.intLit)).? 
}; }, - }, - .ident => ExprKind{ - .ident = NodeIdent{ - .ident = (try self.tokens.consume(.ident)).?, + .ident => { + const ident = (try self.tokens.consume(.ident)).?; + const symbType = if (self.top.get(ident.ident)) |sym| + sym.Type + else + return ParsingError.UnknownIdentifier; + typ = TypeIdent{ + .ident = symbType.toString(), + .list = false, + }; + break :blk ExprKind{ .ident = ident }; }, - }, - else => ParsingError.InvalidExpression, + else => break :blk ParsingError.InvalidExpression, + }; }; return NodeExpr{ .id = self.reserveId(), .kind = kind, .isConst = kind.isConstant(), - .typ = null, + .typ = typ, .symtable = self.top, }; } - fn parseStmt(self: *Parser) !NodeStmt { + fn parseStmt(self: *Parser) ParsingError!NodeStmt { return switch (self.tokens.peek().?) { .exit => try self.parseExit(), .constant => try self.parseConstant(), .variable => try self.parseVariable(), .ident => try self.parseAssign(), + .fun => try self.parseFunc(), else => ParsingError.InvalidStatement, }; } + fn parseFunc(self: *Parser) ParsingError!NodeStmt { + var typ: ?TypeIdent = null; + _ = try self.tokens.consume(.fun); + const ident = (try self.tokens.consume(.ident)).?; + _ = try self.tokens.consume(.openParen); + //TODO: Argument Parsing + _ = try self.tokens.consume(.closeParen); + if (tok.checkType(self.tokens.peek().?, .arrow)) { + self.tokens.skip(); + typ = TypeIdent{ .ident = (try self.tokens.consume(.ident)).?.ident, .list = false }; + } + + const block = try self.allocator.create(NodeStmt); + block.* = try self.parseBlock(); + + const kind = StmtKind{ + .function = .{ + .ident = ident, + .args = &[_]TypeIdent{}, + .retType = typ, + .block = block, + }, + }; + + return NodeStmt{ + .id = self.reserveId(), + .kind = kind, + .symtable = self.top, + }; + } + + fn parseBlock(self: *Parser) !NodeStmt { + _ = try self.tokens.consume(.openBrace); + var stmtArr = std.ArrayList(NodeStmt).init(self.allocator); + while (!tok.checkType(self.tokens.peek().?, .closeBrace)) + try 
stmtArr.append(try self.parseStmt()); + _ = try self.tokens.consume(.closeBrace); + const kind = StmtKind{ + .block = try stmtArr.toOwnedSlice(), + }; + + return NodeStmt{ + .id = self.reserveId(), + .kind = kind, + .symtable = try self.top.makeChild(), + }; + } + fn parseAssign(self: *Parser) !NodeStmt { const ident = (try self.tokens.consume(.ident)).?; _ = try self.tokens.consume(.equal); @@ -150,11 +239,11 @@ pub const Parser = struct { }; } - fn parseExit(self: *Parser) !NodeStmt { + fn parseExit(self: *Parser) ParsingError!NodeStmt { _ = try self.tokens.consume(.exit); const expr = try self.parseExpr(); _ = try self.tokens.consume(.semiCol); - const kind = StmtKind{ .exit = NodeExit{ .expr = expr } }; + const kind = StmtKind{ .exit = expr }; return NodeStmt{ .symtable = self.top, .kind = kind, @@ -200,14 +289,30 @@ pub const Parser = struct { }; } - pub fn parse(self: *Parser) ![]const NodeStmt { + pub fn parse(self: *Parser) !NodeStmt { while (self.tokens.peek()) |_| try self.nodes.append(try self.parseStmt()); - return self.nodes.items; + return NodeStmt{ + .id = self.reserveId(), + .kind = StmtKind{ .block = self.nodes.items }, + .symtable = self.top, + }; } }; +pub const TypeIdent = struct { + ident: []const u8, + list: bool, +}; + +pub const NodeFunction = struct { + ident: Token, + args: []const TypeIdent, + retType: ?TypeIdent, + block: *NodeStmt, +}; + pub const NodeAssign = struct { ident: Token, expr: NodeExpr, @@ -223,23 +328,18 @@ pub const NodeVar = struct { expr: NodeExpr, }; -pub const NodeExit = struct { - expr: NodeExpr, -}; - -pub const NodeIntlit = struct { - intlit: Token, -}; - -pub const NodeIdent = struct { - ident: Token, -}; +pub const NodeExit = NodeExpr; +pub const NodeIntlit = Token; +pub const NodeIdent = Token; +pub const NodeBlock = []const NodeStmt; pub const StmtKind = union(enum) { + function: NodeFunction, exit: NodeExit, defValue: NodeValue, defVar: NodeVar, assignVar: NodeAssign, + block: NodeBlock, }; pub const ExprKind = 
union(enum) { @@ -256,24 +356,36 @@ pub const ExprKind = union(enum) { test "Parser" { const expect = std.testing.expect; - const src = "exit 120;"; + const src = "return 120;"; var tokenizer = tok.Tokenizer.init(std.testing.allocator, src); defer tokenizer.deinit(); const toks = try tokenizer.tokenize(); - var parser = Parser.init(std.testing.allocator, toks); + + var symbTable = try symb.SymbolTable.init(std.testing.allocator); + defer symbTable.deinit(); + + var parser = Parser.init(std.testing.allocator, toks, symbTable); defer parser.deinit(); const parseTree = try parser.parse(); - const exp: []const NodeStmt = &[_]NodeStmt{NodeStmt{ - .exit = NodeExit{ - .expr = NodeExpr{ - .intLit = NodeIntlit{ - .intlit = Token{ - .intLit = 120, + const children = try parseTree.children(std.testing.allocator); + defer std.testing.allocator.free(children); + const exp: []const Node = &[_]Node{Node{ + .Stmt = NodeStmt{ + .id = 2, + .symtable = symbTable, + .kind = StmtKind{ + .exit = NodeExpr{ + .id = 1, + .kind = ExprKind{ + .intLit = Token{ .intLit = 120 }, }, + .symtable = symbTable, + .typ = TypeIdent{ .list = false, .ident = "i32" }, + .isConst = true, }, }, }, }}; - for (parseTree, exp) |stmt, expStmt| + for (children, exp) |stmt, expStmt| try expect(std.meta.eql(stmt, expStmt)); } diff --git a/src/symtable.zig b/src/symtable.zig index 0ff8fc8..72eecc7 100644 --- a/src/symtable.zig +++ b/src/symtable.zig @@ -6,7 +6,7 @@ const Scope = struct { symbs: std.StringHashMap(Symbol), }; -const Symbol = union(enum) { +pub const Symbol = union(enum) { Type: SymbType, Value: SymbValue, }; @@ -15,12 +15,28 @@ pub const SymbType = union(enum) { Void, Integer, String, + Function: struct { + input: []const SymbType, + output: *SymbType, + }, + pub fn toSymb(self: SymbType) Symbol { + return Symbol{ .Type = self }; + } + pub fn toString(self: SymbType) []const u8 { + return switch (self) { + .Integer => "i32", + else => "void", + }; + } }; -const SymbValue = struct { +pub const 
SymbValue = struct { typ: SymbType, id: u32, mut: bool, + pub fn toSymb(self: SymbValue) Symbol { + return Symbol{ .Value = self }; + } }; pub const SymbolTable = struct { @@ -41,19 +57,35 @@ pub const SymbolTable = struct { pub fn deinit(self: *SymbolTable) void { if (self.scope) |scope| { + var iter = scope.symbs.iterator(); + while (iter.next()) |entry| { + switch (entry.value_ptr.*) { + .Type => |t| switch (t) { + .Function => |f| { + self.allocator.destroy(f.output); + self.allocator.free(f.input); + }, + else => {}, + }, + else => {}, + } + } scope.symbs.deinit(); self.allocator.destroy(scope); } + self.allocator.destroy(self); } - pub fn makeChild(self: *SymbolTable) SymbolTable { + pub fn makeChild(self: *SymbolTable) !*SymbolTable { const scope = try self.allocator.create(Scope); - scope.par = self; + scope.par = self.scope; scope.symbs = std.StringHashMap(Symbol).init(self.allocator); - return SymbolTable{ + const stable: *SymbolTable = try self.allocator.create(SymbolTable); + stable.* = .{ .scope = scope, .allocator = self.allocator, }; + return stable; } pub fn parent(self: SymbolTable) ?*SymbolTable { @@ -92,20 +124,30 @@ pub const SymbolTable = struct { return null; } - pub fn insert(self: *SymbolTable, ident: []const u8, symbol: Symbol) bool { + pub fn getType(self: *SymbolTable, typ: pars.TypeIdent) ?SymbType { + if (self.get(typ.ident)) |symb| return symb.Type; + return null; + } + + pub fn insert(self: *SymbolTable, ident: []const u8, symbol: Symbol) !bool { if (self.scope) |scope| { - if (scope.symbs.getEntry(ident)) return false; - scope.symbs.put(ident, symbol); + if (scope.symbs.getEntry(ident)) |_| return false; + try scope.symbs.put(ident, symbol); return true; } return false; } }; -const Populator = struct { +pub const Populator = struct { id: u32, allocator: std.mem.Allocator, + fn reserveId(self: *Populator) u32 { + defer self.id += 1; + return self.id; + } + pub fn init(allocator: std.mem.Allocator) Populator { return .{ .id = 0, @@ 
-113,37 +155,88 @@ const Populator = struct { }; } - fn populateSymtable(self: *Populator, node: *pars.Node) void { - switch (node) { + pub fn populateSymtable(self: *Populator, node: *const pars.Node) !void { + switch (node.*) { .Stmt => |stmt| { const table: *SymbolTable = stmt.symtable; - switch (stmt.kind) { + try switch (stmt.kind) { .defVar => |variable| { - const symbol = self.buildValueSymb( + const symbol: Symbol = try self.buildValueSymb( table, - if (variable.expr.typ) |typ| typ else .Integer, + if (variable.expr.typ) |typ| typ else pars.TypeIdent{ .ident = "i32", .list = false }, true, ); - table.insert(variable.ident, symbol); + if (!try table.insert(variable.ident.ident, symbol)) return error.FailedToInsert; }, .defValue => |value| { - const symbol = self.buildValueSymb( + const symbol: Symbol = try self.buildValueSymb( table, - if (value.expr.typ) |typ| typ else .Integer, - true, + if (value.expr.typ) |typ| typ else pars.TypeIdent{ .ident = "i32", .list = false }, + false, ); - table.insert(value.ident, symbol); + if (!try table.insert(value.ident.ident, symbol)) return error.FailedToInsert; }, - } + .block => { + const children = try stmt.children(self.allocator); + defer self.allocator.free(children); + for (children) |child| try self.populateSymtable(&child); + }, + .function => |fun| { + const symbol: Symbol = try self.buildFunctionSymb( + table, + fun.args, + fun.retType, + ); + if (!try table.insert(fun.ident.ident, symbol)) return error.FailedToInsert; + }, + + .exit => {}, + else => error.Unimplemented, + }; }, else => { - for (node.children(self.allocator)) |child| - populateSymtable(&child); + for (try node.children(self.allocator)) |child| + try self.populateSymtable(&child); }, } } - fn buildValueSymb(self: *Populator, table: *SymbolTable, typ: SymbType, mutable: bool) Symbol { - const newTyp = table.getType(typ); + fn buildFunctionSymb( + self: *Populator, + table: *SymbolTable, + args: []const pars.TypeIdent, + retType: ?pars.TypeIdent, 
+ ) !Symbol { + var inputArr = std.ArrayList(SymbType).init(self.allocator); + for (args) |arg| try inputArr.append(table.getType(arg) orelse SymbType.Void); + const input = try inputArr.toOwnedSlice(); + + const output = try self.allocator.create(SymbType); + output.* = if (retType) |typ| table.getType(typ).? else SymbType.Void; + + return Symbol{ + .Value = SymbValue{ + .mut = true, + .id = self.reserveId(), + .typ = SymbType{ + .Function = .{ + .input = input, + .output = output, + }, + }, + }, + }; + } + + fn buildValueSymb(self: *Populator, table: *SymbolTable, typ: pars.TypeIdent, mutable: bool) !Symbol { + if (table.getType(typ)) |newTyp| { + const value = SymbValue{ + .typ = newTyp, + .id = self.reserveId(), + .mut = mutable, + }; + return value.toSymb(); + } + return error.UnknownType; } }; diff --git a/src/tokenize.zig b/src/tokenize.zig index ad263c2..770f483 100644 --- a/src/tokenize.zig +++ b/src/tokenize.zig @@ -1,36 +1,57 @@ const std = @import("std"); -const TokenizeError = error{ +pub const TokenizeError = error{ UnknownToken, UnexpectedEOF, + ExpectedToken, }; pub const TokenType = enum { + // Runtime Values ident, intLit, + // Keywords constant, variable, exit, + fun, + // Operators plus, minus, star, slash, semiCol, equal, + // Symbols + openBrace, + closeBrace, + openParen, + closeParen, + arrow, }; pub const Token = union(TokenType) { + //RuntimeVar ident: []const u8, intLit: i32, + // Keywords constant, variable, exit, + fun, + // Operators plus, minus, star, slash, semiCol, equal, + // Symbols + openBrace, + closeBrace, + openParen, + closeParen, + arrow, pub fn fromChar(char: u8) !Token { return switch (char) { @@ -40,15 +61,20 @@ pub const Token = union(TokenType) { '/' => .slash, ';' => .semiCol, '=' => .equal, + '{' => .openBrace, + '}' => .closeBrace, + '(' => .openParen, + ')' => .closeParen, else => TokenizeError.UnknownToken, }; } pub fn fromStr(str: []const u8) Token { const eql = std.mem.eql; - if (eql(u8, str, "exit")) return 
.exit; + if (eql(u8, str, "return")) return .exit; if (eql(u8, str, "const")) return .constant; if (eql(u8, str, "var")) return .variable; + if (eql(u8, str, "fn")) return .fun; return Token{ .ident = str }; } }; @@ -91,7 +117,7 @@ pub fn Iterator(comptime typ: type) type { pub fn consume(self: *Iterator(typ), comptime expected: TokenType) !?typ { if (typ != Token) return error.TokenIteratorOnly; if (!checkType(self.peek().?, expected)) - return error.ExpectedToken; + return TokenizeError.ExpectedToken; return self.next(); } @@ -134,6 +160,15 @@ pub const Tokenizer = struct { while (self.src.peek()) |char| { try switch (char) { + '-' => { + self.src.skip(); + if (self.src.peek().? != '>') { + try self.toks.append(.minus); + continue; + } + self.src.skip(); + try self.toks.append(.arrow); + }, ' ', '\n', '\t' => self.src.skip(), '0'...'9' => { while (std.ascii.isDigit(self.src.peek().?)) @@ -161,7 +196,7 @@ pub const Tokenizer = struct { test "Tokenize Expression" { const expect = std.testing.expect; - const testSource: []const u8 = "exit 120 + 150 - 260 * 12 / 5 + variable;"; + const testSource: []const u8 = "return 120 + 150 - 260 * 12 / 5 + variable;"; var tokenizer = Tokenizer.init(std.testing.allocator, testSource); defer tokenizer.deinit(); const tokens = try tokenizer.tokenize(); @@ -219,6 +254,7 @@ test "Tokenize variable" { } } } + test "Tokenize constant" { const expect = std.testing.expect; const testSource: []const u8 = "const five = 5;"; @@ -243,3 +279,43 @@ test "Tokenize constant" { } } } + +test "Tokenize Function" { + const expect = std.testing.expect; + const testSource: []const u8 = + \\fn main() -> i32 { + \\ return 7; + \\} + ; + var tokenizer = Tokenizer.init(std.testing.allocator, testSource); + defer tokenizer.deinit(); + const tokens = try tokenizer.tokenize(); + const expected = &[_]Token{ + .fun, + .{ .ident = "main" }, + .openParen, + .closeParen, + .arrow, + .{ .ident = "i32" }, + .openBrace, + .exit, + .{ .intLit = 7 }, + .semiCol, + 
.closeBrace, + }; + for (tokens, expected) |act, exp| { + switch (act) { + .ident => |v| try expect(std.mem.eql(u8, exp.ident, v)), + .fun => |v| try expect(v == exp.fun), + .arrow => |v| try expect(v == exp.arrow), + .intLit => |v| try expect(v == exp.intLit), + .exit => |v| try expect(v == exp.exit), + .closeParen => |v| try expect(v == exp.closeParen), + .openParen => |v| try expect(v == exp.openParen), + .openBrace => |v| try expect(v == exp.openBrace), + .closeBrace => |v| try expect(v == exp.closeBrace), + .semiCol => |v| try expect(v == exp.semiCol), + else => try expect(1 == 0), + } + } +} |
