diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/codegen.zig | 304 | ||||
| -rw-r--r-- | src/main.zig | 22 | ||||
| -rw-r--r-- | src/parser.zig | 237 | ||||
| -rw-r--r-- | src/symtable.zig | 104 | ||||
| -rw-r--r-- | src/tokenize.zig | 61 |
5 files changed, 545 insertions, 183 deletions
diff --git a/src/codegen.zig b/src/codegen.zig index e9ec53b..7aff0b6 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -10,12 +10,16 @@ const types = llvm.types; const CodegenError = error{ Immutable, OutOfMemory, + IncorrectType, + UnknownIdentifier, }; -fn toLLVMtype(typ: parse.TypeIdent, sym: *symb.SymbolTable) types.LLVMTypeRef { +fn toLLVMtype(typ: parse.TypeIdent, sym: *symb.SymbolTable, expr: ?parse.NodeExpr) types.LLVMTypeRef { + _ = expr; if (sym.getType(typ)) |t| { return switch (t) { .Integer => core.LLVMInt32Type(), + .String => core.LLVMPointerType(core.LLVMInt8Type(), 0), .Void => core.LLVMVoidType(), else => core.LLVMVoidType(), }; @@ -32,15 +36,16 @@ pub const Generator = struct { currentFunc: ?types.LLVMValueRef, currentFuncIsVoid: bool, references: std.AutoHashMap(u32, types.LLVMValueRef), + stringId: u32, - pub fn init(allocator: std.mem.Allocator, root: parse.NodeStmt) Generator { + pub fn init(allocator: std.mem.Allocator, root: parse.NodeStmt, filename: [*:0]const u8) Generator { _ = target.LLVMInitializeNativeTarget(); _ = target.LLVMInitializeNativeAsmPrinter(); _ = target.LLVMInitializeNativeAsmParser(); const context = core.LLVMContextCreate(); const builder = core.LLVMCreateBuilderInContext(context); - const module = core.LLVMModuleCreateWithNameInContext("_calico_start", context); + const module = core.LLVMModuleCreateWithNameInContext(filename, context); return .{ .root = root, @@ -51,6 +56,7 @@ pub const Generator = struct { .currentFunc = null, .currentFuncIsVoid = false, .references = std.AutoHashMap(u32, types.LLVMValueRef).init(allocator), + .stringId = 0, }; } @@ -75,7 +81,15 @@ pub const Generator = struct { const table = stmt.symtable; const symbol = table.getValue(nodeVar.ident.ident).?; const value = try self.genExpr(nodeVar.expr); - const ptr = try self.genAlloc(toLLVMtype(nodeVar.expr.typ.?, table).?, nodeVar.ident.ident); + // const ptr = try self.genAlloc(toLLVMtype(nodeVar.expr.typ.?, table).?, nodeVar.ident.ident); + const ptr = try self.genAlloc( + toLLVMtype( + nodeVar.expr.typ orelse try nodeVar.expr.symtable.getValue(nodeVar.ident.ident).?.typ.toTypeIdent(self.allocator), + table, + nodeVar.expr, + ).?, + nodeVar.ident.ident, + ); _ = core.LLVMBuildStore(self.builder, value, ptr); try self.references.put(symbol.id, ptr); } @@ -85,7 +99,8 @@ pub const Generator = struct { const table = stmt.symtable; const symbol = table.getValue(nodeVar.ident.ident).?; - const ptr = try self.genAlloc(toLLVMtype(nodeVar.expr.typ.?, table), nodeVar.ident.ident); + // const ptr = try self.genAlloc(toLLVMtype(nodeVar.expr.typ.?, table), nodeVar.ident.ident); + const ptr = try self.genAlloc(toLLVMtype(nodeVar.expr.typ orelse try nodeVar.expr.inferType(self.allocator, table), table, nodeVar.expr), nodeVar.ident.ident); const value = try self.genExpr(nodeVar.expr); _ = core.LLVMBuildStore(self.builder, value, ptr); std.debug.print("\t\t\tGenerated Value {s}\n", .{nodeVar.ident.ident}); @@ -116,7 +131,7 @@ pub const Generator = struct { } fn genAssign(self: *Generator, stmt: parse.NodeStmt) !void { - std.debug.print("assign\n", .{}); + // std.debug.print("assign\n", .{}); const table = stmt.symtable; const symbol = table.get(stmt.kind.assignVar.ident.ident).?; if (!symbol.Value.mut) return CodegenError.Immutable; @@ -130,34 +145,51 @@ pub const Generator = struct { } fn genFunc(self: *Generator, stmt: parse.NodeStmt) !void { + self.references.clearAndFree(); const fun = stmt.kind.function; - const table = stmt.symtable; - const block = fun.block; - const codeSlice = block.kind.block; + var table: *symb.SymbolTable = stmt.symtable; + var block: *parse.NodeStmt = undefined; + var codeSlice: []const parse.NodeStmt = undefined; + if (fun.block != null) { + table = fun.block.?.symtable; + block = fun.block.?; + codeSlice = block.kind.block; + } const funcName: [*:0]const u8 = try self.allocator.dupeZ(u8, fun.ident.ident); - const retType = toLLVMtype(fun.retType.?, table); - var params = [0]types.LLVMTypeRef{}; - const funcType = core.LLVMFunctionType(retType, @ptrCast(¶ms), 0, 0); - const func = core.LLVMAddFunction(self.module, funcName, funcType); - self.currentFunc = func; - self.currentFuncIsVoid = switch (table.getType(fun.retType.?).?) { - .Void => true, - else => false, - }; + const retType = toLLVMtype(fun.retType.?, table, null); + var params = std.ArrayList(types.LLVMTypeRef).init(self.allocator); + for (fun.args) |arg| { + try params.append(toLLVMtype(arg.typ, table, null)); + } - const function: types.LLVMValueRef = self.currentFunc.?; - const codeBlock = core.LLVMAppendBasicBlockInContext(self.context, function, "entry"); - core.LLVMPositionBuilderAtEnd(self.builder, codeBlock); - const bodyTable = block.symtable; - _ = bodyTable; - //TODO: codegen for args + const funcType = core.LLVMFunctionType(retType, @ptrCast(params.items), @intCast(params.items.len), 0); + const func = core.LLVMAddFunction(self.module, funcName, funcType); + for (fun.args, 0..) |arg, i| { + const symbol = table.get(arg.ident).?; + const ptr: types.LLVMValueRef = core.LLVMGetParam(func, @intCast(i)); + try self.references.put(symbol.Value.id, ptr); + } - try self.genBlock(codeSlice); - _ = if (self.currentFuncIsVoid) core.LLVMBuildRetVoid(self.builder); + if (fun.block != null) { + self.currentFunc = func; + self.currentFuncIsVoid = switch (table.getType(fun.retType.?).?) { + .Void => true, + else => false, + }; + + const function: types.LLVMValueRef = func; + const codeBlock = core.LLVMAppendBasicBlockInContext(self.context, function, "entry"); + core.LLVMPositionBuilderAtEnd(self.builder, codeBlock); + const bodyTable = block.symtable; + _ = bodyTable; + + try self.genBlock(codeSlice); + _ = if (self.currentFuncIsVoid) core.LLVMBuildRetVoid(self.builder); + } } - fn genIf(self: *Generator, stmt: parse.NodeStmt) error{ OutOfMemory, Immutable }!void { + fn genIf(self: *Generator, stmt: parse.NodeStmt) CodegenError!void { const ifkind = stmt.kind.ifstmt; const block = ifkind.body; const expr = ifkind.expr; @@ -190,41 +222,41 @@ pub const Generator = struct { .defVar => self.genVar(stmt), .assignVar => self.genAssign(stmt), .ifstmt => self.genIf(stmt), + .expr => |expression| { + _ = try self.genExpr(expression); + }, + else => {}, }; } fn genExpr(self: *Generator, expr: parse.NodeExpr) !types.LLVMValueRef { - std.debug.print("======\n\t\tExpr: {any}\n======\n", .{expr.kind}); return switch (expr.kind) { - .call => |callee| blk: { - const ident: [*:0]const u8 = try self.allocator.dupeZ(u8, callee.ident.ident); - const func = core.LLVMGetNamedFunction(self.module, ident); - const symbol = expr.symtable.get(callee.ident.ident).?; - const retType = asBasicType(symbol.Value.typ.Function.output.*); + .ident => |id| blk: { + // std.debug.print("getValue({s})\n", .{id.ident}); + const table = expr.symtable; - const paramTypes = ([_]types.LLVMTypeRef{})[0..0]; - var args = std.ArrayList(types.LLVMValueRef).init(self.allocator); - for (callee.args.items) |item| - try args.append(try self.genExpr(item)); + // std.debug.print("\n\nEXPERTABLE\n\n", .{}); + // var iterTable = table.scope.?.symbs.iterator(); + // while (iterTable.next()) |entry| { + // // std.debug.print("{s} -> {any}\n", .{ entry.key_ptr.*, entry.value_ptr.* }); + // } + // std.debug.print("\n\nEXPERTABLE\n\n", .{}); + const symbol = table.getValue(id.ident).?; + const ptr = self.references.get(symbol.id).?; + if (core.LLVMIsAArgument(ptr)) |_| + break :blk ptr; - const typ = core.LLVMFunctionType(retType.?, @ptrCast(@constCast(paramTypes)), 0, 0); - const call = core.LLVMBuildCall2( + break :blk core.LLVMBuildLoad2( self.builder, - typ, - func, - @ptrCast(args.items), - 0, - // @intCast(callee.args.items.len), + toLLVMtype( + expr.typ orelse try table.getValue(id.ident).?.typ.toTypeIdent(self.allocator), + table, + expr, + ), + ptr, "", ); - break :blk call; - }, - .ident => blk: { - const table = expr.symtable; - const symbol = table.getValue(expr.kind.ident.ident).?; - const ptr = self.references.get(symbol.id).?; - break :blk core.LLVMBuildLoad2(self.builder, toLLVMtype(expr.typ.?, table), ptr, ""); }, .intLit => |int| core.LLVMConstInt(core.LLVMInt32TypeInContext(self.context), @intCast(int.intLit), 1), .binaryOp => |exp| blk: { @@ -241,6 +273,49 @@ pub const Generator = struct { else => core.LLVMBuildICmp(self.builder, types.LLVMIntPredicate.LLVMIntEQ, lhs, rhs, "eql"), }; }, + .stringLit => |str| blk: { + const vref = core.LLVMAddGlobal( + self.module, + core.LLVMArrayType(core.LLVMInt8Type(), @intCast(str.stringLit.len + 1)), + try self.allocator.dupeZ(u8, try std.fmt.allocPrint( + self.allocator, + ".str.{d}", + .{self.stringId}, + )), + ); + self.stringId += 1; + const sref = core.LLVMConstString(try self.allocator.dupeZ(u8, str.stringLit), @intCast(str.stringLit.len), 0); + core.LLVMSetInitializer(vref, sref); + core.LLVMSetGlobalConstant(vref, 1); + core.LLVMSetLinkage(vref, .LLVMPrivateLinkage); + core.LLVMSetUnnamedAddr(vref, 1); + break :blk vref; + }, + + .call => |call| blk: { + const functype = expr.symtable.getValue(call.ident.ident).?.typ.Function; + const ident = try self.allocator.dupeZ(u8, call.ident.ident); + const function = core.LLVMGetNamedFunction(self.module, ident); + var args = std.ArrayList(types.LLVMValueRef).init(self.allocator); + for (call.args.items, functype.input) |arg, intype| { + if (!std.meta.eql(expr.symtable.getType(arg.typ.?).?, intype)) return CodegenError.IncorrectType; + try args.append(try self.genExpr(arg)); + } + const funcType = core.LLVMGlobalGetValueType(function); + // std.debug.print("FUNCTYPE: {s}\n", .{call.ident.ident}); + + const llvmCall = core.LLVMBuildCall2( + self.builder, + funcType, + function, + @ptrCast(args.items), + @intCast(call.args.items.len), + ident, + ); + // std.debug.print("CALL\n", .{}); + + break :blk llvmCall; + }, }; } @@ -278,23 +353,22 @@ test "Codegen exit" { \\} \\ ; - var tokenizer = tok.Tokenizer.init(std.testing.allocator, src); - defer tokenizer.deinit(); - const toks = try tokenizer.tokenize(); - var symbTable: *symb.SymbolTable = try main.initSymbolTable(std.testing.allocator); - defer symbTable.deinit(); - var parser = parse.Parser.init(std.testing.allocator, toks, symbTable); - defer parser.deinit(); - const parseTree = try parser.parse(); - var pop = symb.Populator.init(std.testing.allocator); - var treeNode = parseTree.asNode(); - try pop.populateSymtable(&treeNode); var arena = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena.deinit(); - var gen = Generator.init(arena.allocator(), parseTree); - defer gen.deinit(); - const actual = try gen.generate(); - try expect(std.mem.eql(u8, actual, expected)); + const allocator = arena.allocator(); + var tokenizer = tok.Tokenizer.init(allocator, src); + defer tokenizer.deinit(); + const tokens = try tokenizer.tokenize(); + const symbTable = try main.initSymbolTable(arena.allocator()); + var parser = parse.Parser.init(arena.allocator(), tokens, symbTable); + const tree = try parser.parse(); + var treeNode = tree.asNode(); + var pop = symb.Populator.init(arena.allocator()); + try pop.populateSymtable(&treeNode); + var generator = Generator.init(arena.allocator(), tree, "_calico_start"); + defer generator.deinit(); + const code = try generator.generate(); + try expect(std.mem.eql(u8, code, expected)); } test "Codegen assign" { @@ -305,7 +379,7 @@ test "Codegen assign" { const src = \\fn main() -> i32 { \\ const testval = 6; - \\ var testvar = testval; + \\ varbl: i32 testvar = testval; \\ testvar = 5; \\ return testvar; \\} @@ -327,21 +401,22 @@ test "Codegen assign" { \\} \\ ; - var tokenizer = tok.Tokenizer.init(std.testing.allocator, src); + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + var tokenizer = tok.Tokenizer.init(allocator, src); defer tokenizer.deinit(); - const toks = try tokenizer.tokenize(); - var symbTable: *symb.SymbolTable = try main.initSymbolTable(std.testing.allocator); - defer symbTable.deinit(); - var parser = parse.Parser.init(std.testing.allocator, toks, symbTable); - defer parser.deinit(); - const parseTree = try parser.parse(); - var pop = symb.Populator.init(std.testing.allocator); - var treeNode = parseTree.asNode(); + const tokens = try tokenizer.tokenize(); + const symbTable = try main.initSymbolTable(arena.allocator()); + var parser = parse.Parser.init(arena.allocator(), tokens, symbTable); + const tree = try parser.parse(); + var treeNode = tree.asNode(); + var pop = symb.Populator.init(arena.allocator()); try pop.populateSymtable(&treeNode); - var gen = Generator.init(std.testing.allocator, parseTree); - defer gen.deinit(); - const actual = try gen.generate(); - try expect(std.mem.eql(u8, actual, expected)); + var generator = Generator.init(arena.allocator(), tree, "_calico_start"); + defer generator.deinit(); + const code = try generator.generate(); + try expect(std.mem.eql(u8, code, expected)); } test "Codegen assign constant" { @@ -356,19 +431,62 @@ test "Codegen assign constant" { \\ return testvar; \\} ; - var tokenizer = tok.Tokenizer.init(std.testing.allocator, src); + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + var tokenizer = tok.Tokenizer.init(allocator, src); + defer tokenizer.deinit(); + const tokens = try tokenizer.tokenize(); + const symbTable = try main.initSymbolTable(arena.allocator()); + var parser = parse.Parser.init(arena.allocator(), tokens, symbTable); + const tree = try parser.parse(); + var treeNode = tree.asNode(); + var pop = symb.Populator.init(arena.allocator()); + try pop.populateSymtable(&treeNode); + var generator = Generator.init(arena.allocator(), tree, "_calico_start"); + defer generator.deinit(); + const code = generator.generate(); + try std.testing.expectError(CodegenError.Immutable, code); +} +test "Codegen extern fn string" { + const tok = @import("tokenize.zig"); + const expect = std.testing.expect; + const main = @import("main.zig"); + + const src = + \\import fn puts(str: [u8]) -> i32; + \\fn main() -> i32 { + \\ puts("Hello World!"); + \\} + ; + const expected = + \\; ModuleID = '_calico_start' + \\source_filename = "_calico_start" + \\ + \\@.str.0 = private unnamed_addr constant [13 x i8] c"Hello World!\00" + \\ + \\declare i32 @puts(ptr) + \\ + \\define i32 @main() { + \\entry: + \\ %puts = call i32 @puts(ptr @.str.0) + \\} + \\ + ; + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + var tokenizer = tok.Tokenizer.init(allocator, src); defer tokenizer.deinit(); - const toks = try tokenizer.tokenize(); - var symbTable: *symb.SymbolTable = try main.initSymbolTable(std.testing.allocator); - defer symbTable.deinit(); - var parser = parse.Parser.init(std.testing.allocator, toks, symbTable); - defer parser.deinit(); - const parseTree = try parser.parse(); - var pop = symb.Populator.init(std.testing.allocator); - var treeNode = parseTree.asNode(); + const tokens = try tokenizer.tokenize(); + const symbTable = try main.initSymbolTable(arena.allocator()); + var parser = parse.Parser.init(arena.allocator(), tokens, symbTable); + const tree = try parser.parse(); + var treeNode = tree.asNode(); + var pop = symb.Populator.init(arena.allocator()); try pop.populateSymtable(&treeNode); - var gen = Generator.init(std.testing.allocator, parseTree); - defer gen.deinit(); - const actual = gen.generate(); - try std.testing.expectError(CodegenError.Immutable, actual); + var generator = Generator.init(arena.allocator(), tree, "_calico_start"); + defer generator.deinit(); + const code = try generator.generate(); + try expect(std.mem.eql(u8, code, expected)); } diff --git a/src/main.zig b/src/main.zig index 9e53ecd..b51b170 100644 --- a/src/main.zig +++ b/src/main.zig @@ -49,20 +49,26 @@ pub fn main() !void { const tokens = try tokenizer.tokenize(); // Parse - var symbTable = try initSymbolTable(allocator); - defer symbTable.deinit(); + // var arena = std.heap.ArenaAllocator.init(allocator); + const symbTable = try initSymbolTable(arena.allocator()); - var parser = parse.Parser.init(allocator, tokens, symbTable); - defer parser.deinit(); + var parser = parse.Parser.init(arena.allocator(), tokens, symbTable); const tree = try parser.parse(); var treeNode = tree.asNode(); - var pop = symb.Populator.init(allocator); + var pop = symb.Populator.init(arena.allocator()); try pop.populateSymtable(&treeNode); + // var iter = symbTable.scope.?.symbs.iterator(); + // while (iter.next()) |entry| { + // std.debug.print("{s} -> {any}\n", .{ entry.key_ptr.*, entry.value_ptr.* }); + // } // Codegen - var generator = gen.Generator.init(allocator, tree); + const fname = try allocator.dupeZ(u8, inputFileName.?); + defer allocator.free(fname); + var generator = gen.Generator.init(arena.allocator(), tree, @ptrCast(fname)); defer generator.deinit(); const code = try generator.generate(); + // std.debug.print("{s}\n", .{code}); try outWriter.writeAll(code); const binFile = try getFileName(allocator, out_name, ""); @@ -83,6 +89,10 @@ inline fn getFileName(allocator: std.mem.Allocator, out_name: []const u8, fileTy pub fn initSymbolTable(allocator: std.mem.Allocator) !*symb.SymbolTable { var table = try symb.SymbolTable.init(allocator); const intSymb: symb.SymbType = symb.SymbType.Integer; + const charSymb: symb.SymbType = symb.SymbType.Character; + const strSymb: symb.SymbType = symb.SymbType.String; if (!try table.insert("i32", intSymb.toSymb())) return error.FailedToInsert; + if (!try table.insert("u8", charSymb.toSymb())) return error.FailedToInsert; + if (!try table.insert("[u8]", strSymb.toSymb())) return error.FailedToInsert; return table; } diff --git a/src/parser.zig b/src/parser.zig index 46d148e..66d02a2 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -16,8 +16,20 @@ const ParsingError = error{ UnexpectedEOF, ExpectedToken, OutOfMemory, + TokenIteratorOnly, + TypeRequiredForVarbl, }; +const TypeError = error{ + OutOfMemory, + IncorrectType, + UnknownIdentifier, +}; + +fn errcast(err: anytype) ParsingError { + return err[0]; +} + pub const Node = union(enum) { Expr: NodeExpr, Stmt: NodeStmt, @@ -51,6 +63,18 @@ pub const NodeExpr = struct { } return try childrenArray.toOwnedSlice(); } + pub fn inferType(self: NodeExpr, allocator: std.mem.Allocator, table: *symb.SymbolTable) TypeError!TypeIdent { + const expectedType = try switch (self.kind) { + .call => |call| if (table.getValue(call.ident.ident)) |symbol| try symbol.typ.Function.output.toTypeIdent(allocator) else TypeError.UnknownIdentifier, + .ident => |id| if (table.getValue(id.ident)) |symbol| try symbol.typ.toTypeIdent(allocator) else TypeError.UnknownIdentifier, + .intLit => TypeIdent{ .ident = "i32", .list = false }, + .binaryOp => TypeIdent{ .ident = "i32", .list = false }, + .stringLit => TypeIdent{ .ident = "[u8]", .list = true }, + }; + if (self.typ) |typ| { + return if (std.mem.eql(u8, typ.ident, expectedType.ident)) expectedType else TypeError.IncorrectType; + } else return expectedType; + } }; pub fn map(comptime T: type, comptime F: type, slice: []const F, func: fn (F) T) []const T { @@ -84,8 +108,9 @@ pub const NodeStmt = struct { const blockChildren = map(Node, NodeStmt, block, NodeStmt.asNode); for (blockChildren) |child| try childrenArray.append(child); }, - .function => |fun| try childrenArray.append(fun.block.*.asNode()), .ifstmt => |ifstmt| try childrenArray.append(ifstmt.body.*.asNode()), + .function => |fun| if (fun.block == null) {} else try childrenArray.append(fun.block.?.asNode()), + .expr => |expr| try childrenArray.append(expr.asNode()), } return try childrenArray.toOwnedSlice(); } @@ -127,7 +152,9 @@ pub const Parser = struct { const children = try node.children(self.allocator); defer self.allocator.free(children); for (children) |child| try self.dinitHelper(child.Stmt); - self.allocator.destroy(fun.block); + self.allocator.free(fun.args); + if (fun.block != null) + self.allocator.destroy(fun.block.?); }, else => {}, } @@ -175,15 +202,42 @@ pub const Parser = struct { break :blk ExprKind{ .intLit = (try self.tokens.consume(.intLit)).? }; }, .ident => { + isIdent = true; const ident = (try self.tokens.consume(.ident)).?; + if (tok.checkType(self.tokens.peek().?, .openParen)) { + _ = try self.tokens.consume(.openParen); + + break :blk ExprKind{ + .call = .{ + .ident = ident, + .args = innerblk: { + var argExprs = std.ArrayList(NodeExpr).init(self.allocator); + while (!tok.checkType(self.tokens.peek().?, .closeParen)) { + try argExprs.append(try self.parseExpr()); + if (tok.checkType(self.tokens.peek().?, .closeParen)) break; + _ = try self.tokens.consume(.comma); + } + _ = try self.tokens.consume(.closeParen); + + break :innerblk try argExprs.clone(); + }, + }, + }; + } + typ = null; + break :blk ExprKind{ .ident = ident }; + }, + .stringLit => { typ = TypeIdent{ - .ident = "i32", - .list = false, + .ident = "[u8]", + .list = true, }; - isIdent = true; - break :blk ExprKind{ .ident = ident }; + break :blk ExprKind{ .stringLit = (try self.tokens.consume(.stringLit)).? }; + }, + else => |expr| break :blk errorblk: { + std.debug.print("Invalid Expression: {any}\n", .{expr}); + break :errorblk ParsingError.InvalidExpression; }, - else => break :blk ParsingError.InvalidExpression, }; }; @@ -243,10 +297,25 @@ pub const Parser = struct { .exit => try self.parseExit(), .constant => try self.parseConstant(), .variable => try self.parseVariable(), - .ident => try self.parseAssign(), - .fun => try self.parseFunc(), - .openBrace => try self.parseBlock(), - else => ParsingError.InvalidStatement, + .ident => blk: { + if (!tok.checkType(self.tokens.peekAhead(1).?, .openParen)) + break :blk try self.parseAssign(); + break :blk try self.parseExprStmt(); + }, + .openBrace => self.parseBlock(), + .fun => try self.parseFunc(false), + .import => try self.parseFunc(true), + else => self.parseExprStmt(), + }; + } + + fn parseExprStmt(self: *Parser) ParsingError!NodeStmt { + const kind: StmtKind = StmtKind{ .expr = try self.parseExpr() }; + _ = try self.tokens.consume(.semiCol); + return NodeStmt{ + .id = self.reserveId(), + .symtable = self.top, + .kind = kind, }; } @@ -272,17 +341,46 @@ pub const Parser = struct { .symtable = self.top, }; } - - fn parseFunc(self: *Parser) ParsingError!NodeStmt { + fn parseFunc(self: *Parser, external: bool) ParsingError!NodeStmt { + if (external) _ = try self.tokens.consume(.import); var typ: ?TypeIdent = null; _ = try self.tokens.consume(.fun); const ident = (try self.tokens.consume(.ident)).?; _ = try self.tokens.consume(.openParen); - //TODO: Argument Parsing + var args = std.ArrayList(FunctionArg).init(self.allocator); + defer args.deinit(); + while (!tok.checkType(self.tokens.peek().?, .closeParen)) { + const argIdent: Token = (try self.tokens.consume(.ident)).?; + _ = try self.tokens.consume(.colon); + const argTypIdent: TypeIdent = try self.parseType(); + const funcArg: FunctionArg = .{ + .ident = argIdent.ident, + .typ = argTypIdent, + }; + try args.append(funcArg); + if (!tok.checkType(self.tokens.peek().?, .comma)) break; + _ = try self.tokens.consume(.comma); + } _ = try self.tokens.consume(.closeParen); if (tok.checkType(self.tokens.peek().?, .arrow)) { self.tokens.skip(); - typ = TypeIdent{ .ident = (try self.tokens.consume(.ident)).?.ident, .list = false }; + typ = try self.parseType(); + } + + if (external) { + _ = try self.tokens.consume(.semiCol); + return NodeStmt{ + .id = self.reserveId(), + .kind = StmtKind{ + .function = .{ + .ident = ident, + .args = try args.toOwnedSlice(), + .retType = typ, + .block = null, + }, + }, + .symtable = self.top, + }; } const block = try self.allocator.create(NodeStmt); @@ -291,7 +389,7 @@ pub const Parser = struct { const kind = StmtKind{ .function = .{ .ident = ident, - .args = &[_]TypeIdent{}, + .args = try args.toOwnedSlice(), .retType = typ, .block = block, }, @@ -304,15 +402,14 @@ pub const Parser = struct { }; } - fn parseBlock(self: *Parser) !NodeStmt { + fn parseBlock(self: *Parser) ParsingError!NodeStmt { _ = try self.tokens.consume(.openBrace); var stmtArr = std.ArrayList(NodeStmt).init(self.allocator); - const top = self.top; const child = try self.top.makeChild(); self.top = child; while (!tok.checkType(self.tokens.peek().?, .closeBrace)) try stmtArr.append(try self.parseStmt()); - self.top = top; + self.top = self.top.parent().?; _ = try self.tokens.consume(.closeBrace); const kind = StmtKind{ .block = try stmtArr.toOwnedSlice(), @@ -325,7 +422,7 @@ pub const Parser = struct { }; } - fn parseAssign(self: *Parser) !NodeStmt { + fn parseAssign(self: *Parser) ParsingError!NodeStmt { const ident = (try self.tokens.consume(.ident)).?; _ = try self.tokens.consume(.equal); const expr = try self.parseExpr(); @@ -344,7 +441,7 @@ pub const Parser = struct { } fn parseExit(self: *Parser) ParsingError!NodeStmt { - _ = try self.tokens.consume(.exit); + _ = self.tokens.consume(.exit) catch |err| return errcast(.{err}); const expr = try self.parseExpr(); _ = try self.tokens.consume(.semiCol); const kind = StmtKind{ .exit = expr }; @@ -355,8 +452,16 @@ pub const Parser = struct { }; } - fn parseVariable(self: *Parser) !NodeStmt { + fn parseVariable(self: *Parser) ParsingError!NodeStmt { _ = try self.tokens.consume(.variable); + var typ: ?TypeIdent = null; + _ = self.tokens.consume(.colon) catch { + return ParsingError.TypeRequiredForVarbl; + }; + typ = .{ + .ident = (try self.tokens.consume(.ident)).?.ident, + .list = false, + }; const ident = (try self.tokens.consume(.ident)).?; _ = try self.tokens.consume(.equal); const expr = try self.parseExpr(); @@ -364,7 +469,13 @@ pub const Parser = struct { const kind = StmtKind{ .defVar = NodeVar{ .ident = ident, - .expr = expr, + .expr = NodeExpr{ + .typ = typ, + .id = expr.id, + .kind = expr.kind, + .isConst = expr.isConst, + .symtable = expr.symtable, + }, }, }; return NodeStmt{ @@ -374,21 +485,46 @@ pub const Parser = struct { }; } - fn parseConstant(self: *Parser) !NodeStmt { + fn parseType(self: *Parser) ParsingError!TypeIdent { + const list = tok.checkType(self.tokens.peek().?, .openBracket); + if (list) { + _ = try self.tokens.consume(.openBracket); + const typ = (try self.parseType()).ident; + _ = try self.tokens.consume(.closeBracket); + return .{ + .ident = try std.fmt.allocPrint(self.allocator, "[{s}]", .{typ}), + .list = true, + }; + } + return .{ + .ident = (try self.tokens.consume(.ident)).?.ident, + .list = false, + }; + } + + fn parseConstant(self: *Parser) ParsingError!NodeStmt { _ = try self.tokens.consume(.constant); var typ: ?TypeIdent = null; - if ((self.tokens.peek().?) == .colon) { - _ = try self.tokens.consume(.colon); - typ = .{ .ident = (try self.tokens.consume(.ident)).?.ident, .list = false }; - } + _ = if (self.tokens.consume(.colon)) |_| { + typ = try self.parseType(); + } else |err| { + if (err != tok.TokenizeError.ExpectedToken) return err; + }; + const ident = (try self.tokens.consume(.ident)).?; _ = try self.tokens.consume(.equal); - const expr = try self.parseExpr(); + const expr: NodeExpr = try self.parseExpr(); _ = try self.tokens.consume(.semiCol); const kind = StmtKind{ .defValue = NodeValue{ .ident = ident, - .expr = expr, + .expr = NodeExpr{ + .typ = typ orelse expr.typ, + .id = expr.id, + .kind = expr.kind, + .isConst = expr.isConst, + .symtable = expr.symtable, + }, }, }; return NodeStmt{ @@ -415,11 +551,16 @@ pub const TypeIdent = struct { list: bool, }; +pub const FunctionArg = struct { + ident: []const u8, + typ: TypeIdent, +}; + pub const NodeFunction = struct { ident: Token, - args: []const TypeIdent, + args: []const FunctionArg, retType: ?TypeIdent, - block: *NodeStmt, + block: ?*NodeStmt, }; pub const NodeAssign = struct { @@ -444,11 +585,13 @@ pub const NodeIf = struct { pub const NodeExit = NodeExpr; pub const NodeIntlit = Token; +pub const NodeStringlit = Token; pub const NodeIdent = Token; pub const NodeBlock = []const NodeStmt; pub const NodeBinOp = Token; + pub const NodeCall = struct { - ident: NodeIdent, + ident: Token, args: std.ArrayList(NodeExpr), }; @@ -460,10 +603,12 @@ pub const StmtKind = union(enum) { defVar: NodeVar, assignVar: NodeAssign, block: NodeBlock, + expr: NodeExpr, }; pub const ExprKind = union(enum) { intLit: NodeIntlit, + stringLit: NodeStringlit, ident: NodeIdent, call: NodeCall, binaryOp: struct { @@ -475,26 +620,30 @@ pub const ExprKind = union(enum) { pub fn isConstant(self: ExprKind) bool { return switch (self) { .intLit => true, + .stringLit => true, .ident => false, + .call => false, }; } }; test "Parser" { + const main = @import("main.zig"); const expect = std.testing.expect; const src = "return 120;"; - var tokenizer = tok.Tokenizer.init(std.testing.allocator, src); + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + var tokenizer = tok.Tokenizer.init(allocator, src); defer tokenizer.deinit(); - const toks = try tokenizer.tokenize(); - - var symbTable = try symb.SymbolTable.init(std.testing.allocator); - defer symbTable.deinit(); - - var parser = Parser.init(std.testing.allocator, toks, symbTable); - defer parser.deinit(); - const parseTree = try parser.parse(); - const children = try parseTree.children(std.testing.allocator); - defer std.testing.allocator.free(children); + const tokens = try tokenizer.tokenize(); + const symbTable = try main.initSymbolTable(arena.allocator()); + var parser = Parser.init(arena.allocator(), tokens, symbTable); + const tree = try parser.parse(); + var treeNode = tree.asNode(); + var pop = symb.Populator.init(arena.allocator()); + try pop.populateSymtable(&treeNode); + const children = try treeNode.children(allocator); const exp: []const Node = &[_]Node{Node{ .Stmt = NodeStmt{ .id = 2, diff --git a/src/symtable.zig b/src/symtable.zig index 8a5bd8a..3017670 100644 --- a/src/symtable.zig +++ b/src/symtable.zig @@ -14,6 +14,7 @@ pub const Symbol = union(enum) { pub const SymbType = union(enum) { Void, Integer, + Character, String, Function: struct { input: []const SymbType, @@ -22,12 +23,38 @@ pub const SymbType = union(enum) { pub fn toSymb(self: SymbType) Symbol { return Symbol{ .Type = self }; } - pub fn toString(self: SymbType) []const u8 { + pub fn toString(self: SymbType, allocator: std.mem.Allocator) error{OutOfMemory}![]const u8 { return switch (self) { .Integer => "i32", + .Character => "u8", + .String => "[u8]", + .Function => |fun| blk: { + const output = try fun.output.toString(allocator); + var argstring: []const u8 = ""; + if (fun.input.len != 0) { + argstring = try fun.input[0].toString(allocator); + for (1..fun.input.len) |i| { + const inputTyp = [_][]const u8{ + argstring, + try fun.input[i].toString(allocator), + }; + argstring = try std.mem.join(allocator, ", ", &inputTyp); + } + } + break :blk try std.fmt.allocPrint(allocator, "fn({s})->{s}", .{ argstring, output }); + }, else => "void", }; } + pub fn toTypeIdent(self: SymbType, allocator: std.mem.Allocator) error{OutOfMemory}!pars.TypeIdent { + return pars.TypeIdent{ + .ident = try self.toString(allocator), + .list = switch (self) { + .String => true, + else => false, + }, + }; + } }; pub const SymbValue = struct { @@ -40,15 +67,17 @@ pub const SymbValue = struct { }; pub const SymbolTable = struct { + par: ?*SymbolTable, scope: ?*Scope = null, allocator: std.mem.Allocator, - pub fn init(allocator: std.mem.Allocator) !*SymbolTable { + pub fn init(allocator: std.mem.Allocator) error{OutOfMemory}!*SymbolTable { const scope = try allocator.create(Scope); scope.par = null; scope.symbs = std.StringHashMap(Symbol).init(allocator); const table = try allocator.create(SymbolTable); table.* = SymbolTable{ + .par = null, .scope = scope, .allocator = allocator, }; @@ -76,23 +105,22 @@ pub const SymbolTable = struct { self.allocator.destroy(self); } - pub fn makeChild(self: *SymbolTable) !*SymbolTable { + pub fn makeChild(self: *SymbolTable) error{OutOfMemory}!*SymbolTable { const scope = try self.allocator.create(Scope); scope.par = self.scope; - scope.symbs = std.StringHashMap(Symbol).init(self.allocator); - // scope.symbs = self.scope.?.symbs; + scope.symbs = try self.scope.?.symbs.clone(); const stable: *SymbolTable = try self.allocator.create(SymbolTable); stable.* = .{ + .par = self, .scope = scope, .allocator = self.allocator, }; return stable; } - pub fn parent(self: SymbolTable) ?*Scope { - if (self.scope) |scope| - if (scope.par) |par| - return par; + pub fn parent(self: SymbolTable) ?*SymbolTable { + if (self.par) |par| + return par; return null; } @@ -102,8 +130,7 @@ pub const SymbolTable = struct { } pub fn get(self: SymbolTable, ident: []const u8) ?Symbol { - if (!self.contains(ident)) if (self.parent()) |par| return par.symbs.get(ident); - if (self.scope) |scope| return scope.symbs.get(ident); + if (self.scope) |scope| return scope.symbs.get(ident) orelse if (self.parent()) |par| par.get(ident) else null; return null; } @@ -132,6 +159,7 @@ pub const SymbolTable = struct { } pub fn insert(self: *SymbolTable, ident: []const u8, symbol: Symbol) !bool { + // std.debug.print("Inserted {s} as {any}\n", .{ ident, symbol }); if (self.scope) |scope| { if (scope.symbs.getEntry(ident)) |_| return false; try scope.symbs.put(ident, symbol); @@ -152,7 +180,7 @@ pub const Populator = struct { pub fn init(allocator: std.mem.Allocator) Populator { return .{ - .id = 0, + .id = 1, .allocator = allocator, }; } @@ -162,10 +190,10 @@ pub const Populator = struct { .Stmt => |stmt| { const table: *SymbolTable = stmt.symtable; switch (stmt.kind) { - .defVar => |variable| { + .defVar => |*variable| { const symbol: Symbol = try self.buildValueSymb( table, - if (variable.expr.typ) |typ| typ else pars.TypeIdent{ .ident = "i32", .list = false }, + try variable.expr.inferType(self.allocator, table), true, ); if (!try table.insert(variable.ident.ident, symbol)) return error.FailedToInsert; @@ -173,7 +201,7 @@ pub const Populator = struct { .defValue => |value| { const symbol: Symbol = try self.buildValueSymb( table, - if (value.expr.typ) |typ| typ else pars.TypeIdent{ .ident = "i32", .list = false }, + try value.expr.inferType(self.allocator, table), false, ); std.debug.print("Value: {s}\nSymbol: {any}\n", .{ value.ident.ident, symbol }); @@ -182,23 +210,34 @@ pub const Populator = struct { .block => { const children = try stmt.children(self.allocator); defer self.allocator.free(children); - for (children) |child| { - try self.populateSymtable(&child); - } + for (children) |child| try self.populateSymtable(&child); }, .function => |fun| { + const bodyTable = if (fun.block == null) stmt.symtable else fun.block.?.symtable; const symbol: Symbol = try self.buildFunctionSymb( - table, + bodyTable, fun.args, fun.retType, ); + if (!try table.insert(fun.ident.ident, symbol)) return error.FailedToInsert; - const children = try stmt.children(self.allocator); - defer self.allocator.free(children); - for (children) |child| try self.populateSymtable(&child); + if (fun.block == null) return; + const block = fun.block.?.asNode(); + try self.populateSymtable(&block); + }, + .assignVar => |assign| _ = { + const expectedType = try if (table.getValue(assign.ident.ident)) |symbol| try symbol.typ.toTypeIdent(self.allocator) else error.UnknownIdentifier; + const actualType = try assign.expr.inferType(self.allocator, table); + if (!std.mem.eql(u8, actualType.ident, expectedType.ident)) return error.IncorrectType; }, + .exit => |exit| _ = try exit.inferType(self.allocator, table), + .expr => {}, + .ifstmt => for (try stmt.children(self.allocator)) |c| try self.populateSymtable(&c), - else => {}, + // else => |unim| return errorblk: { + // std.debug.print("Error: Unimplemented: {any}\n", .{unim}); + // break :errorblk error.Unimplemented; + // }, } }, else => { @@ -211,11 +250,16 @@ pub const Populator = struct { fn buildFunctionSymb( self: *Populator, table: *SymbolTable, - args: []const pars.TypeIdent, + args: []const pars.FunctionArg, retType: ?pars.TypeIdent, ) !Symbol { var inputArr = std.ArrayList(SymbType).init(self.allocator); - for (args) |arg| try inputArr.append(table.getType(arg) orelse SymbType.Void); + for (args) |arg| { + // std.debug.print("{s}: {s}\n", .{ arg.ident, arg.typ.ident }); + const argSymb = try self.buildValueSymb(table, arg.typ, false); + if (!try table.insert(arg.ident, argSymb)) return error.FailedToInsert; + try inputArr.append(table.getType(arg.typ) orelse SymbType.Void); + } const input = try inputArr.toOwnedSlice(); const output = try self.allocator.create(SymbType); @@ -227,8 +271,15 @@ pub const Populator = struct { }, }; const id = self.reserveId(); + var argstring: []const u8 = ""; + if (input.len != 0) { + argstring = try input[0].toString(self.allocator); + for (1..input.len) |i| + argstring = try std.mem.join(self.allocator, ",", &[_][]const u8{ argstring, try input[i].toString(self.allocator) }); + } - _ = try table.insert("func_" ++ .{@as(u8, @truncate(id))}, typ.toSymb()); + const name = try std.fmt.allocPrint(self.allocator, "fn({s})->{s}", .{ argstring, try output.toString(self.allocator) }); + _ = try table.insert(name, typ.toSymb()); return Symbol{ .Value = SymbValue{ @@ -248,7 +299,6 @@ pub const Populator = struct { }; return value.toSymb(); } - std.debug.print("Unknown Type: {s}\n", .{typ.ident}); return error.UnknownType; } }; diff --git a/src/tokenize.zig b/src/tokenize.zig index 252bca4..2c536db 100644 --- a/src/tokenize.zig +++ b/src/tokenize.zig @@ -4,12 +4,15 @@ pub const TokenizeError = error{ UnknownToken, UnexpectedEOF, ExpectedToken, + TokenIteratorOnly, }; pub const TokenType = enum { // Runtime Values ident, + stringLit, intLit, + charLit, // Keywords ifstmt, constant, @@ -32,6 +35,8 @@ pub const TokenType = enum { closeBrace, openParen, closeParen, + openBracket, + closeBracket, arrow, colon, comma, @@ -40,7 +45,9 @@ pub const TokenType = enum { pub const Token = union(TokenType) { //RuntimeVar ident: []const u8, + stringLit: []const u8, intLit: i32, + charLit: u8, // Keywords ifstmt, constant, @@ -63,6 +70,8 @@ pub const Token = union(TokenType) { closeBrace, openParen, closeParen, + openBracket, + closeBracket, arrow, colon, comma, @@ -83,9 +92,11 @@ pub const Token = union(TokenType) { '<' => .lessthan, '>' => .greaterthan, ':' => .colon, - else => blk: { - std.debug.print("Invalid char: {c}\n", .{char}); - break :blk TokenizeError.UnknownToken; + '[' => .openBracket, + ']' => .closeBracket, + else => { + // std.debug.print("{c}: ", .{char}); + return TokenizeError.UnknownToken; }, }; } @@ -94,7 +105,7 @@ pub const Token = union(TokenType) { const eql = std.mem.eql; if (eql(u8, str, "return")) return .exit; if (eql(u8, str, "const")) return .constant; - if (eql(u8, str, "var")) return .variable; + if (eql(u8, str, "varbl")) return .variable; if (eql(u8, str, "fn")) return .fun; if (eql(u8, str, "if")) return .ifstmt; if (eql(u8, str, "import")) return .import; @@ -137,10 +148,9 @@ pub fn Iterator(comptime typ: type) type { return ret; } - pub fn consume(self: *Iterator(typ), comptime expected: TokenType) !?typ { - if (typ != Token) return error.TokenIteratorOnly; + pub fn consume(self: *Iterator(typ), comptime expected: TokenType) error{ ExpectedToken, TokenIteratorOnly }!?typ { + if (typ != Token) return TokenizeError.TokenIteratorOnly; if (!checkType(self.peek().?, expected)) { - std.debug.print("Expected {any} got {any} \n", .{ expected, self.peek().? }); return TokenizeError.ExpectedToken; } return self.next(); @@ -172,8 +182,10 @@ pub const Tokenizer = struct { /// Releases allocated memory pub fn deinit(self: *Tokenizer) void { for (self.toks.items) |token| { - if (checkType(token, TokenType.ident)) + if (checkType(token, .ident)) self.allocator.free(token.ident); + if (checkType(token, .stringLit)) + self.allocator.free(token.stringLit); } self.toks.deinit(); } @@ -221,6 +233,17 @@ pub const Tokenizer = struct { if (!checkType(token, TokenType.ident)) self.allocator.free(str); buff.clearAndFree(); }, + '"' => { + _ = self.src.next(); + while (self.src.peek().? != '"') + try buff.append(self.src.next().?); + + _ = self.src.next(); + // std.debug.print("{c}\n", .{self.src.peek().?}); + const token = Token{ .stringLit = try buff.toOwnedSlice() }; + try self.toks.append(token); + buff.clearAndFree(); + }, else => self.toks.append(try Token.fromChar(self.src.next().?)), }; } @@ -231,7 +254,10 @@ pub const Tokenizer = struct { test "Tokenize Expression" { const expect = std.testing.expect; const testSource: []const u8 = "return 120 + 150 - 260 * 12 / 5 + variable;"; - var tokenizer = Tokenizer.init(std.testing.allocator, testSource); + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + var tokenizer = Tokenizer.init(allocator, testSource); defer tokenizer.deinit(); const tokens = try tokenizer.tokenize(); const expected = &[_]Token{ @@ -266,8 +292,11 @@ test "Tokenize Expression" { test "Tokenize variable" { const expect = std.testing.expect; - const testSource: []const u8 = "var five = 5;"; - var tokenizer = Tokenizer.init(std.testing.allocator, testSource); + const testSource: []const u8 = "varbl five = 5;"; + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + var tokenizer = Tokenizer.init(allocator, testSource); defer tokenizer.deinit(); const tokens = try tokenizer.tokenize(); const expected = &[_]Token{ @@ -292,7 +321,10 @@ test "Tokenize variable" { test "Tokenize constant" { const expect = std.testing.expect; const testSource: []const u8 = "const five = 5;"; - var tokenizer = Tokenizer.init(std.testing.allocator, testSource); + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + var tokenizer = Tokenizer.init(allocator, testSource); defer tokenizer.deinit(); const tokens = try tokenizer.tokenize(); const expected = &[_]Token{ @@ -321,7 +353,10 @@ test "Tokenize Function" { \\ return 7; \\} ; - var tokenizer = Tokenizer.init(std.testing.allocator, testSource); + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const allocator = arena.allocator(); + var tokenizer = Tokenizer.init(allocator, testSource); defer tokenizer.deinit(); const tokens = try tokenizer.tokenize(); const expected = &[_]Token{ |
