From 9334af0c76c9410b3d0028febd4b1972ab7d4d7e Mon Sep 17 00:00:00 2001 From: Nic Gaffney Date: Mon, 29 Jul 2024 06:30:47 -0500 Subject: Started symbol table and restructured AST --- src/codegen.zig | 49 ++++++++---- src/main.zig | 5 +- src/parser.zig | 231 ++++++++++++++++++++++++++++++++++++++++--------------- src/symtable.zig | 149 +++++++++++++++++++++++++++++++++++ 4 files changed, 355 insertions(+), 79 deletions(-) create mode 100644 src/symtable.zig (limited to 'src') diff --git a/src/codegen.zig b/src/codegen.zig index 98501c2..a1df34f 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -18,37 +18,51 @@ pub const Generator = struct { self.code.deinit(); } - fn genExit(self: *Generator, expr: parse.NodeExpr) !void { + fn genExit(self: *Generator, exit: parse.NodeExit) !void { + const expr = exit.expr; const newCode = - switch (expr) { - .intLit => try std.fmt.allocPrint(self.allocator, + switch (expr.kind) { + .intLit => |intlit| try std.fmt.allocPrint(self.allocator, \\ mov rax, 60 \\ mov rdi, {d} \\ syscall \\ , .{ - expr.intLit.intlit.intLit, + intlit.intlit.intLit, }), - .ident => try std.fmt.allocPrint(self.allocator, + .ident => |ident| try std.fmt.allocPrint(self.allocator, \\ mov rax, 60 \\ mov rdi, [{s}] \\ syscall \\ , .{ - expr.ident.ident.ident, + ident.ident.ident, }), }; try self.code.appendSlice(newCode); self.allocator.free(newCode); } + fn genVar(self: *Generator, value: parse.NodeVar) !void { + const str = try std.fmt.allocPrint(self.allocator, + \\section .data + \\ {s}: dw {d} + \\ + , .{ value.ident.ident, switch (value.expr.kind) { + .intLit => |intlit| intlit.intlit.intLit, + else => return error.NotImplemented, + } }); + defer self.allocator.free(str); + try self.code.insertSlice(0, str); + } + fn genValue(self: *Generator, value: parse.NodeValue) !void { const str = try std.fmt.allocPrint(self.allocator, \\section .data \\ {s}: dw {d} \\ - , .{ value.ident.ident, switch (value.value) { - .intLit => value.value.intLit.intlit.intLit, + , .{ value.ident.ident, switch (value.expr.kind) { + .intLit => |intlit| intlit.intlit.intLit, else => return error.NotImplemented, } }); defer self.allocator.free(str); @@ -57,21 +71,21 @@ pub const Generator = struct { fn genAssign(self: *Generator, assign: parse.NodeAssign) !void { const newCode = - switch (assign.value) { - .intLit => try std.fmt.allocPrint(self.allocator, + switch (assign.expr.kind) { + .intLit => |intlit| try std.fmt.allocPrint(self.allocator, \\ mov rax, {d} \\ mov [{s}], rax \\ , .{ - assign.value.intLit.intlit.intLit, + intlit.intlit.intLit, assign.ident.ident, }), - .ident => try std.fmt.allocPrint(self.allocator, + .ident => |ident| try std.fmt.allocPrint(self.allocator, \\ mov rax, [{s}] \\ mov [{s}], rax \\ , .{ - assign.value.ident.ident.ident, + ident.ident.ident, assign.ident.ident, }), }; @@ -87,10 +101,11 @@ pub const Generator = struct { \\ ); for (self.root) |stmt| { - switch (stmt) { - .exit => try self.genExit(stmt.exit.expr), - .value => try self.genValue(stmt.value), - .assign => try self.genAssign(stmt.assign), + switch (stmt.kind) { + .exit => |exit| try self.genExit(exit), + .defValue => |defValue| try self.genValue(defValue), + .defVar => |defVar| try self.genVar(defVar), + .assignVar => |assign| try self.genAssign(assign), } } return self.code.items; diff --git a/src/main.zig b/src/main.zig index 14bcfad..53421f4 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2,6 +2,7 @@ const std = @import("std"); const tok = @import("tokenize.zig"); const parse = @import("parser.zig"); const gen = @import("codegen.zig"); +const symb = @import("symtable.zig"); pub fn main() !void { if (std.os.argv.len < 2) { @@ -46,7 +47,9 @@ pub fn main() !void { const tokens = try tokenizer.tokenize(); // Parse - var parser = parse.Parser.init(allocator, tokens); + var symbTable = try symb.SymbolTable.init(allocator); + defer symbTable.deinit(); + var parser = parse.Parser.init(allocator, tokens, &symbTable); defer parser.deinit(); const tree = try parser.parse(); diff --git a/src/parser.zig b/src/parser.zig index df0f068..feaa226 100644 --- a/src/parser.zig +++ b/src/parser.zig @@ -1,5 +1,6 @@ const std = @import("std"); const tok = @import("tokenize.zig"); +const symb = @import("symtable.zig"); const Iterator = tok.Iterator; const Token = tok.Token; @@ -12,50 +13,85 @@ const ParsingError = error{ InvalidStatement, }; -pub const NodeExpr = union(enum) { - intLit: NodeIntlit, - ident: NodeIdent, -}; - -pub const NodeStmt = union(enum) { - exit: NodeExit, - value: NodeValue, - assign: NodeAssign, -}; +pub const Node = union(enum) { + Expr: NodeExpr, + Stmt: NodeStmt, -pub const NodeAssign = struct { - ident: Token, - value: NodeExpr, + pub fn children(self: Node, allocator: std.mem.Allocator) ![]Node { + var childrenArray = std.ArrayList(Node).init(allocator); + defer childrenArray.deinit(); + switch (self) { + .Expr => |expr| try childrenArray.appendSlice(try expr.children()), + .Stmt => |stmt| try childrenArray.appendSlice(try stmt.children()), + } + return try childrenArray.toOwnedSlice(); + } }; -pub const NodeValue = struct { - ident: Token, - value: NodeExpr, +pub const NodeExpr = struct { + id: u32, + kind: ExprKind, + symtable: *symb.SymbolTable, + typ: ?symb.SymbType, isConst: bool, -}; -pub const NodeExit = struct { - expr: NodeExpr, -}; + pub fn asNode(self: NodeExpr) Node { + return Node{ .Expr = self }; + } -pub const NodeIntlit = struct { - intlit: Token, + pub fn children(self: NodeExpr, allocator: std.mem.Allocator) ![]Node { + var childrenArray = std.ArrayList(Node).init(allocator); + defer childrenArray.deinit(); + switch (self.kind) { + else => {}, + } + return try childrenArray.toOwnedSlice(); + } }; -pub const NodeIdent = struct { - ident: Token, +pub const NodeStmt = struct { + id: u32, + kind: StmtKind, + symtable: *symb.SymbolTable, + + pub fn asNode(self: NodeStmt) Node { + return Node{ .Stmt = self }; + } + + pub fn children(self: NodeStmt, allocator: std.mem.Allocator) ![]Node { + var childrenArray = std.ArrayList(Node).init(allocator); + defer childrenArray.deinit(); + switch (self.kind) { + .exit => |exit| try childrenArray.append(exit.expr.asNode()), + .defValue => |value| try childrenArray.append(value.expr.asNode()), + .defVar => |variable| try childrenArray.append(variable.expr.asNode()), + .assignVar => |assign| try childrenArray.append(assign.expr.asNode()), + else => {}, + } + return try childrenArray.toOwnedSlice(); + } }; pub const Parser = struct { + top: *symb.SymbolTable, + id: u32, tokens: Iterator(Token), allocator: std.mem.Allocator, nodes: std.ArrayList(NodeStmt), + nextId: u32 = 1, + + fn reserveId(self: *Parser) u32 { + defer self.nextId += 1; + return self.nextId; + } - pub fn init(allocator: std.mem.Allocator, tokens: []Token) Parser { + pub fn init(allocator: std.mem.Allocator, tokens: []Token, symbolTable: *symb.SymbolTable) Parser { return .{ + .top = symbolTable, .allocator = allocator, .tokens = Iterator(Token).init(tokens), .nodes = std.ArrayList(NodeStmt).init(allocator), + .id = 0, }; } @@ -64,76 +100,103 @@ pub const Parser = struct { } fn parseExpr(self: *Parser) !NodeExpr { - return switch (self.tokens.peek().?) { - .intLit => NodeExpr{ + const kind = try switch (self.tokens.peek().?) { + .intLit => ExprKind{ .intLit = NodeIntlit{ .intlit = (try self.tokens.consume(.intLit)).?, }, }, - .ident => NodeExpr{ + .ident => ExprKind{ .ident = NodeIdent{ .ident = (try self.tokens.consume(.ident)).?, }, }, else => ParsingError.InvalidExpression, }; + return NodeExpr{ + .id = self.reserveId(), + .kind = kind, + .isConst = kind.isConstant(), + .typ = null, + .symtable = self.top, + }; } fn parseStmt(self: *Parser) !NodeStmt { return switch (self.tokens.peek().?) { - .exit => NodeStmt{ .exit = try self.parseExit() }, - .constant => NodeStmt{ .value = try self.parseValue(true) }, - .variable => NodeStmt{ .value = try self.parseValue(false) }, - .ident => NodeStmt{ .assign = try self.parseAssign() }, + .exit => try self.parseExit(), + .constant => try self.parseConstant(), + .variable => try self.parseVariable(), + .ident => try self.parseAssign(), else => ParsingError.InvalidStatement, }; } - fn parseAssign(self: *Parser) !NodeAssign { - const ident = try self.tokens.consume(.ident); - var isMutable = false; - var exists = false; - for (self.nodes.items) |item| { - switch (item) { - .value => |v| { - if (std.mem.eql(u8, v.ident.ident, ident.?.ident)) { - isMutable = !v.isConst; - exists = true; - } - }, - else => {}, - } - } - if (!exists) return error.UnknownIdentifier; - if (!isMutable) return error.ImmutableValue; + fn parseAssign(self: *Parser) !NodeStmt { + const ident = (try self.tokens.consume(.ident)).?; _ = try self.tokens.consume(.equal); const expr = try self.parseExpr(); _ = try self.tokens.consume(.semiCol); - return NodeAssign{ - .ident = ident.?, - .value = expr, + const kind = StmtKind{ + .assignVar = NodeAssign{ + .ident = ident, + .expr = expr, + }, + }; + return NodeStmt{ + .id = self.reserveId(), + .kind = kind, + .symtable = self.top, }; } - fn parseExit(self: *Parser) !NodeExit { + fn parseExit(self: *Parser) !NodeStmt { _ = try self.tokens.consume(.exit); const expr = try self.parseExpr(); _ = try self.tokens.consume(.semiCol); - return NodeExit{ - .expr = expr, + const kind = StmtKind{ .exit = NodeExit{ .expr = expr } }; + return NodeStmt{ + .symtable = self.top, + .kind = kind, + .id = self.reserveId(), + }; + } + + fn parseVariable(self: *Parser) !NodeStmt { + _ = try self.tokens.consume(.variable); + const ident = (try self.tokens.consume(.ident)).?; + _ = try self.tokens.consume(.equal); + const expr = try self.parseExpr(); + _ = try self.tokens.consume(.semiCol); + const kind = StmtKind{ + .defVar = NodeVar{ + .ident = ident, + .expr = expr, + }, + }; + return NodeStmt{ + .id = self.reserveId(), + .kind = kind, + .symtable = self.top, }; } - fn parseValue(self: *Parser, isConst: bool) !NodeValue { - self.tokens.skip(); + fn parseConstant(self: *Parser) !NodeStmt { + _ = try self.tokens.consume(.constant); const ident = (try self.tokens.consume(.ident)).?; _ = try self.tokens.consume(.equal); const expr = try self.parseExpr(); _ = try self.tokens.consume(.semiCol); - return NodeValue{ - .ident = ident, - .value = expr, - .isConst = isConst, + const kind = StmtKind{ + .defValue = NodeValue{ + .ident = ident, + .expr = expr, + }, + }; + return NodeStmt{ + .id = self.reserveId(), + .kind = kind, + .symtable = self.top, }; } @@ -145,6 +208,52 @@ pub const Parser = struct { } }; +pub const NodeAssign = struct { + ident: Token, + expr: NodeExpr, +}; + +pub const NodeValue = struct { + ident: Token, + expr: NodeExpr, +}; + +pub const NodeVar = struct { + ident: Token, + expr: NodeExpr, +}; + +pub const NodeExit = struct { + expr: NodeExpr, +}; + +pub const NodeIntlit = struct { + intlit: Token, +}; + +pub const NodeIdent = struct { + ident: Token, +}; + +pub const StmtKind = union(enum) { + exit: NodeExit, + defValue: NodeValue, + defVar: NodeVar, + assignVar: NodeAssign, +}; + +pub const ExprKind = union(enum) { + intLit: NodeIntlit, + ident: NodeIdent, + + pub fn isConstant(self: ExprKind) bool { + return switch (self) { + .intLit => true, + .ident => false, + }; + } +}; + test "Parser" { const expect = std.testing.expect; const src = "exit 120;"; diff --git a/src/symtable.zig b/src/symtable.zig new file mode 100644 index 0000000..0ff8fc8 --- /dev/null +++ b/src/symtable.zig @@ -0,0 +1,149 @@ +const std = @import("std"); +const pars = @import("parser.zig"); + +const Scope = struct { + par: ?*Scope, + symbs: std.StringHashMap(Symbol), +}; + +const Symbol = union(enum) { + Type: SymbType, + Value: SymbValue, +}; + +pub const SymbType = union(enum) { + Void, + Integer, + String, +}; + +const SymbValue = struct { + typ: SymbType, + id: u32, + mut: bool, +}; + +pub const SymbolTable = struct { + scope: ?*Scope = null, + allocator: std.mem.Allocator, + + pub fn init(allocator: std.mem.Allocator) !*SymbolTable { + const scope = try allocator.create(Scope); + scope.par = null; + scope.symbs = std.StringHashMap(Symbol).init(allocator); + const table = try allocator.create(SymbolTable); + table.* = SymbolTable{ + .scope = scope, + .allocator = allocator, + }; + return table; + } + + pub fn deinit(self: *SymbolTable) void { + if (self.scope) |scope| { + scope.symbs.deinit(); + self.allocator.destroy(scope); + } + } + + pub fn makeChild(self: *SymbolTable) SymbolTable { + const scope = try self.allocator.create(Scope); + scope.par = self; + scope.symbs = std.StringHashMap(Symbol).init(self.allocator); + return SymbolTable{ + .scope = scope, + .allocator = self.allocator, + }; + } + + pub fn parent(self: SymbolTable) ?*SymbolTable { + if (self.scope) |scope| + if (scope.par) |par| + return par; + return null; + } + + pub fn contains(self: SymbolTable, ident: []const u8) bool { + if (self.scope) |scope| return scope.symbs.contains(ident); + return false; + } + + pub fn get(self: SymbolTable, ident: []const u8) ?Symbol { + if (self.scope) |scope| return scope.symbs.get(ident); + return null; + } + + pub fn getValue(self: SymbolTable, ident: []const u8) ?SymbValue { + if (self.get(ident)) |symbol| + return switch (symbol) { + .Value => |value| value, + else => null, + }; + + return null; + } + + pub fn getMut(self: *SymbolTable, ident: []const u8) ?Symbol { + if (self.get(ident)) |symbol| + return switch (symbol) { + .Value => null, + else => symbol, + }; + return null; + } + + pub fn insert(self: *SymbolTable, ident: []const u8, symbol: Symbol) bool { + if (self.scope) |scope| { + if (scope.symbs.getEntry(ident)) return false; + scope.symbs.put(ident, symbol); + return true; + } + return false; + } +}; + +const Populator = struct { + id: u32, + allocator: std.mem.Allocator, + + pub fn init(allocator: std.mem.Allocator) Populator { + return .{ + .id = 0, + .allocator = allocator, + }; + } + + fn populateSymtable(self: *Populator, node: *pars.Node) void { + switch (node) { + .Stmt => |stmt| { + const table: *SymbolTable = stmt.symtable; + switch (stmt.kind) { + .defVar => |variable| { + const symbol = self.buildValueSymb( + table, + if (variable.expr.typ) |typ| typ else .Integer, + true, + ); + table.insert(variable.ident, symbol); + }, + .defValue => |value| { + const symbol = self.buildValueSymb( + table, + if (value.expr.typ) |typ| typ else .Integer, + true, + ); + table.insert(value.ident, symbol); + }, + } + }, + else => { + for (node.children(self.allocator)) |child| + populateSymtable(&child); + }, + } + } + + fn buildValueSymb(self: *Populator, table: *SymbolTable, typ: SymbType, mutable: bool) Symbol { + const newTyp = table.getType(typ); + } +}; -- cgit v1.2.3