diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast.zig | 161 | ||||
| -rw-r--r-- | src/codegen.zig | 36 | ||||
| -rw-r--r-- | src/main.zig | 73 | ||||
| -rw-r--r-- | src/parser.zig | 59 | ||||
| -rw-r--r-- | src/tokenize.zig | 103 |
5 files changed, 178 insertions, 254 deletions
diff --git a/src/ast.zig b/src/ast.zig deleted file mode 100644 index e8f3a8e..0000000 --- a/src/ast.zig +++ /dev/null @@ -1,161 +0,0 @@ -const std = @import("std"); -const tok = @import("tokenize.zig"); - -const SyntaxError = error{SyntaxError}; -const expectedToken = error{ExpectedToken}; - -pub const BinOp = enum(u32) { - Add = 0b000, - Sub = 0b001, - Mul = 0b100, - Div = 0b101, - Mod = 0b110, - - fn init(typ: tok.TokenType) ?BinOp { - return switch (typ) { - .plus => .Add, - .minus => .Sub, - .star => .Mul, - .slash => .Div, - else => null, - }; - } - - fn greater(self: BinOp, other: ?BinOp) bool { - if (other == null) return true; - return (@intFromEnum(self) >> 2) > (@intFromEnum(other.?) >> 2); - } -}; - -pub const Literal = union(enum) { - Int: i32, -}; - -pub const AstExpr = struct { - kind: ExprKind, - allocator: std.mem.Allocator, - pub fn init(allocator: std.mem.Allocator) !*AstExpr { - const value = try allocator.create(AstExpr); - value.* = .{ - .kind = .Void, - .allocator = allocator, - }; - return value; - } - pub fn deinit(self: *AstExpr) void { - self.allocator.free(self); - } -}; - -pub const AstStmt = struct { - kind: StmtKind, - allocator: std.mem.Allocator, - pub fn init(allocator: std.mem.Allocator) !*AstStmt { - const value = try allocator.create(AstStmt); - value.* = .{ - .kind = .Void, - .allocator = allocator, - }; - return value; - } - pub fn deinit(self: *AstStmt) void { - self.allocator.free(self); - } -}; - -pub const Ast = union(enum) { - Expr: AstExpr, - Stmt: AstStmt, -}; - -const StmtKind = union(enum) { - exit: AstExpr, - Void, -}; - -const ExprKind = union(enum) { - Literal: Literal, - BinaryOp: struct { - op: BinOp, - left: *AstExpr, - right: *AstExpr, - }, - Void, -}; - -const AstParser = struct { - tokens: tok.Iterator(tok.Token), - arena: std.heap.ArenaAllocator, - allocator: std.mem.Allocator, - - fn init(allocator: *std.heap.ArenaAllocator, tokens: tok.Iterator(tok.Token)) AstParser { - return AstParser{ - .tokens = tokens, - .arena = allocator.*, - .allocator = allocator.allocator(), - }; - } - - fn deinit(self: *AstParser) void { - self.arena.deinit(); - } - - fn parseStmt(self: *AstParser) !AstStmt { - return switch (self.tokens.peek().?) { - .ret => self.exitStmt(), - else => error.SyntaxError, - }; - } - - fn parseExpr(self: *AstParser, lastOp: ?BinOp) !*AstExpr { - if (!tok.checkType(self.tokens.peek().?, tok.TokenType.intLit)) return error.ExpectedToken; - const kind = ExprKind{ .Literal = .{ .Int = self.tokens.consume().?.intLit } }; - var lhs = try AstExpr.init(self.allocator); - lhs.*.kind = kind; - while (self.tokens.peek()) |tokn| { - const op = BinOp.init(tokn); - if (op != null and op.?.greater(lastOp)) { - self.tokens.skip(); - const rhs = try self.parseExpr(op); - const newkind = ExprKind{ .BinaryOp = .{ - .op = op.?, - .left = lhs, - .right = rhs, - } }; - lhs = try AstExpr.init(self.allocator); - lhs.*.kind = newkind; - } - return lhs; - } - return lhs; - } - - fn exitStmt(self: *AstParser) !AstStmt { - if (!tok.checkType(self.tokens.consume().?, tok.TokenType.ret)) - return error.ExpectedToken; - const value = try self.parseExpr(null); - - if (!tok.checkType(self.tokens.consume().?, tok.TokenType.semiCol)) return error.ExpectedToken; - const kind = StmtKind{ .exit = value.* }; - const stmt = try AstStmt.init(self.allocator); - stmt.kind = kind; - return stmt.*; - } -}; - -test "AstParse" { - std.testing.log_level = std.log.Level.info; - const expect = std.testing.expect; - const testSource: []const u8 = "exit 120 + 150;"; - var toks = tok.Tokenizer.init(std.testing.allocator, testSource); - defer toks.deinit(); - var arrtoks = try toks.tokenize(); - const slice = try arrtoks.toOwnedSlice(); - const iter = tok.Iterator(tok.Token).init(slice); - var arena = std.heap.ArenaAllocator.init(std.testing.allocator); - var parser = AstParser.init(&arena, iter); - defer parser.deinit(); - const stmt = try parser.parseStmt(); - _ = stmt; - _ = expect; -} diff --git a/src/codegen.zig b/src/codegen.zig new file mode 100644 index 0000000..c8f414e --- /dev/null +++ b/src/codegen.zig @@ -0,0 +1,36 @@ +const std = @import("std"); +const parse = @import("parser.zig"); + +pub const Generator = struct { + root: parse.NodeExit, + allocator: std.mem.Allocator, + code: std.ArrayList(u8), + + pub fn init(allocator: std.mem.Allocator, root: parse.NodeExit) Generator { + return .{ + .root = root, + .allocator = allocator, + .code = std.ArrayList(u8).init(allocator), + }; + } + + fn genExit(self: *Generator) ![]const u8 { + return try std.fmt.allocPrint(self.allocator, + \\ mov rax, 60 + \\ mov rdi, {} + \\ syscall + \\ + , .{self.root.expr.intLit.intLit}); + } + + pub fn generate(self: *Generator) ![]const u8 { + try self.code.appendSlice( + \\global _start: + \\ + ); + const exitStmt = try self.genExit(); + defer self.allocator.free(exitStmt); + try self.code.appendSlice(exitStmt); + return try self.code.toOwnedSlice(); + } +}; diff --git a/src/main.zig b/src/main.zig index 2ca0b68..472f1ad 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,13 +1,19 @@ const std = @import("std"); const tok = @import("tokenize.zig"); -const ast = @import("ast.zig"); - -const gftCompilerError = error{NoInputFile}; +const parse = @import("parser.zig"); +const gen = @import("codegen.zig"); pub fn main() !void { - if (std.os.argv.len < 2) return gftCompilerError.NoInputFile; + if (std.os.argv.len < 2) { + std.debug.print( + \\info: Usage: calico [input file] + \\ + , .{}); + return; + } var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + var allocator = gpa.allocator(); defer _ = gpa.deinit(); var args = std.process.args(); @@ -24,57 +30,46 @@ pub fn main() !void { if (err != error.PathAlreadyExists) return err; // Setup native code writer - const outFileName = try getFileName(gpa.allocator(), out_name, "asm"); - defer gpa.allocator().free(outFileName); + const outFileName = try getFileName(allocator, out_name, "asm"); + defer allocator.free(outFileName); const outfile = try std.fs.cwd().createFile(outFileName, .{}); const outWriter = outfile.writer(); defer outfile.close(); // Turn the input file into a string - const all = try inputFile.readToEndAlloc(gpa.allocator(), 2048); - defer gpa.allocator().free(all); + const all = try inputFile.readToEndAlloc(allocator, 2048); + defer allocator.free(all); // Tokenize - var tokenizer = tok.Tokenizer.init(gpa.allocator(), all); + var tokenizer = tok.Tokenizer.init(allocator, all); defer tokenizer.deinit(); - var tokIter = tok.Iterator(tok.Token).init((try tokenizer.tokenize()).items); + const tokens = try tokenizer.tokenize(); - // Parse tokens - try outWriter.print("global _start:\n", .{}); - while (tokIter.next()) |t| { - switch (t) { - .ret => { - const num = tokIter.next(); - if (!tok.checkType(num.?, tok.TokenType.intLit)) return error.SyntaxError; + // Parse + var parser = parse.Parser.init(tokens); + const tree = try parser.parse(); - if (!tok.checkType(tokIter.next().?, tok.TokenType.semiCol)) return error.SyntaxError; - try outWriter.print( - \\ mov rax, 60 - \\ mov rdi, {} - \\ syscall - \\ - , .{num.?.intLit}); - }, - // No other commands - else => {}, - } - } + // Codegen + var generator = gen.Generator.init(allocator, tree); + const code = try generator.generate(); + defer allocator.free(code); + try outWriter.writeAll(code); // Run nasm and ld to build the executable // TODO: switch to qbe or llvm (preferabbly qbe) const nasmargv = [_][]const u8{ "nasm", "-felf64", outFileName }; - const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = gpa.allocator() }); - defer gpa.allocator().free(nasmproc.stdout); - defer gpa.allocator().free(nasmproc.stderr); + const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = allocator }); + defer allocator.free(nasmproc.stdout); + defer allocator.free(nasmproc.stderr); - const ldFile = try getFileName(gpa.allocator(), out_name, "o"); - defer gpa.allocator().free(ldFile); - const binFile = try getFileName(gpa.allocator(), out_name, ""); - defer gpa.allocator().free(binFile); + const ldFile = try getFileName(allocator, out_name, "o"); + defer allocator.free(ldFile); + const binFile = try getFileName(allocator, out_name, ""); + defer allocator.free(binFile); const ldargv = [_][]const u8{ "ld", "-o", binFile, ldFile }; - const ldproc = try std.process.Child.run(.{ .argv = &ldargv, .allocator = gpa.allocator() }); - defer gpa.allocator().free(ldproc.stdout); - defer gpa.allocator().free(ldproc.stderr); + const ldproc = try std.process.Child.run(.{ .argv = &ldargv, .allocator = allocator }); + defer allocator.free(ldproc.stdout); + defer allocator.free(ldproc.stderr); } /// Get file extension based on filename diff --git a/src/parser.zig b/src/parser.zig new file mode 100644 index 0000000..422c9cb --- /dev/null +++ b/src/parser.zig @@ -0,0 +1,59 @@ +const std = @import("std"); +const tok = @import("tokenize.zig"); +const Iterator = tok.Iterator; +const Token = tok.Token; + +const ParsingError = error{ InvalidExpression, ExpectedExit, ExpectedSemicolon }; + +pub const NodeExpr = struct { + intLit: Token, +}; + +pub const NodeExit = struct { + expr: NodeExpr, +}; + +pub const Parser = struct { + tokens: Iterator(Token), + + pub fn init(tokens: []Token) Parser { + return .{ + .tokens = Iterator(Token).init(tokens), + }; + } + + fn parseExpr(self: *Parser) !NodeExpr { + if (tok.checkType(self.tokens.peek().?, tok.TokenType.intLit)) + return NodeExpr{ .intLit = self.tokens.consume().? }; + return ParsingError.InvalidExpression; + } + + pub fn parse(self: *Parser) !NodeExit { + var root: NodeExit = undefined; + while (self.tokens.peek()) |token| { + switch (token) { + .exit => { + self.tokens.skip(); + root.expr = try self.parseExpr(); + if (!tok.checkType(self.tokens.peek().?, tok.TokenType.semiCol)) + return ParsingError.ExpectedSemicolon; + self.tokens.skip(); + }, + else => return ParsingError.ExpectedExit, + } + } + return root; + } +}; + +test "Parser" { + const expect = std.testing.expect; + const src = "exit 120;"; + var tokenizer = tok.Tokenizer.init(std.testing.allocator, src); + defer tokenizer.deinit(); + const toks = try tokenizer.tokenize(); + var parser = Parser.init(toks); + const parseTree = try parser.parse(); + const exp = NodeExit{ .expr = NodeExpr{ .intLit = Token{ .intLit = 120 } } }; + try expect(std.meta.eql(parseTree, exp)); +} diff --git a/src/tokenize.zig b/src/tokenize.zig index 40a7d00..f28cdcf 100644 --- a/src/tokenize.zig +++ b/src/tokenize.zig @@ -1,28 +1,41 @@ const std = @import("std"); -const TokenError = error{UnknownToken}; +const TokenizeError = error{ + UnknownToken, + UnexpectedEOF, +}; + pub const TokenType = enum { ident, intLit, - ret, + exit, plus, minus, star, slash, semiCol, - nil, }; pub const Token = union(TokenType) { ident: []const u8, intLit: i32, - ret, + exit, plus, minus, star, slash, semiCol, - nil, + + pub fn fromChar(char: u8) !Token { + return switch (char) { + '+' => .plus, + '-' => .minus, + '*' => .star, + '/' => .slash, + ';' => .semiCol, + else => TokenizeError.UnknownToken, + }; + } }; pub fn checkType(tok: Token, comptime typ: TokenType) bool { @@ -44,15 +57,20 @@ pub fn Iterator(comptime typ: type) type { } /// Get current item + pub fn peekAhead(self: Iterator(typ), ahead: u32) ?typ { + if (self.index + ahead >= self.items.len) return null; + return self.items[self.index + ahead]; + } + pub fn peek(self: Iterator(typ)) ?typ { - if (self.index >= self.items.len) return null; - return self.items[self.index]; + return peekAhead(self, 0); } /// Get current item and iterate index pub fn consume(self: *Iterator(typ)) ?typ { - defer self.index += 1; - return self.peek(); + const ret = self.peek(); + self.index += 1; + return ret; } /// Get current item and iterate index pub const next = consume; @@ -86,67 +104,44 @@ pub const Tokenizer = struct { } /// Returns an ArrayList of tokens - pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) { - var str = std.ArrayList(u8).init(self.allocator); - defer str.deinit(); + pub fn tokenize(self: *Tokenizer) ![]Token { + var buff = std.ArrayList(u8).init(self.allocator); + defer buff.deinit(); while (self.src.peek()) |char| { - switch (char) { + try switch (char) { ' ', '\n', '\t' => self.src.skip(), '0'...'9' => { while (std.ascii.isDigit(self.src.peek().?)) - try str.append(self.src.consume().?); + try buff.append(self.src.consume().?); - const num: i32 = try std.fmt.parseInt(i32, str.items, 10); + const num: i32 = try std.fmt.parseInt(i32, buff.items, 10); try self.toks.append(.{ .intLit = num }); - str.deinit(); - str = std.ArrayList(u8).init(self.allocator); + buff.clearAndFree(); }, 'a'...'z', 'A'...'Z' => { while (std.ascii.isAlphanumeric(self.src.peek().?)) - try str.append(self.src.consume().?); - - if (std.mem.eql(u8, "exit", str.items)) - try self.toks.append(.ret); - str.deinit(); - str = std.ArrayList(u8).init(self.allocator); - }, - ';' => { - self.src.skip(); - try self.toks.append(.semiCol); - }, - '+' => { - self.src.skip(); - try self.toks.append(.plus); - }, - '-' => { - self.src.skip(); - try self.toks.append(.minus); - }, - '*' => { - self.src.skip(); - try self.toks.append(.star); - }, - '/' => { - self.src.skip(); - try self.toks.append(.slash); + try buff.append(self.src.consume().?); + if (std.mem.eql(u8, "exit", buff.items)) { + try self.toks.append(.exit); + } else return TokenizeError.UnknownToken; + buff.clearAndFree(); }, - else => {}, - } + else => self.toks.append(try Token.fromChar(self.src.consume().?)), + }; } - return self.toks; + return self.toks.items; } }; test "Tokenize" { - std.testing.log_level = std.log.Level.info; const expect = std.testing.expect; const testSource: []const u8 = "exit 120 + 150 - 260 * 12 / 5;"; - var toks = Tokenizer.init(std.testing.allocator, testSource); - defer toks.deinit(); - const arrtoks = try toks.tokenize(); + var tokenizer = Tokenizer.init(std.testing.allocator, testSource); + defer tokenizer.deinit(); + const tokens = try tokenizer.tokenize(); const expected = &[_]Token{ - .ret, + .exit, .{ .intLit = 120 }, .plus, .{ .intLit = 150 }, @@ -158,16 +153,16 @@ test "Tokenize" { .{ .intLit = 5 }, .semiCol, }; - for (arrtoks.items, expected) |act, exp| { + for (tokens, expected) |act, exp| { switch (act) { - .ret => |v| try expect(v == exp.ret), + .exit => |v| try expect(v == exp.exit), .intLit => |v| try expect(v == exp.intLit), .semiCol => |v| try expect(v == exp.semiCol), .plus => |v| try expect(v == exp.plus), .minus => |v| try expect(v == exp.minus), .star => |v| try expect(v == exp.star), .slash => |v| try expect(v == exp.slash), - else => {}, + else => try expect(1 == 0), } } } |
