summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNic Gaffney <gaffney_nic@protonmail.com>2024-07-27 00:31:28 -0500
committerNic Gaffney <gaffney_nic@protonmail.com>2024-07-27 00:31:28 -0500
commit2605d1e8aa158e8fce80853cf064cc5e2e41e0a9 (patch)
tree081edf18e84d22306e7c04c28d594f1ccfac14dc /src
parent8f55e2fa7059ef215a1b8369300f0a85103b079b (diff)
downloadcalico-2605d1e8aa158e8fce80853cf064cc5e2e41e0a9.tar.gz
Completely removed ast code and restructured
Diffstat (limited to 'src')
-rw-r--r--src/ast.zig161
-rw-r--r--src/codegen.zig36
-rw-r--r--src/main.zig73
-rw-r--r--src/parser.zig59
-rw-r--r--src/tokenize.zig103
5 files changed, 178 insertions, 254 deletions
diff --git a/src/ast.zig b/src/ast.zig
deleted file mode 100644
index e8f3a8e..0000000
--- a/src/ast.zig
+++ /dev/null
@@ -1,161 +0,0 @@
-const std = @import("std");
-const tok = @import("tokenize.zig");
-
-const SyntaxError = error{SyntaxError};
-const expectedToken = error{ExpectedToken};
-
-pub const BinOp = enum(u32) {
- Add = 0b000,
- Sub = 0b001,
- Mul = 0b100,
- Div = 0b101,
- Mod = 0b110,
-
- fn init(typ: tok.TokenType) ?BinOp {
- return switch (typ) {
- .plus => .Add,
- .minus => .Sub,
- .star => .Mul,
- .slash => .Div,
- else => null,
- };
- }
-
- fn greater(self: BinOp, other: ?BinOp) bool {
- if (other == null) return true;
- return (@intFromEnum(self) >> 2) > (@intFromEnum(other.?) >> 2);
- }
-};
-
-pub const Literal = union(enum) {
- Int: i32,
-};
-
-pub const AstExpr = struct {
- kind: ExprKind,
- allocator: std.mem.Allocator,
- pub fn init(allocator: std.mem.Allocator) !*AstExpr {
- const value = try allocator.create(AstExpr);
- value.* = .{
- .kind = .Void,
- .allocator = allocator,
- };
- return value;
- }
- pub fn deinit(self: *AstExpr) void {
- self.allocator.free(self);
- }
-};
-
-pub const AstStmt = struct {
- kind: StmtKind,
- allocator: std.mem.Allocator,
- pub fn init(allocator: std.mem.Allocator) !*AstStmt {
- const value = try allocator.create(AstStmt);
- value.* = .{
- .kind = .Void,
- .allocator = allocator,
- };
- return value;
- }
- pub fn deinit(self: *AstStmt) void {
- self.allocator.free(self);
- }
-};
-
-pub const Ast = union(enum) {
- Expr: AstExpr,
- Stmt: AstStmt,
-};
-
-const StmtKind = union(enum) {
- exit: AstExpr,
- Void,
-};
-
-const ExprKind = union(enum) {
- Literal: Literal,
- BinaryOp: struct {
- op: BinOp,
- left: *AstExpr,
- right: *AstExpr,
- },
- Void,
-};
-
-const AstParser = struct {
- tokens: tok.Iterator(tok.Token),
- arena: std.heap.ArenaAllocator,
- allocator: std.mem.Allocator,
-
- fn init(allocator: *std.heap.ArenaAllocator, tokens: tok.Iterator(tok.Token)) AstParser {
- return AstParser{
- .tokens = tokens,
- .arena = allocator.*,
- .allocator = allocator.allocator(),
- };
- }
-
- fn deinit(self: *AstParser) void {
- self.arena.deinit();
- }
-
- fn parseStmt(self: *AstParser) !AstStmt {
- return switch (self.tokens.peek().?) {
- .ret => self.exitStmt(),
- else => error.SyntaxError,
- };
- }
-
- fn parseExpr(self: *AstParser, lastOp: ?BinOp) !*AstExpr {
- if (!tok.checkType(self.tokens.peek().?, tok.TokenType.intLit)) return error.ExpectedToken;
- const kind = ExprKind{ .Literal = .{ .Int = self.tokens.consume().?.intLit } };
- var lhs = try AstExpr.init(self.allocator);
- lhs.*.kind = kind;
- while (self.tokens.peek()) |tokn| {
- const op = BinOp.init(tokn);
- if (op != null and op.?.greater(lastOp)) {
- self.tokens.skip();
- const rhs = try self.parseExpr(op);
- const newkind = ExprKind{ .BinaryOp = .{
- .op = op.?,
- .left = lhs,
- .right = rhs,
- } };
- lhs = try AstExpr.init(self.allocator);
- lhs.*.kind = newkind;
- }
- return lhs;
- }
- return lhs;
- }
-
- fn exitStmt(self: *AstParser) !AstStmt {
- if (!tok.checkType(self.tokens.consume().?, tok.TokenType.ret))
- return error.ExpectedToken;
- const value = try self.parseExpr(null);
-
- if (!tok.checkType(self.tokens.consume().?, tok.TokenType.semiCol)) return error.ExpectedToken;
- const kind = StmtKind{ .exit = value.* };
- const stmt = try AstStmt.init(self.allocator);
- stmt.kind = kind;
- return stmt.*;
- }
-};
-
-test "AstParse" {
- std.testing.log_level = std.log.Level.info;
- const expect = std.testing.expect;
- const testSource: []const u8 = "exit 120 + 150;";
- var toks = tok.Tokenizer.init(std.testing.allocator, testSource);
- defer toks.deinit();
- var arrtoks = try toks.tokenize();
- const slice = try arrtoks.toOwnedSlice();
- const iter = tok.Iterator(tok.Token).init(slice);
- var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
- var parser = AstParser.init(&arena, iter);
- defer parser.deinit();
- const stmt = try parser.parseStmt();
- _ = stmt;
- _ = expect;
-}
diff --git a/src/codegen.zig b/src/codegen.zig
new file mode 100644
index 0000000..c8f414e
--- /dev/null
+++ b/src/codegen.zig
@@ -0,0 +1,36 @@
+const std = @import("std");
+const parse = @import("parser.zig");
+
+/// Emits NASM-style assembly text for a parsed program.
+///
+/// `root` is the parse tree to translate, `allocator` backs the output buffer,
+/// and `code` accumulates the generated text until `generate` hands it off.
+pub const Generator = struct {
+    root: parse.NodeExit,
+    allocator: std.mem.Allocator,
+    code: std.ArrayList(u8),
+
+    /// Creates a generator for `root` with an empty output buffer.
+    pub fn init(allocator: std.mem.Allocator, root: parse.NodeExit) Generator {
+        return .{
+            .root = root,
+            .allocator = allocator,
+            .code = std.ArrayList(u8).init(allocator),
+        };
+    }
+
+    /// Appends the instructions for the exit statement to `code`: loads 60
+    /// into rax and the integer literal into rdi, then issues `syscall`.
+    /// Writes straight into `code` so no temporary string is allocated.
+    fn genExit(self: *Generator) !void {
+        try self.code.writer().print(
+            \\ mov rax, 60
+            \\ mov rdi, {}
+            \\ syscall
+            \\
+        , .{self.root.expr.intLit.intLit});
+    }
+
+    /// Generates the complete assembly listing.
+    ///
+    /// Returns a slice produced by `toOwnedSlice`; the caller must free it
+    /// with the allocator passed to `init`. If any step fails, the partially
+    /// built buffer is released before the error propagates.
+    pub fn generate(self: *Generator) ![]const u8 {
+        // Without this, a failed append would leave the buffer allocated
+        // forever: the struct has no `deinit`. `clearAndFree` also leaves
+        // `code` empty and reusable.
+        errdefer self.code.clearAndFree();
+        try self.code.appendSlice(
+            \\global _start:
+            \\
+        );
+        try self.genExit();
+        return try self.code.toOwnedSlice();
+    }
+};
diff --git a/src/main.zig b/src/main.zig
index 2ca0b68..472f1ad 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,13 +1,19 @@
const std = @import("std");
const tok = @import("tokenize.zig");
-const ast = @import("ast.zig");
-
-const gftCompilerError = error{NoInputFile};
+const parse = @import("parser.zig");
+const gen = @import("codegen.zig");
pub fn main() !void {
- if (std.os.argv.len < 2) return gftCompilerError.NoInputFile;
+ if (std.os.argv.len < 2) {
+ std.debug.print(
+ \\info: Usage: calico [input file]
+ \\
+ , .{});
+ return;
+ }
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+ var allocator = gpa.allocator();
defer _ = gpa.deinit();
var args = std.process.args();
@@ -24,57 +30,46 @@ pub fn main() !void {
if (err != error.PathAlreadyExists) return err;
// Setup native code writer
- const outFileName = try getFileName(gpa.allocator(), out_name, "asm");
- defer gpa.allocator().free(outFileName);
+ const outFileName = try getFileName(allocator, out_name, "asm");
+ defer allocator.free(outFileName);
const outfile = try std.fs.cwd().createFile(outFileName, .{});
const outWriter = outfile.writer();
defer outfile.close();
// Turn the input file into a string
- const all = try inputFile.readToEndAlloc(gpa.allocator(), 2048);
- defer gpa.allocator().free(all);
+ const all = try inputFile.readToEndAlloc(allocator, 2048);
+ defer allocator.free(all);
// Tokenize
- var tokenizer = tok.Tokenizer.init(gpa.allocator(), all);
+ var tokenizer = tok.Tokenizer.init(allocator, all);
defer tokenizer.deinit();
- var tokIter = tok.Iterator(tok.Token).init((try tokenizer.tokenize()).items);
+ const tokens = try tokenizer.tokenize();
- // Parse tokens
- try outWriter.print("global _start:\n", .{});
- while (tokIter.next()) |t| {
- switch (t) {
- .ret => {
- const num = tokIter.next();
- if (!tok.checkType(num.?, tok.TokenType.intLit)) return error.SyntaxError;
+ // Parse
+ var parser = parse.Parser.init(tokens);
+ const tree = try parser.parse();
- if (!tok.checkType(tokIter.next().?, tok.TokenType.semiCol)) return error.SyntaxError;
- try outWriter.print(
- \\ mov rax, 60
- \\ mov rdi, {}
- \\ syscall
- \\
- , .{num.?.intLit});
- },
- // No other commands
- else => {},
- }
- }
+ // Codegen
+ var generator = gen.Generator.init(allocator, tree);
+ const code = try generator.generate();
+ defer allocator.free(code);
+ try outWriter.writeAll(code);
// Run nasm and ld to build the executable
// TODO: switch to qbe or llvm (preferably qbe)
const nasmargv = [_][]const u8{ "nasm", "-felf64", outFileName };
- const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = gpa.allocator() });
- defer gpa.allocator().free(nasmproc.stdout);
- defer gpa.allocator().free(nasmproc.stderr);
+ const nasmproc = try std.process.Child.run(.{ .argv = &nasmargv, .allocator = allocator });
+ defer allocator.free(nasmproc.stdout);
+ defer allocator.free(nasmproc.stderr);
- const ldFile = try getFileName(gpa.allocator(), out_name, "o");
- defer gpa.allocator().free(ldFile);
- const binFile = try getFileName(gpa.allocator(), out_name, "");
- defer gpa.allocator().free(binFile);
+ const ldFile = try getFileName(allocator, out_name, "o");
+ defer allocator.free(ldFile);
+ const binFile = try getFileName(allocator, out_name, "");
+ defer allocator.free(binFile);
const ldargv = [_][]const u8{ "ld", "-o", binFile, ldFile };
- const ldproc = try std.process.Child.run(.{ .argv = &ldargv, .allocator = gpa.allocator() });
- defer gpa.allocator().free(ldproc.stdout);
- defer gpa.allocator().free(ldproc.stderr);
+ const ldproc = try std.process.Child.run(.{ .argv = &ldargv, .allocator = allocator });
+ defer allocator.free(ldproc.stdout);
+ defer allocator.free(ldproc.stderr);
}
/// Get file extension based on filename
diff --git a/src/parser.zig b/src/parser.zig
new file mode 100644
index 0000000..422c9cb
--- /dev/null
+++ b/src/parser.zig
@@ -0,0 +1,59 @@
+const std = @import("std");
+const tok = @import("tokenize.zig");
+const Iterator = tok.Iterator;
+const Token = tok.Token;
+
+const ParsingError = error{ InvalidExpression, ExpectedExit, ExpectedSemicolon };
+
+pub const NodeExpr = struct { // expression node; currently just wraps a single token
+ intLit: Token, // Parser.parseExpr only ever stores an `.intLit` token here
+};
+
+pub const NodeExit = struct { // parse-tree node for an `exit` statement; the value Parser.parse returns
+ expr: NodeExpr, // expression parsed right after the `exit` token
+};
+
+/// Turns a token slice into a `NodeExit` parse tree.
+pub const Parser = struct {
+    tokens: Iterator(Token),
+
+    /// Creates a parser that reads from `tokens`.
+    pub fn init(tokens: []Token) Parser {
+        return .{
+            .tokens = Iterator(Token).init(tokens),
+        };
+    }
+
+    /// Parses an expression, which is currently a single integer literal.
+    ///
+    /// Returns `InvalidExpression` when the next token is not an `intLit`
+    /// or when the token stream has already ended.
+    fn parseExpr(self: *Parser) !NodeExpr {
+        // `orelse` rather than `.?`: running out of tokens is a syntax
+        // error to report, not a reason to panic.
+        const token = self.tokens.peek() orelse
+            return ParsingError.InvalidExpression;
+        if (!tok.checkType(token, tok.TokenType.intLit))
+            return ParsingError.InvalidExpression;
+        self.tokens.skip();
+        return NodeExpr{ .intLit = token };
+    }
+
+    /// Parses the whole token stream as a sequence of `exit <expr>;` statements.
+    ///
+    /// Returns the node for the last statement seen. Errors:
+    /// - `ExpectedExit`: a statement does not start with `exit`, or the
+    ///   stream contains no statement at all.
+    /// - `InvalidExpression`: `exit` is not followed by an integer literal.
+    /// - `ExpectedSemicolon`: the statement is not terminated by `;`,
+    ///   including when the input ends right after the expression.
+    pub fn parse(self: *Parser) !NodeExit {
+        // Optional instead of `undefined`, so an empty stream cannot hand
+        // uninitialized memory back to the caller.
+        var root: ?NodeExit = null;
+        while (self.tokens.peek()) |token| {
+            switch (token) {
+                .exit => {
+                    self.tokens.skip();
+                    const expr = try self.parseExpr();
+                    const terminator = self.tokens.peek() orelse
+                        return ParsingError.ExpectedSemicolon;
+                    if (!tok.checkType(terminator, tok.TokenType.semiCol))
+                        return ParsingError.ExpectedSemicolon;
+                    self.tokens.skip();
+                    root = NodeExit{ .expr = expr };
+                },
+                else => return ParsingError.ExpectedExit,
+            }
+        }
+        return root orelse ParsingError.ExpectedExit;
+    }
+};
+
+test "Parser" {
+    // Tokenize a minimal program, parse it, and compare with a hand-built tree.
+    var lexer = tok.Tokenizer.init(std.testing.allocator, "exit 120;");
+    defer lexer.deinit();
+    var parser = Parser.init(try lexer.tokenize());
+    const actual = try parser.parse();
+    const wanted = NodeExit{ .expr = .{ .intLit = .{ .intLit = 120 } } };
+    try std.testing.expect(std.meta.eql(actual, wanted));
+}
diff --git a/src/tokenize.zig b/src/tokenize.zig
index 40a7d00..f28cdcf 100644
--- a/src/tokenize.zig
+++ b/src/tokenize.zig
@@ -1,28 +1,41 @@
const std = @import("std");
-const TokenError = error{UnknownToken};
+const TokenizeError = error{
+ UnknownToken,
+ UnexpectedEOF,
+};
+
pub const TokenType = enum { // tag set used by the `Token` union declared after it
ident,
intLit,
- ret,
+ exit, // keyword tag; replaces the former `ret` tag
plus,
minus,
star,
slash,
semiCol,
- nil,
};
pub const Token = union(TokenType) { // tagged by TokenType; only `ident` and `intLit` carry a payload
ident: []const u8,
intLit: i32,
- ret,
+ exit,
plus,
minus,
star,
slash,
semiCol,
- nil,
+
+ pub fn fromChar(char: u8) !Token { // maps a single operator character or `;` to its token
+ return switch (char) {
+ '+' => .plus,
+ '-' => .minus,
+ '*' => .star,
+ '/' => .slash,
+ ';' => .semiCol,
+ else => TokenizeError.UnknownToken, // every other byte is rejected
+ };
+ }
};
pub fn checkType(tok: Token, comptime typ: TokenType) bool {
@@ -44,15 +57,20 @@ pub fn Iterator(comptime typ: type) type {
}
/// Get current item
+ pub fn peekAhead(self: Iterator(typ), ahead: u32) ?typ { // item `ahead` places past the current index, without advancing
+ if (self.index + ahead >= self.items.len) return null; // out of range yields null; NOTE(review): the `/// Get current item` doc comment above now attaches to peekAhead, not peek
+ return self.items[self.index + ahead];
+ }
+
pub fn peek(self: Iterator(typ)) ?typ {
- if (self.index >= self.items.len) return null;
- return self.items[self.index];
+ return peekAhead(self, 0); // zero lookahead is the current item; null once the index is past the end
}
/// Get current item and iterate index
pub fn consume(self: *Iterator(typ)) ?typ {
- defer self.index += 1;
- return self.peek();
+ const ret = self.peek(); // read the current item before moving on
+ self.index += 1; // advances unconditionally, even when `ret` is null
+ return ret;
}
/// Get current item and iterate index
pub const next = consume;
@@ -86,67 +104,44 @@ pub const Tokenizer = struct {
}
/// Returns an ArrayList of tokens
- pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) {
- var str = std.ArrayList(u8).init(self.allocator);
- defer str.deinit();
+ pub fn tokenize(self: *Tokenizer) ![]Token { // returns the `toks.items` slice, not an ArrayList as the doc comment above still says
+ var buff = std.ArrayList(u8).init(self.allocator); // scratch buffer holding the characters of the token being read
+ defer buff.deinit();
while (self.src.peek()) |char| {
- switch (char) {
+ try switch (char) { // `try` unwraps the error union produced by the `else` arm
' ', '\n', '\t' => self.src.skip(),
'0'...'9' => {
while (std.ascii.isDigit(self.src.peek().?))
- try str.append(self.src.consume().?);
+ try buff.append(self.src.consume().?); // NOTE(review): the loop condition unwraps peek() with `.?`, so input ending in a digit reaches null here; confirm, and consider returning UnexpectedEOF
- const num: i32 = try std.fmt.parseInt(i32, str.items, 10);
+ const num: i32 = try std.fmt.parseInt(i32, buff.items, 10);
try self.toks.append(.{ .intLit = num });
- str.deinit();
- str = std.ArrayList(u8).init(self.allocator);
+ buff.clearAndFree(); // empty the scratch buffer for the next token
},
'a'...'z', 'A'...'Z' => {
while (std.ascii.isAlphanumeric(self.src.peek().?))
- try str.append(self.src.consume().?);
-
- if (std.mem.eql(u8, "exit", str.items))
- try self.toks.append(.ret);
- str.deinit();
- str = std.ArrayList(u8).init(self.allocator);
- },
- ';' => {
- self.src.skip();
- try self.toks.append(.semiCol);
- },
- '+' => {
- self.src.skip();
- try self.toks.append(.plus);
- },
- '-' => {
- self.src.skip();
- try self.toks.append(.minus);
- },
- '*' => {
- self.src.skip();
- try self.toks.append(.star);
- },
- '/' => {
- self.src.skip();
- try self.toks.append(.slash);
+ try buff.append(self.src.consume().?); // NOTE(review): same `.?` unwrap in the loop condition; input ending in a letter or digit reaches null here
+ if (std.mem.eql(u8, "exit", buff.items)) {
+ try self.toks.append(.exit);
+ } else return TokenizeError.UnknownToken; // any word other than `exit` is rejected
+ buff.clearAndFree();
},
- else => {},
- }
+ else => self.toks.append(try Token.fromChar(self.src.consume().?)), // single-character tokens; fromChar fails with UnknownToken for anything else
+ };
}
- return self.toks;
+ return self.toks.items; // a view into the tokenizer's own list, not a copy
}
};
test "Tokenize" {
- std.testing.log_level = std.log.Level.info;
const expect = std.testing.expect;
const testSource: []const u8 = "exit 120 + 150 - 260 * 12 / 5;";
- var toks = Tokenizer.init(std.testing.allocator, testSource);
- defer toks.deinit();
- const arrtoks = try toks.tokenize();
+ var tokenizer = Tokenizer.init(std.testing.allocator, testSource);
+ defer tokenizer.deinit();
+ const tokens = try tokenizer.tokenize();
const expected = &[_]Token{
- .ret,
+ .exit,
.{ .intLit = 120 },
.plus,
.{ .intLit = 150 },
@@ -158,16 +153,16 @@ test "Tokenize" {
.{ .intLit = 5 },
.semiCol,
};
- for (arrtoks.items, expected) |act, exp| {
+ for (tokens, expected) |act, exp| {
switch (act) {
- .ret => |v| try expect(v == exp.ret),
+ .exit => |v| try expect(v == exp.exit),
.intLit => |v| try expect(v == exp.intLit),
.semiCol => |v| try expect(v == exp.semiCol),
.plus => |v| try expect(v == exp.plus),
.minus => |v| try expect(v == exp.minus),
.star => |v| try expect(v == exp.star),
.slash => |v| try expect(v == exp.slash),
- else => {},
+ else => try expect(1 == 0),
}
}
}