diff options
| author | Nic Gaffney <gaffney_nic@protonmail.com> | 2026-01-25 20:14:54 -0600 |
|---|---|---|
| committer | Nic Gaffney <gaffney_nic@protonmail.com> | 2026-01-25 20:14:54 -0600 |
| commit | 117be1e9f9c2e353694b40e0eb686b2621317063 (patch) | |
| tree | 8381a11350285d682b1412d511721aeadd5135b9 /src | |
| download | gren-117be1e9f9c2e353694b40e0eb686b2621317063.tar.gz | |
Diffstat (limited to 'src')
| -rw-r--r-- | src/lexer.zig | 1 | ||||
| -rw-r--r-- | src/main.zig | 40 | ||||
| -rw-r--r-- | src/tokenizer.zig | 290 |
3 files changed, 331 insertions, 0 deletions
diff --git a/src/lexer.zig b/src/lexer.zig
new file mode 100644
index 0000000..95a0b68
--- /dev/null
+++ b/src/lexer.zig
@@ -0,0 +1 @@
+const std = @import("std");
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..6095e81
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,45 @@
+const std = @import("std");
+const tok = @import("tokenizer.zig");
+
+/// Reads all of stdin, tokenizes it, and prints one token per line to stdout.
+/// The raw input and the debug form of the token list go through std.debug.print.
+pub fn main() !void {
+    // Everything is allocated from one arena and released together at exit.
+    const inneralloc = std.heap.smp_allocator;
+    var arena = std.heap.ArenaAllocator.init(inneralloc);
+    defer arena.deinit();
+    const alloc = arena.allocator();
+
+    var outbuff: [64]u8 = undefined;
+    var inbuff: [64]u8 = undefined;
+    var stdout_writer = std.fs.File.stdout().writer(&outbuff);
+    var stdin_reader = std.fs.File.stdin().reader(&inbuff);
+    const stdout = &stdout_writer.interface;
+    const stdin = &stdin_reader.interface;
+
+    var sb = try std.ArrayList(u8).initCapacity(alloc, 1024);
+    defer sb.deinit(alloc);
+
+    var line = std.io.Writer.Allocating.init(alloc);
+    defer line.deinit();
+
+    // Copy stdin into `sb` line by line: the text before each '\n', then the byte after it.
+    while (true) {
+        _ = stdin.streamDelimiter(&line.writer, '\n') catch |err| {
+            if (err == error.EndOfStream) break else return err;
+        };
+        try sb.appendSlice(alloc, line.written());
+        try sb.appendSlice(alloc, try stdin.take(1));
+        line.clearRetainingCapacity();
+    }
+    // A final line without a trailing newline is still sitting in `line`.
+    if (line.written().len > 0) try sb.appendSlice(alloc, line.written());
+
+    std.debug.print("INPUT\n==========\n{s}\n==========\n", .{sb.items});
+
+    const tokens = try tok.tokenize(alloc, sb.items);
+    std.debug.print("{any}\n", .{tokens});
+    for (tokens) |t|
+        try stdout.print("{s}\n", .{ try t.print(alloc) });
+    try stdout.flush();
+}
diff --git a/src/tokenizer.zig b/src/tokenizer.zig
new file mode 100644
index 0000000..dc700c8
--- /dev/null
+++ b/src/tokenizer.zig
@@ -0,0 +1,349 @@
+const std = @import("std");
+
+/// Tag type for `Token`: one entry per kind of token.
+pub const Token_enum = enum {
+    RARROW, // ->
+    LARROW, // <-
+    BACKTICK, // `
+    PERIOD, // .
+    COMMA, // ,
+    QMARK, // ?
+    LPAREN, // (
+    RPAREN, // )
+    STRING, // "..."
+    BUILTIN, // !word
+    FUNC, // any other word
+    TYP, // letters, starting with a capital
+    INT, // numbers
+};
+
+/// A single token. STRING/BUILTIN/FUNC/TYP carry their text, INT its value.
+pub const Token = union(Token_enum) {
+    RARROW, // ->
+    LARROW, // <-
+    BACKTICK, // `
+    PERIOD, // .
+    COMMA, // ,
+    QMARK, // ?
+    LPAREN, // (
+    RPAREN, // )
+    STRING: []const u8, // "..." (text between the quotes)
+    BUILTIN: []const u8, // !word (text after the '!')
+    FUNC: []const u8, // any other word
+    TYP: []const u8, // letters, starting with a capital
+    INT: i64, // numbers
+
+    /// Returns the text form of this token.
+    /// Only `.INT` allocates (using `alloc`); every other variant returns a
+    /// string literal or the slice stored in the token, so the result must not
+    /// be freed unconditionally. Using an arena for `alloc` sidesteps this.
+    pub fn print(self: Token, alloc: std.mem.Allocator) ![]const u8 {
+        return switch (self) {
+            .RARROW => "->",
+            .LARROW => "<-",
+            .BACKTICK => "`",
+            .PERIOD => ".",
+            .COMMA => ",",
+            .QMARK => "?",
+            .LPAREN => "(",
+            .RPAREN => ")",
+            .STRING, .BUILTIN, .FUNC, .TYP => |v| v,
+            .INT => |v| try std.fmt.allocPrint(alloc, "{d}", .{v}),
+        };
+    }
+};
+
+/// Creates a forward-only cursor type over a slice of `typ`.
+pub fn Iterator(comptime typ: type) type {
+    return struct {
+        items: []const typ,
+        index: usize = 0,
+
+        const SelfType = @This();
+        const Error = error{
+            OutOfBounds,
+            ExpectedItem,
+            EndOfItems,
+        };
+
+        /// Initialize the cursor at the start of `items`.
+        pub fn init(items: []const typ) SelfType {
+            return .{ .items = items };
+        }
+
+        /// Get the item `ahead` positions past the cursor, or null past the end.
+        pub fn peekAhead(self: *SelfType, ahead: u32) ?typ {
+            if (self.index + ahead >= self.items.len) return null;
+            return self.items[self.index + ahead];
+        }
+
+        /// Get current item without advancing, or null at the end.
+        pub fn peek(self: *SelfType) ?typ {
+            return self.peekAhead(0);
+        }
+
+        /// Get current item and iterate index
+        pub fn next(self: *SelfType) ?typ {
+            const ret = self.peek();
+            self.skip();
+            return ret;
+        }
+
+        /// Consume the current item if it equals `expected`.
+        /// Returns `Error.EndOfItems` at the end of input and
+        /// `Error.ExpectedItem` on a mismatch; the cursor only moves on success.
+        pub fn consume(self: *SelfType, expected: typ) !?typ {
+            const item = self.peek() orelse return Error.EndOfItems;
+            if (!std.meta.eql(item, expected)) return Error.ExpectedItem;
+            return self.next();
+        }
+
+        /// True when `item` equals any element of `set`.
+        fn contains(set: []const typ, item: typ) bool {
+            for (set) |candidate| {
+                if (std.meta.eql(item, candidate)) return true;
+            }
+            return false;
+        }
+
+        /// Collect items up to (not including) the first one found in `delims`,
+        /// or up to the end of input. Caller owns the returned slice.
+        pub fn consumeuntil(self: *SelfType, alloc: std.mem.Allocator, delims: []const typ) !?[]typ {
+            var arr = try std.ArrayList(typ).initCapacity(alloc, 128);
+            errdefer arr.deinit(alloc);
+            while (self.peek()) |item| {
+                if (contains(delims, item)) break;
+                self.skip();
+                try arr.append(alloc, item);
+            }
+            return try arr.toOwnedSlice(alloc);
+        }
+
+        /// Like `consumeuntil`, but an item right after an unescaped `escape`
+        /// item is always collected, even when it is a delimiter. Escape items
+        /// are kept in the result. Caller owns the returned slice.
+        pub fn consumeuntilescape(self: *SelfType, alloc: std.mem.Allocator, delims: []const typ, escape: typ) !?[]typ {
+            var arr = try std.ArrayList(typ).initCapacity(alloc, 128);
+            errdefer arr.deinit(alloc);
+            var escaped = false;
+            while (self.peek()) |item| {
+                if (!escaped and contains(delims, item)) break;
+                // An escape item that is itself escaped does not escape what follows.
+                escaped = !escaped and std.meta.eql(item, escape);
+                self.skip();
+                try arr.append(alloc, item);
+            }
+            return try arr.toOwnedSlice(alloc);
+        }
+
+        /// Collect items for as long as each one is found in `allowed`.
+        /// Caller owns the returned slice.
+        pub fn consumewhile(self: *SelfType, alloc: std.mem.Allocator, allowed: []const typ) !?[]typ {
+            var arr = try std.ArrayList(typ).initCapacity(alloc, 128);
+            errdefer arr.deinit(alloc);
+            while (self.peek()) |item| {
+                if (!contains(allowed, item)) break;
+                self.skip();
+                try arr.append(alloc, item);
+            }
+            return try arr.toOwnedSlice(alloc);
+        }
+
+        /// Consume the current item if it equals `expected`, else return null.
+        pub fn maybe(self: *SelfType, expected: typ) ?typ {
+            return self.consume(expected) catch null;
+        }
+
+        /// Skip over current item
+        pub fn skip(self: *SelfType) void {
+            self.index += 1;
+        }
+    };
+}
+
+/// Splits `input` into tokens.
+///
+/// Backtick, `,`, `.`, `?`, `(`, `)`, `->` and `<-` become their own tokens.
+/// `"..."` becomes STRING, `!name` becomes BUILTIN, a run of letters starting
+/// with a capital becomes TYP, a run of digits (optionally preceded by `-`)
+/// becomes INT, and any other run of characters becomes FUNC. Whitespace
+/// only separates words.
+///
+/// Caller owns the returned slice and the text inside STRING, BUILTIN, FUNC
+/// and TYP tokens. Returns `error.UnterminatedString` when a `"` is never
+/// closed, and propagates `std.fmt.parseInt` and allocation errors.
+pub fn tokenize(allocator: std.mem.Allocator, input: []const u8) ![]Token {
+    var toks = std.ArrayList(Token){};
+    errdefer {
+        for (toks.items) |t| switch (t) {
+            .STRING, .BUILTIN, .FUNC, .TYP => |text| allocator.free(text),
+            else => {},
+        };
+        toks.deinit(allocator);
+    }
+    var buff = std.ArrayList(u8){};
+    defer buff.deinit(allocator);
+    var src = Iterator(u8).init(input);
+
+    const internals = struct {
+        /// Emit the pending word, if any, as a FUNC token and empty `buf`.
+        fn clearbuff(alloc: std.mem.Allocator, tok: *std.ArrayList(Token), buf: *std.ArrayList(u8)) !void {
+            if (buf.items.len == 0) return;
+            // toOwnedSlice already leaves `buf` empty, so no extra clear is needed.
+            const str = try buf.toOwnedSlice(alloc);
+            errdefer alloc.free(str);
+            try tok.append(alloc, .{ .FUNC = str });
+        }
+
+        /// Move items from `it` into `buf` until `accept` fails or input ends.
+        fn scan(alloc: std.mem.Allocator, it: *Iterator(u8), buf: *std.ArrayList(u8), comptime accept: fn (u8) bool) !void {
+            while (it.peek()) |c| {
+                if (!accept(c)) break;
+                try buf.append(alloc, c);
+                it.skip();
+            }
+        }
+    };
+
+    while (src.peek()) |char| {
+        switch (char) {
+            '`', ',', '.', '?', '(', ')' => {
+                src.skip();
+                try internals.clearbuff(allocator, &toks, &buff);
+                const punct: Token = switch (char) {
+                    '`' => .BACKTICK,
+                    ',' => .COMMA,
+                    '.' => .PERIOD,
+                    '?' => .QMARK,
+                    '(' => .LPAREN,
+                    else => .RPAREN,
+                };
+                try toks.append(allocator, punct);
+            },
+            '-' => {
+                src.skip();
+                if (src.maybe('>') == null) {
+                    // Not an arrow: the dash belongs to the current word or number.
+                    try buff.append(allocator, '-');
+                    continue;
+                }
+                try internals.clearbuff(allocator, &toks, &buff);
+                try toks.append(allocator, .RARROW);
+            },
+            '<' => {
+                src.skip();
+                if (src.maybe('-') == null) {
+                    try buff.append(allocator, '<');
+                    continue;
+                }
+                try internals.clearbuff(allocator, &toks, &buff);
+                try toks.append(allocator, .LARROW);
+            },
+            '0'...'9' => {
+                // A digit inside a pending word (e.g. `foo2`) is part of that word;
+                // only a lone `-` may precede the digits of a number.
+                const starts_number = buff.items.len == 0 or std.mem.eql(u8, buff.items, "-");
+                if (!starts_number) {
+                    try buff.append(allocator, char);
+                    src.skip();
+                    continue;
+                }
+                try internals.scan(allocator, &src, &buff, std.ascii.isDigit);
+                // Token.INT is i64, so parse at that width.
+                const num = try std.fmt.parseInt(i64, buff.items, 10);
+                buff.clearRetainingCapacity();
+                try toks.append(allocator, .{ .INT = num });
+            },
+            'A'...'Z' => {
+                // A capital inside a pending word (e.g. `fooBar`) is part of that word.
+                if (buff.items.len != 0) {
+                    try buff.append(allocator, char);
+                    src.skip();
+                    continue;
+                }
+                try internals.scan(allocator, &src, &buff, std.ascii.isAlphabetic);
+                const str = try buff.toOwnedSlice(allocator);
+                errdefer allocator.free(str);
+                try toks.append(allocator, .{ .TYP = str });
+            },
+            '!' => {
+                src.skip();
+                try internals.clearbuff(allocator, &toks, &buff);
+                try internals.scan(allocator, &src, &buff, std.ascii.isAlphanumeric);
+                const str = try buff.toOwnedSlice(allocator);
+                errdefer allocator.free(str);
+                try toks.append(allocator, .{ .BUILTIN = str });
+            },
+            '"' => {
+                src.skip();
+                try internals.clearbuff(allocator, &toks, &buff);
+                while (src.next()) |c| {
+                    if (c == '"') break;
+                    try buff.append(allocator, c);
+                } else return error.UnterminatedString;
+                const str = try buff.toOwnedSlice(allocator);
+                errdefer allocator.free(str);
+                try toks.append(allocator, .{ .STRING = str });
+            },
+            ' ', '\t', '\n', '\r' => {
+                src.skip();
+                try internals.clearbuff(allocator, &toks, &buff);
+            },
+            else => {
+                try buff.append(allocator, char);
+                src.skip();
+            },
+        }
+    }
+    // Input may end mid-word (no trailing whitespace); emit what is pending.
+    try internals.clearbuff(allocator, &toks, &buff);
+    return toks.toOwnedSlice(allocator);
+}
+
+// Commented-out alternative `tokenize` built on a labeled switch (not compiled).
+// pub fn tokenize(allocator: std.mem.Allocator, input: []const u8) ![]Token {
+//     var arr = try std.ArrayList(Token).initCapacity(allocator, 1024);
+//     defer arr.deinit(allocator);
+//     var iterator = Iterator(u8).init(input);
+//     return parse: switch (iterator.next().?) {
+//         '-' => {
+//             if (iterator.maybe('>')) |_| try arr.append(allocator, .RARROW);
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//         '<' => {
+//             if (iterator.maybe('-')) |_| try arr.append(allocator, .LARROW);
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//         '.' => {
+//             try arr.append(allocator, .PERIOD);
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//         ',' => {
+//             try arr.append(allocator, .COMMA);
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//         '`' => {
+//             try arr.append(allocator, .QMARK);
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//         '?' => {
+//             try arr.append(allocator, .QMARK);
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//         '!' => {
+//             const name = try iterator.consumeuntil(allocator, &std.ascii.whitespace);
+//             try arr.append(allocator, Token{.BUILTIN = name.?});
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//         '"' => {
+//             const name = try iterator.consumeuntilescape(allocator, "\"", '\\');
+//             try arr.append(allocator, Token{.STRING = name.?});
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//         'A'...'Z' => {
+//             const name = try iterator.consumeuntil(allocator, &std.ascii.whitespace);
+//             try arr.append(allocator, Token{.TYP = name.?});
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//         '0'...'9' => {
+//             const name = try iterator.consumewhile(allocator, "0123456789");
+//             try arr.append(allocator, Token{.FUNC = name.?});
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//         else => {
+//             const name = try iterator.consumeuntil(allocator, @constCast(&std.ascii.whitespace));
+//             try arr.append(allocator, Token{.FUNC = name.?});
+//             if (iterator.peek()) |pk| continue :parse pk else break :parse arr.items; },
+//     };
+
+// }
