const std = @import("std");

/// Tags for every token kind produced by `tokenize`.
pub const Token_enum = enum {
    RARROW, // ->
    LARROW, // <-
    BACKTICK, // `
    PERIOD, // .
    COMMA, // ,
    QMARK, // ?
    LPAREN, // (
    RPAREN, // )
    STRING, // "..."
    BUILTIN, // !word
    FUNC, // all chars
    TYP, // Capital Letters
    INT, // numbers
};

/// A lexical token. Payload-carrying variants own their slice; the memory
/// belongs to whoever supplied the allocator to `tokenize`.
pub const Token = union(Token_enum) {
    RARROW, // ->
    LARROW, // <-
    BACKTICK, // `
    PERIOD, // .
    COMMA, // ,
    QMARK, // ?
    LPAREN, // (
    RPAREN, // )
    STRING: []const u8, // "..."
    BUILTIN: []const u8, // !word
    FUNC: []const u8, // lowercase letters
    TYP: []const u8, // Capital Letters
    INT: i64, // numbers

    /// Render the token as text. Only `.INT` allocates; every other variant
    /// returns either a static literal or the token's own payload slice, so
    /// the caller should free only the `.INT` result.
    pub fn print(self: Token, alloc: std.mem.Allocator) ![]const u8 {
        return switch (self) {
            .RARROW => "->",
            .LARROW => "<-",
            .BACKTICK => "`",
            .PERIOD => ".",
            .COMMA => ",",
            .QMARK => "?",
            .LPAREN => "(",
            .RPAREN => ")",
            .STRING => |v| v,
            .BUILTIN => |v| v,
            .FUNC => |v| v,
            .TYP => |v| v,
            .INT => |v| try std.fmt.allocPrint(alloc, "{d}", .{v}),
        };
    }
};

/// Creates an iterator ("tokenizer") over a slice of `typ`.
pub fn Iterator(comptime typ: type) type {
    return struct {
        items: []const typ,
        index: usize = 0,

        const SelfType = Iterator(typ);

        const Error = error{
            OutOfBounds,
            ExpectedItem,
            EndOfItems,
        };

        /// Initialize with a slice. The slice is not copied; the caller keeps
        /// ownership and must keep it alive for the iterator's lifetime.
        pub fn init(items: []const typ) SelfType {
            return .{ .items = items };
        }

        /// Look `ahead` items past the current one without consuming anything.
        pub fn peekAhead(self: *SelfType, ahead: u32) ?typ {
            if (self.index + ahead >= self.items.len) return null;
            return self.items[self.index + ahead];
        }

        /// Current item, or null at end of input.
        pub fn peek(self: *SelfType) ?typ {
            return self.peekAhead(0);
        }

        /// Current item (or null at end), advancing the index.
        pub fn next(self: *SelfType) ?typ {
            const ret = self.peek();
            self.skip();
            return ret;
        }

        /// Consume the current item iff it equals `expected`.
        /// Fails with `Error.EndOfItems` at end of input (the old code
        /// panicked here via `.?`) and `Error.ExpectedItem` on a mismatch.
        pub fn consume(self: *SelfType, expected: typ) !?typ {
            const current = self.peek() orelse return Error.EndOfItems;
            if (!std.meta.eql(current, expected)) return Error.ExpectedItem;
            return self.next();
        }

        /// Collect items up to (excluding) the first item equal to one of
        /// `delims`, or until end of input. The delimiter itself is NOT
        /// consumed. Caller owns the returned slice.
        pub fn consumeuntil(self: *SelfType, alloc: std.mem.Allocator, delims: []const typ) !?[]typ {
            var arr = try std.ArrayList(typ).initCapacity(alloc, 128);
            errdefer arr.deinit(alloc);
            // The label is essential: a plain `break` inside the `for` only
            // left the delimiter scan, so the old code never stopped at all.
            scan: while (self.peek()) |item| {
                for (delims) |d| {
                    if (std.meta.eql(item, d)) break :scan;
                }
                self.skip();
                try arr.append(alloc, item);
            }
            return try arr.toOwnedSlice(alloc);
        }

        /// Like `consumeuntil`, but a delimiter directly preceded by `escape`
        /// does not terminate the scan (it is still collected verbatim).
        pub fn consumeuntilescape(self: *SelfType, alloc: std.mem.Allocator, delims: []const typ, escape: typ) !?[]typ {
            var arr = try std.ArrayList(typ).initCapacity(alloc, 128);
            errdefer arr.deinit(alloc);
            // Optional instead of `undefined`: the old code read `previous`
            // before ever writing it when the very first item was a delimiter.
            var previous: ?typ = null;
            scan: while (self.peek()) |item| {
                const escaped = if (previous) |p| std.meta.eql(p, escape) else false;
                for (delims) |d| {
                    if (std.meta.eql(item, d) and !escaped) break :scan;
                }
                self.skip();
                try arr.append(alloc, item);
                previous = item;
            }
            return try arr.toOwnedSlice(alloc);
        }

        /// Collect items while each is a member of `allowed`; stops at the
        /// first item not in the set (or end of input). Caller owns the slice.
        pub fn consumewhile(self: *SelfType, alloc: std.mem.Allocator, allowed: []const typ) !?[]typ {
            var arr = try std.ArrayList(typ).initCapacity(alloc, 128);
            errdefer arr.deinit(alloc);
            scan: while (self.peek()) |item| {
                // Membership test; the old code compared against the first
                // `allowed` element only, and its `break` never exited the
                // outer loop anyway.
                const member = for (allowed) |d| {
                    if (std.meta.eql(item, d)) break true;
                } else false;
                if (!member) break :scan;
                self.skip();
                try arr.append(alloc, item);
            }
            return try arr.toOwnedSlice(alloc);
        }

        /// `consume` with the error swallowed: null when `expected` is absent.
        pub fn maybe(self: *SelfType, expected: typ) ?typ {
            return self.consume(expected) catch null;
        }

        /// Skip over the current item (safe past the end: `peek` bounds-checks).
        pub fn skip(self: *SelfType) void {
            self.index += 1;
        }
    };
}

/// Tokenize `input`. Caller owns the returned slice and every payload slice
/// inside it (allocating from an arena lets you free everything at once).
pub fn tokenize(allocator: std.mem.Allocator, input: []const u8) ![]Token {
    var toks = std.ArrayList(Token){};
    errdefer toks.deinit(allocator); // don't leak the list on a parse error
    var buff = std.ArrayList(u8){};
    defer buff.deinit(allocator);
    var src = Iterator(u8).init(input);

    const internals = struct {
        /// Flush pending word characters as a FUNC token (no-op when empty).
        fn clearbuff(alloc: std.mem.Allocator, tok: *std.ArrayList(Token), buf: *std.ArrayList(u8)) !void {
            if (buf.items.len == 0) return;
            // toOwnedSlice already empties `buf`; the old extra clearAndFree
            // was redundant.
            const str = try buf.toOwnedSlice(alloc);
            try tok.append(alloc, .{ .FUNC = str });
        }
    };

    while (src.peek()) |char| {
        switch (char) {
            '`' => {
                src.skip();
                try internals.clearbuff(allocator, &toks, &buff);
                try toks.append(allocator, .BACKTICK);
            },
            ',' => {
                src.skip();
                try internals.clearbuff(allocator, &toks, &buff);
                try toks.append(allocator, .COMMA);
            },
            '.' => {
                src.skip();
                try internals.clearbuff(allocator, &toks, &buff);
                try toks.append(allocator, .PERIOD);
            },
            '?' => {
                // QMARK was declared but unreachable before; the removed
                // draft lexer emitted it for '?', so handle it here too.
                src.skip();
                try internals.clearbuff(allocator, &toks, &buff);
                try toks.append(allocator, .QMARK);
            },
            '(' => {
                src.skip();
                try internals.clearbuff(allocator, &toks, &buff);
                try toks.append(allocator, .LPAREN);
            },
            ')' => {
                src.skip();
                try internals.clearbuff(allocator, &toks, &buff);
                try toks.append(allocator, .RPAREN);
            },
            '-' => {
                src.skip();
                // "->" only when '>' follows; a lone '-' (including at end of
                // input, which used to panic on `.?`) is an ordinary word char.
                if (src.peek() == '>') {
                    src.skip();
                    try internals.clearbuff(allocator, &toks, &buff);
                    try toks.append(allocator, .RARROW);
                } else {
                    try buff.append(allocator, '-');
                }
            },
            '<' => {
                src.skip();
                if (src.peek() == '-') {
                    src.skip();
                    try internals.clearbuff(allocator, &toks, &buff);
                    try toks.append(allocator, .LARROW);
                } else {
                    try buff.append(allocator, '<');
                }
            },
            '0'...'9' => {
                // EOF-safe digit scan (the old `src.peek().?` panicked when
                // the input ended with a digit).
                while (src.peek()) |c| {
                    if (!std.ascii.isDigit(c)) break;
                    try buff.append(allocator, src.next().?);
                }
                // i64 matches the INT payload (the old code parsed only i32).
                // NOTE(review): digits glued onto a pending word ("foo1")
                // still make parseInt fail, exactly as before — confirm
                // whether such identifiers should instead be one FUNC token.
                const num = try std.fmt.parseInt(i64, buff.items, 10);
                try toks.append(allocator, .{ .INT = num });
                buff.clearRetainingCapacity();
            },
            'A'...'Z' => {
                while (src.peek()) |c| {
                    if (!std.ascii.isAlphabetic(c)) break;
                    try buff.append(allocator, src.next().?);
                }
                try toks.append(allocator, .{ .TYP = try buff.toOwnedSlice(allocator) });
            },
            '!' => {
                src.skip();
                while (src.peek()) |c| {
                    if (!std.ascii.isAlphanumeric(c)) break;
                    try buff.append(allocator, src.next().?);
                }
                try toks.append(allocator, .{ .BUILTIN = try buff.toOwnedSlice(allocator) });
            },
            '"' => {
                src.skip(); // opening quote
                while (true) {
                    // An unterminated string used to panic; report it instead.
                    const c = src.next() orelse return error.UnterminatedString;
                    if (c == '"') break;
                    try buff.append(allocator, c);
                }
                try toks.append(allocator, .{ .STRING = try buff.toOwnedSlice(allocator) });
            },
            ' ', '\t', '\n' => {
                src.skip();
                try internals.clearbuff(allocator, &toks, &buff);
            },
            else => try buff.append(allocator, src.next().?),
        }
    }
    // Flush a trailing word: input like "foo" previously produced no tokens.
    try internals.clearbuff(allocator, &toks, &buff);
    return toks.toOwnedSlice(allocator);
}