From 3c531d4bbc754cc1dac6a050b30c3851eb7a2223 Mon Sep 17 00:00:00 2001 From: Nic Gaffney Date: Thu, 11 Jul 2024 22:07:03 -0500 Subject: chore(everything): Changed up some small stuff, comments and the like. totally not me forgetting what i put in this commit noooo... --- src/tokenize.zig | 111 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 72 insertions(+), 39 deletions(-) (limited to 'src/tokenize.zig') diff --git a/src/tokenize.zig b/src/tokenize.zig index 6225814..37080b4 100644 --- a/src/tokenize.zig +++ b/src/tokenize.zig @@ -2,74 +2,75 @@ const std = @import("std"); const TokenError = error{UnknownToken}; -const Token = union(enum) { +pub const Token = union(enum) { ret: []const u8, intLit: i32, + binaryOp: u8, semiCol, nil, }; -pub const TokenIterator = struct { - tokens: std.ArrayList(Token), - index: usize = 0, +/// Creates a tokenizer over a slice of typ +pub fn Iterator(comptime typ: type) type { + return struct { + items: []const typ, + index: usize = 0, - pub fn next(self: *TokenIterator) ?Token { - defer self.index = self.index + 1; - if (self.index >= self.tokens.items.len) return null; - return self.tokens.items[self.index]; - } -}; - -pub const StringIterator = struct { - string: []const u8, - index: usize = 0, - - pub fn init(string: []const u8) StringIterator { - return StringIterator{ .string = string }; - } + /// Initialize tokenizer with a slice + pub fn init(items: []const typ) Iterator(typ) { + return Iterator(typ){ .items = items }; + } - pub fn peek(self: StringIterator) ?u8 { - if (self.index >= self.string.len) return null; - return self.string[self.index]; - } + /// Get current item + pub fn peek(self: Iterator(typ)) ?typ { + if (self.index >= self.items.len) return null; + return self.items[self.index]; + } - pub fn consume(self: *StringIterator) ?u8 { - defer self.index += 1; - return self.peek(); - } + /// Get current item and iterate index + pub fn consume(self: *Iterator(typ)) ?typ { + defer self.index += 1; + return self.peek(); + } + /// Get current item and iterate index + pub const next = consume; - pub fn skip(self: *StringIterator) void { - self.index += 1; - } -}; + /// Skip over current item + pub fn skip(self: *Iterator(typ)) void { + self.index += 1; + } + }; +} +/// Tokenizes a string of source code pub const Tokenizer = struct { - src: StringIterator, + src: Iterator(u8), allocator: std.mem.Allocator, toks: std.ArrayList(Token), + /// Initializes a string of source code + /// Deinitialize with Tokenizer.deinit() pub fn init(allocator: std.mem.Allocator, src: []const u8) Tokenizer { return Tokenizer{ - .src = StringIterator.init(src), + .src = Iterator(u8).init(src), .allocator = allocator, .toks = std.ArrayList(Token).init(allocator), }; } + /// Releases allocated memory pub fn deinit(self: *Tokenizer) void { self.toks.deinit(); } + /// Returns an ArrayList of tokens pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) { var str = std.ArrayList(u8).init(self.allocator); defer str.deinit(); while (self.src.peek()) |char| { switch (char) { - ' ', '\n', '\t' => { - self.src.skip(); - continue; - }, + ' ', '\n', '\t' => self.src.skip(), '0'...'9' => { while (std.ascii.isDigit(self.src.peek().?)) try str.append(self.src.consume().?); @@ -91,12 +92,44 @@ pub const Tokenizer = struct { self.src.skip(); try self.toks.append(.semiCol); }, - '+', '-', '*', '/' => { - // Process operator - }, + '+', '-', '*', '/' => try self.toks.append(.{ .binaryOp = self.src.consume().? }), else => {}, } } return self.toks; } }; + +test "Tokenize" { + std.testing.log_level = std.log.Level.info; + const expect = std.testing.expect; + const testSource: []const u8 = "exit 120 + 150 - 260 * 12 / 5;"; + var toks = Tokenizer.init(std.testing.allocator, testSource); + defer toks.deinit(); + const arrtoks = try toks.tokenize(); + const expected = &[_]Token{ + .{ .ret = "exit" }, + .{ .intLit = 120 }, + .{ .binaryOp = '+' }, + .{ .intLit = 150 }, + .{ .binaryOp = '-' }, + .{ .intLit = 260 }, + .{ .binaryOp = '*' }, + .{ .intLit = 12 }, + .{ .binaryOp = '/' }, + .{ .intLit = 5 }, + .semiCol, + }; + for (arrtoks.items, expected) |act, exp| { + switch (act) { + .ret => |v| { + try expect(std.mem.eql(u8, v, exp.ret)); + std.testing.allocator.free(v); + }, + .intLit => |v| try expect(v == exp.intLit), + .semiCol => |v| try expect(v == exp.semiCol), + .binaryOp => |v| try expect(v == exp.binaryOp), + else => {}, + } + } +} -- cgit v1.2.3