summary | refs | log | tree | commit | diff
path: root/src/tokenize.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenize.zig')
-rw-r--r-- src/tokenize.zig 132
1 files changed, 84 insertions, 48 deletions
diff --git a/src/tokenize.zig b/src/tokenize.zig
index b5d5d23..6225814 100644
--- a/src/tokenize.zig
+++ b/src/tokenize.zig
@@ -5,62 +5,98 @@ const TokenError = error{UnknownToken};
+/// A single lexical token emitted by the tokenizer.
const Token = union(enum) {
+    /// A word (a letter followed by letters/digits); the payload is a
+    /// heap-allocated copy of its text. The tokenizer emits this variant for
+    /// every word it reads, not only for `return`.
ret: []const u8,
+    /// An integer literal, already parsed from base 10 into an `i32`.
intLit: i32,
- semiCol: u8,
- nil: void,
+    /// A `;` character; carries no payload.
+ semiCol,
+    /// Payload-less variant; no code visible in this diff produces it.
+ nil,
};
+/// Forward-only cursor over a list of tokens.
pub const TokenIterator = struct {
- tokens: []const Token,
+    /// Tokens to walk; no method of this struct frees the list.
+    tokens: std.ArrayList(Token),
+    /// Position of the token the next call to `next` will return.
index: usize = 0,
+    /// Returns the token at `index` and advances past it, or null once every
+    /// token has been returned.
pub fn next(self: *TokenIterator) ?Token {
- defer self.*.index = self.*.index + 1;
- if (self.*.index >= self.*.tokens.len) return null;
- return self.*.tokens[self.*.index];
+        // Advance only when a token is handed out, so repeated calls on an
+        // exhausted iterator leave `index` parked at the end instead of
+        // counting upwards forever.
+        if (self.index >= self.tokens.items.len) return null;
+        const token = self.tokens.items[self.index];
+        self.index += 1;
+        return token;
}
};
-pub fn tokenize(allocator: std.mem.Allocator, buff: []const u8) ![]const Token {
- var toks = std.ArrayList(Token).init(allocator);
- defer toks.deinit();
- var str = std.ArrayList(u8).init(allocator);
- defer str.deinit();
-
- var i: u32 = 0;
- while (i < buff.len) {
- switch (buff[i]) {
- ' ', '\n', '\t' => {
- i = i + 1;
- continue;
- },
- '0'...'9' => {
- while (std.ascii.isDigit(buff[i])) {
- try str.append(buff[i]);
- i = i + 1;
- }
- const num: i32 = try std.fmt.parseInt(i32, str.items, 10);
- try toks.append(.{ .intLit = num });
- str.deinit();
- str = std.ArrayList(u8).init(allocator);
- },
- 'a'...'z', 'A'...'Z' => {
- while (std.ascii.isAlphanumeric(buff[i])) {
- try str.append(buff[i]);
- i = i + 1;
- }
- try toks.append(.{ .ret = try str.toOwnedSlice() });
- str.deinit();
- str = std.ArrayList(u8).init(allocator);
- },
- ';' => {
- i = i + 1;
- try toks.append(.{ .semiCol = ';' });
- },
- '+', '-', '*', '/' => {
- // Process operator
- },
- else => {},
+/// Byte cursor over a borrowed string with one byte of lookahead.
+pub const StringIterator = struct {
+    /// Text being scanned; stored as given, never copied or freed here.
+    string: []const u8,
+    /// Offset of the next unread byte.
+    index: usize = 0,
+
+    /// Creates a cursor positioned on the first byte of `string`.
+    pub fn init(string: []const u8) StringIterator {
+        return .{ .string = string };
+    }
+
+    /// Returns the byte under the cursor without moving, or null at end of input.
+    pub fn peek(self: StringIterator) ?u8 {
+        return if (self.index < self.string.len) self.string[self.index] else null;
+    }
+
+    /// Returns the byte under the cursor (null at end of input) and moves the
+    /// cursor forward by one in either case.
+    pub fn consume(self: *StringIterator) ?u8 {
+        const current = self.peek();
+        self.index += 1;
+        return current;
+    }
+
+    /// Moves the cursor forward by one byte without reading it; no bounds check.
+    pub fn skip(self: *StringIterator) void {
+        self.index = self.index + 1;
+    }
+};
+
+/// Converts source text into a list of `Token`s.
+pub const Tokenizer = struct {
+    /// Read cursor over the source text.
+    src: StringIterator,
+    /// Allocator used for the token list and for word payloads.
+    allocator: std.mem.Allocator,
+    /// Tokens produced so far; released by `deinit`.
+    toks: std.ArrayList(Token),
+
+    /// Creates a tokenizer over `src`. The text is stored as given, not copied.
+    pub fn init(allocator: std.mem.Allocator, src: []const u8) Tokenizer {
+        return Tokenizer{
+            .src = StringIterator.init(src),
+            .allocator = allocator,
+            .toks = std.ArrayList(Token).init(allocator),
+        };
+    }
+
+    /// Frees the token list.
+    /// NOTE(review): the text slices carried by `.ret` tokens are not freed
+    /// here; confirm which side is meant to own them.
+    pub fn deinit(self: *Tokenizer) void {
+        self.toks.deinit();
+    }
+
+    /// Scans the rest of the source and appends one token per lexeme to `toks`.
+    ///
+    /// The returned list is a copy of the `toks` header and shares its buffer,
+    /// so the buffer must be released exactly once (`deinit` does that).
+    ///
+    /// Errors: `error.UnknownToken` for a character that cannot start a token
+    /// (operators included, because `Token` has no operator variant yet), any
+    /// `std.fmt.parseInt` error for a literal that does not fit in `i32`, and
+    /// `error.OutOfMemory`.
+    pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) {
+        // Scratch buffer holding the characters of the lexeme being read.
+        var str = std.ArrayList(u8).init(self.allocator);
+        defer str.deinit();
+
+        while (self.src.peek()) |char| {
+            switch (char) {
+                ' ', '\n', '\t', '\r' => self.src.skip(),
+                '0'...'9' => {
+                    // Stop at end of input as well as at the first non-digit;
+                    // a literal may be the very last thing in the source.
+                    while (self.src.peek()) |c| {
+                        if (!std.ascii.isDigit(c)) break;
+                        try str.append(c);
+                        self.src.skip();
+                    }
+
+                    const num = try std.fmt.parseInt(i32, str.items, 10);
+                    try self.toks.append(.{ .intLit = num });
+                    // Keep the allocation around for the next lexeme.
+                    str.clearRetainingCapacity();
+                },
+                'a'...'z', 'A'...'Z' => {
+                    while (self.src.peek()) |c| {
+                        if (!std.ascii.isAlphanumeric(c)) break;
+                        try str.append(c);
+                        self.src.skip();
+                    }
+
+                    // toOwnedSlice hands the bytes over and leaves `str` empty,
+                    // so no reset is needed afterwards.
+                    const word = try str.toOwnedSlice();
+                    // Do not leak the word if the token list cannot grow.
+                    errdefer self.allocator.free(word);
+                    try self.toks.append(.{ .ret = word });
+                },
+                ';' => {
+                    self.src.skip();
+                    try self.toks.append(.semiCol);
+                },
+                // Operators have no `Token` variant yet, so reject them for now.
+                '+', '-', '*', '/' => return error.UnknownToken,
+                // Nothing else can start a token. Returning here also prevents
+                // looping forever on a character that nobody consumes.
+                else => return error.UnknownToken,
+            }
}
+        return self.toks;
}
- return toks.toOwnedSlice();
-}
+};