summaryrefslogtreecommitdiff
path: root/src/tokenize.zig
blob: 6225814e586f81b609ed815f8280d354a3c6d3eb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
const std = @import("std");

/// Error set for tokenization failures; `UnknownToken` is its only member.
const TokenError = error{UnknownToken};

/// A single lexical token, modelled as a tagged union.
const Token = union(enum) {
    /// Text of a word: a run of ASCII alphanumeric bytes that starts with a
    /// letter. The tokenizer in this file stores a heap-allocated copy here.
    ret: []const u8,
    /// Value of a base-10 integer literal.
    intLit: i32,
    /// The `;` character.
    semiCol,
    /// Payload-free variant; not produced by the tokenizer visible in this
    /// file. NOTE(review): intended meaning not evident here — confirm.
    nil,
};

/// Forward-only cursor over a list of tokens.
pub const TokenIterator = struct {
    /// Tokens to walk over.
    tokens: std.ArrayList(Token),
    /// Position of the token that the next call to `next` will return.
    index: usize = 0,

    /// Returns the token under the cursor and advances by one, or `null`
    /// once every token has been handed out.
    ///
    /// The cursor only moves when a token is actually returned, so calling
    /// `next` repeatedly after exhaustion leaves `index` at `tokens.items.len`
    /// instead of letting it grow without bound.
    pub fn next(self: *TokenIterator) ?Token {
        if (self.index >= self.tokens.items.len) return null;
        const token = self.tokens.items[self.index];
        self.index += 1;
        return token;
    }
};

/// Byte-wise cursor over a borrowed string.
///
/// The iterator never copies or frees `string`; the caller keeps the backing
/// memory alive for as long as the iterator is used.
pub const StringIterator = struct {
    /// Text being scanned.
    string: []const u8,
    /// Offset of the next unread byte.
    index: usize = 0,

    /// Creates an iterator positioned at the first byte of `string`.
    pub fn init(string: []const u8) StringIterator {
        return .{ .string = string };
    }

    /// Returns the byte under the cursor without advancing, or `null` at the
    /// end of the input.
    pub fn peek(self: StringIterator) ?u8 {
        return if (self.index < self.string.len) self.string[self.index] else null;
    }

    /// Returns the byte under the cursor and advances past it, or `null` at
    /// the end of the input. The cursor does not move once the end is reached.
    pub fn consume(self: *StringIterator) ?u8 {
        const byte = self.peek() orelse return null;
        self.index += 1;
        return byte;
    }

    /// Advances past the byte under the cursor. Does nothing at the end of
    /// the input, so `index` never exceeds `string.len` through this method.
    pub fn skip(self: *StringIterator) void {
        if (self.index < self.string.len) self.index += 1;
    }
};

/// Splits source text into a flat list of `Token`s.
///
/// Ownership: the tokenizer owns the token list and the heap-allocated text
/// carried by every `.ret` token. Both are released by `deinit`.
pub const Tokenizer = struct {
    /// Cursor over the source text being tokenized (borrowed, not owned).
    src: StringIterator,
    /// Allocator used for the token list and for the text of `.ret` tokens.
    allocator: std.mem.Allocator,
    /// Tokens produced so far.
    toks: std.ArrayList(Token),

    /// Creates a tokenizer over `src`. Nothing is allocated until `tokenize`
    /// is called.
    pub fn init(allocator: std.mem.Allocator, src: []const u8) Tokenizer {
        return Tokenizer{
            .src = StringIterator.init(src),
            .allocator = allocator,
            .toks = std.ArrayList(Token).init(allocator),
        };
    }

    /// Frees the text owned by every `.ret` token and then the token list.
    /// The list returned by `tokenize` shares this storage and must not be
    /// used afterwards.
    pub fn deinit(self: *Tokenizer) void {
        for (self.toks.items) |tok| {
            switch (tok) {
                .ret => |text| self.allocator.free(text),
                .intLit, .semiCol, .nil => {},
            }
        }
        self.toks.deinit();
    }

    /// Scans the remaining source text and appends one token per lexeme.
    ///
    /// Returns a by-value copy of the internal list; it shares its backing
    /// buffer with `self.toks`, so release it through `deinit` rather than
    /// calling `deinit` on the returned list as well.
    ///
    /// Errors:
    /// - `error.UnknownToken` for any byte that does not start a known token
    ///   (this currently includes `+ - * /`, which have no `Token` variant).
    /// - `error.Overflow` when an integer literal does not fit in `i32`
    ///   (propagated from `std.fmt.parseInt`).
    /// - `error.OutOfMemory` when an allocation fails.
    pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) {
        // Scratch buffer reused for every number and word.
        var buf = std.ArrayList(u8).init(self.allocator);
        defer buf.deinit();

        while (self.src.peek()) |char| {
            switch (char) {
                ' ', '\n', '\t', '\r' => self.src.skip(),
                '0'...'9' => {
                    // Stop at end of input instead of unwrapping a null peek.
                    while (self.src.peek()) |digit| {
                        if (!std.ascii.isDigit(digit)) break;
                        try buf.append(digit);
                        self.src.skip();
                    }

                    const num = try std.fmt.parseInt(i32, buf.items, 10);
                    buf.clearRetainingCapacity();
                    try self.toks.append(.{ .intLit = num });
                },
                'a'...'z', 'A'...'Z' => {
                    while (self.src.peek()) |letter| {
                        if (!std.ascii.isAlphanumeric(letter)) break;
                        try buf.append(letter);
                        self.src.skip();
                    }

                    // toOwnedSlice leaves `buf` empty and ready for reuse.
                    const word = try buf.toOwnedSlice();
                    // Do not leak the word if the list cannot grow.
                    errdefer self.allocator.free(word);
                    try self.toks.append(.{ .ret = word });
                },
                ';' => {
                    self.src.skip();
                    try self.toks.append(.semiCol);
                },
                // TODO: operators have no Token variant yet. Reject them
                // explicitly; leaving the cursor in place would loop forever.
                '+', '-', '*', '/' => return error.UnknownToken,
                else => return error.UnknownToken,
            }
        }
        return self.toks;
    }
};