summaryrefslogtreecommitdiff
path: root/src/tokenize.zig
blob: 37080b4fd419c83423ca8c81c3737a46015cde99 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
const std = @import("std");

/// Error set with the single member `UnknownToken`.
/// NOTE(review): nothing in this part of the file refers to `TokenError` by
/// name — presumably it is meant for input characters that start no known
/// token; confirm the intended use.
const TokenError = error{UnknownToken};

/// A single lexical token as produced by `Tokenizer.tokenize`.
pub const Token = union(enum) {
    /// Alphanumeric word that starts with an ASCII letter (e.g. `exit`).
    /// The tokenizer allocates this slice with its allocator and
    /// `Tokenizer.deinit` does not free it, so the consumer must.
    ret: []const u8,
    /// Decimal integer literal, parsed as an `i32`.
    intLit: i32,
    /// Operator character: one of `+`, `-`, `*`, `/`.
    binaryOp: u8,
    /// The `;` character.
    semiCol,
    /// Payload-free variant; the tokenizer in this file never emits it.
    nil,
};

/// Builds a forward-only cursor type over a read-only slice of `typ`.
pub fn Iterator(comptime typ: type) type {
    return struct {
        const Self = @This();

        /// Slice being walked; it is only ever read.
        items: []const typ,
        /// Position of the current item within `items`.
        index: usize = 0,

        /// Wrap `items` in a cursor positioned on its first element.
        pub fn init(items: []const typ) Self {
            return .{ .items = items };
        }

        /// Return the current item without moving, or null once the
        /// cursor is at or past the end of the slice.
        pub fn peek(self: Self) ?typ {
            return if (self.index < self.items.len) self.items[self.index] else null;
        }

        /// Return the current item (null at the end) and step forward.
        /// The index is bumped on every call, even once the end is reached.
        pub fn consume(self: *Self) ?typ {
            const current = self.peek();
            self.index += 1;
            return current;
        }
        /// Alias of `consume`.
        pub const next = consume;

        /// Step past the current item without looking at it.
        pub fn skip(self: *Self) void {
            self.index += 1;
        }
    };
}

/// Tokenizes a string of source code.
///
/// Ownership: the token list's buffer belongs to the tokenizer and is
/// released by `deinit`. The text of every `.ret` token is a separate
/// allocation made with `allocator`; `deinit` does not free it, so whoever
/// consumes the tokens must.
pub const Tokenizer = struct {
    /// Cursor over the source bytes.
    src: Iterator(u8),
    /// Allocator used for the token list and for `.ret` token text.
    allocator: std.mem.Allocator,
    /// Tokens produced so far.
    toks: std.ArrayList(Token),

    /// Prepares a tokenizer over `src`; `src` must outlive the tokenizer.
    /// Deinitialize with Tokenizer.deinit()
    pub fn init(allocator: std.mem.Allocator, src: []const u8) Tokenizer {
        return Tokenizer{
            .src = Iterator(u8).init(src),
            .allocator = allocator,
            .toks = std.ArrayList(Token).init(allocator),
        };
    }

    /// Releases the token list's buffer. The text of `.ret` tokens is NOT
    /// freed here; that stays the consumer's responsibility.
    pub fn deinit(self: *Tokenizer) void {
        self.toks.deinit();
    }

    /// Scans the remaining source and returns the accumulated tokens.
    ///
    /// The returned ArrayList is a copy of the `toks` header and shares its
    /// buffer, so release it through `Tokenizer.deinit` only — never call
    /// `deinit` on the returned list as well.
    ///
    /// Errors:
    /// - `error.UnknownToken` for a character that starts no known token
    ///   (previously such a character made the loop spin forever),
    /// - `error.Overflow` for an integer literal that does not fit in `i32`,
    /// - `error.OutOfMemory` when an allocation fails.
    /// On any error the `.ret` text collected so far is freed and `toks` is
    /// emptied, so a failed call leaks nothing.
    pub fn tokenize(self: *Tokenizer) !std.ArrayList(Token) {
        errdefer {
            for (self.toks.items) |tok| {
                switch (tok) {
                    .ret => |word| self.allocator.free(word),
                    else => {},
                }
            }
            self.toks.clearRetainingCapacity();
        }

        while (self.src.peek()) |char| {
            switch (char) {
                // '\r' is skipped too so CRLF line endings tokenize cleanly.
                ' ', '\n', '\t', '\r' => self.src.skip(),
                '0'...'9' => {
                    // Parse straight from the source slice; no scratch buffer.
                    const digits = self.takeWhile(std.ascii.isDigit);
                    const num = try std.fmt.parseInt(i32, digits, 10);
                    try self.toks.append(.{ .intLit = num });
                },
                'a'...'z', 'A'...'Z' => {
                    const word = try self.allocator.dupe(u8, self.takeWhile(std.ascii.isAlphanumeric));
                    // Until the token is stored, nothing else owns `word`.
                    errdefer self.allocator.free(word);
                    try self.toks.append(.{ .ret = word });
                },
                ';' => {
                    self.src.skip();
                    try self.toks.append(.semiCol);
                },
                '+', '-', '*', '/' => {
                    self.src.skip();
                    try self.toks.append(.{ .binaryOp = char });
                },
                else => return error.UnknownToken,
            }
        }
        return self.toks;
    }

    /// Advances the cursor while `pred` accepts the current byte and returns
    /// the consumed run as a slice of the source. Stops cleanly at the end
    /// of input instead of unwrapping a null peek.
    fn takeWhile(self: *Tokenizer, comptime pred: fn (u8) bool) []const u8 {
        const start = self.src.index;
        while (self.src.peek()) |c| {
            if (!pred(c)) break;
            self.src.skip();
        }
        return self.src.items[start..self.src.index];
    }
};

// Tokenizes one statement that contains every token kind the tokenizer emits
// and checks count, kind and payload of each token.
test "Tokenize" {
    std.testing.log_level = std.log.Level.info;
    const testing = std.testing;
    const source: []const u8 = "exit 120 + 150 - 260 * 12 / 5;";

    var tokenizer = Tokenizer.init(testing.allocator, source);
    defer tokenizer.deinit();
    const actual = try tokenizer.tokenize();
    // `.ret` text is owned by the consumer; free it on every exit path so a
    // failed expectation is not additionally reported as a leak.
    defer {
        for (actual.items) |tok| {
            switch (tok) {
                .ret => |word| testing.allocator.free(word),
                else => {},
            }
        }
    }

    const expected = [_]Token{
        .{ .ret = "exit" },
        .{ .intLit = 120 },
        .{ .binaryOp = '+' },
        .{ .intLit = 150 },
        .{ .binaryOp = '-' },
        .{ .intLit = 260 },
        .{ .binaryOp = '*' },
        .{ .intLit = 12 },
        .{ .binaryOp = '/' },
        .{ .intLit = 5 },
        .semiCol,
    };

    // Check the count first: a multi-object `for` over slices of different
    // lengths trips a safety check instead of producing a test failure.
    try testing.expectEqual(expected.len, actual.items.len);
    for (actual.items, expected) |act, exp| {
        // Compare tags before payloads so no inactive union field is read.
        try testing.expectEqual(std.meta.activeTag(exp), std.meta.activeTag(act));
        switch (exp) {
            .ret => |text| try testing.expectEqualStrings(text, act.ret),
            .intLit => |value| try testing.expectEqual(value, act.intLit),
            .binaryOp => |op| try testing.expectEqual(op, act.binaryOp),
            .semiCol, .nil => {},
        }
    }
}