alta.lua
· 3.8 KiB · Lua
Bruto
Playground
--- Split rule source text into a flat array of tokens.
--
-- Punctuation characters become marker tokens:
--   "-" -> RULE, ">" -> ACCEPT, ":" -> SWITCH, ";" -> DONE.
-- A "," only terminates the symbol being built. Any other run of
-- non-whitespace characters is a word; consecutive words separated by
-- whitespace are joined with a single space into one SYMBOL token.
--
-- @param source string  text to tokenize
-- @return table  array of tokens shaped {type = ..., value = ...}; RULE
--   tokens carry no `value` field
local function tokenize(source)
  local tokens, words, chars = {}, {}, {}

  -- Close the word currently being accumulated (if any) and queue it as
  -- part of the pending symbol.
  local function push_token()
    if #chars ~= 0 then
      table.insert(words, table.concat(chars))
      chars = {}
    end
  end

  -- Close the pending symbol (if any) and emit it as a SYMBOL token.
  local function push_symbol()
    push_token()
    if #words ~= 0 then
      table.insert(tokens, {type = "SYMBOL", value = table.concat(words, " ")})
      words = {}
    end
  end

  for char in source:gmatch(".") do
    if char == "-" then
      push_symbol()
      table.insert(tokens, {type = "RULE"})
    elseif char == ">" then
      push_symbol()
      table.insert(tokens, {type = "ACCEPT", value = "[accept]"})
    elseif char == ":" then
      push_symbol()
      table.insert(tokens, {type = "SWITCH", value = "[switch]"})
    elseif char == ";" then
      push_symbol()
      table.insert(tokens, {type = "DONE", value = "[done]"})
    elseif char == "," then
      push_symbol()
    elseif char == " " or char == "\n" or char == "\t" or char == "\r" then
      -- "\r" is included so CRLF line endings do not end up inside a word.
      push_token()
    else
      table.insert(chars, char)
    end
  end

  -- Flush whatever is still pending at end of input.
  push_symbol()
  return tokens
end
--- Peek at the token under the parser's cursor without consuming it.
-- @param self table  parser state with `tokens` (array) and `current` (index)
-- @return the entry stored at `tokens[current]`, or nil if that slot is empty
local function here(self)
  local cursor = self.current
  return self.tokens[cursor]
end
--- Move the parser's cursor one position forward.
-- @param self table  parser state; its `current` field is incremented
local function advance(self)
  local next_index = self.current + 1
  self.current = next_index
end
--- Consume one token: return the token under the cursor, then step past it.
-- @param self table  parser object providing `here` and `advance` methods
-- @return the token that was under the cursor before advancing
local function read(self)
  local token = self:here()
  self:advance()
  return token
end
--- Report whether the cursor has moved past the last token.
-- @param self table  parser state with `tokens` (array) and `current` (index)
-- @return boolean  true when `current` exceeds the number of tokens
local function done(self)
  local token_count = #self.tokens
  return token_count < self.current
end
--- Build a parser object positioned at the first token.
-- @param tokens table  array of tokens to walk over
-- @return table  parser holding the cursor state plus its method fields
local function new_parser(tokens)
  local parser = {
    -- Cursor state: index of the next token to read, and the token array.
    current = 1,
    tokens = tokens,
    -- Methods, stored as plain fields so they can be called as parser:name().
    here = here,
    advance = advance,
    read = read,
    done = done,
  }
  return parser
end
-- Forward declarations: the three parser-state functions defined below call
-- one another, so their local names must exist before any body is compiled.
local parse_rules, parse_rule, parse_header
--- Header state: collect header values until a rule or terminator shows up.
--
-- The value of every token that is not DONE, RULE or ACCEPT is appended to
-- `header`. A DONE token hands control to parse_rules, a RULE token hands
-- control to parse_rule, and ACCEPT tokens are ignored in this state.
--
-- @param parser table  token cursor providing done() and read()
-- @param header table  array receiving header values (mutated in place)
-- @param body table    array of rule opcodes, passed on to the other states
-- @return header, body  the two tables passed in, when input ends in this
--   state; otherwise whatever the state that takes over returns
function parse_header(parser, header, body)
  while not parser:done() do
    local token = parser:read()
    local kind = token.type
    if kind == "DONE" then
      return parse_rules(parser, header, body)
    elseif kind == "RULE" then
      return parse_rule(parser, header, body)
    elseif kind ~= "ACCEPT" then
      table.insert(header, token.value)
    end
  end
  -- Return the body table rather than the undeclared global `rules`, which
  -- no visible code ever sets to a table.
  return header, body
end
--- Rule state: translate the tokens of one rule into opcodes in `body`.
--
-- Opcodes emitted per token:
--   DONE   -> "[done]", then control returns to parse_rules
--   RULE   -> "[done]", then a new rule starts in this same state
--   ACCEPT -> "[accept]"
--   SYMBOL directly followed by SWITCH -> value, "[switch]" (both consumed)
--   anything else -> value, "[match]"
--
-- @param parser table  token cursor providing done(), read(), here(), advance()
-- @param header table  array of header values, passed on to the other states
-- @param body table    array receiving opcodes (mutated in place)
-- @return header, body  the two tables passed in, when input ends in this
--   state; otherwise whatever the state that takes over returns
function parse_rule(parser, header, body)
  while not parser:done() do
    local token = parser:read()
    local kind = token.type
    if kind == "DONE" then
      table.insert(body, "[done]")
      return parse_rules(parser, header, body)
    elseif kind == "RULE" then
      table.insert(body, "[done]")
      return parse_rule(parser, header, body)
    elseif kind == "ACCEPT" then
      table.insert(body, "[accept]")
    else
      -- The lookahead is nil when this token is the last one; guard it so a
      -- trailing symbol is emitted as a match instead of raising an error.
      local lookahead = parser:here()
      if kind == "SYMBOL" and lookahead ~= nil and lookahead.type == "SWITCH" then
        parser:advance()
        table.insert(body, token.value)
        table.insert(body, "[switch]")
      else
        table.insert(body, token.value)
        table.insert(body, "[match]")
      end
    end
  end
  -- Input ended inside a rule. No "[done]" is appended here, same as before.
  -- The old code inserted into an undeclared global `rules` at this point,
  -- which raised an error; the body table is returned instead.
  return header, body
end
--- Top-level state: skip tokens until a RULE marker starts a section.
--
-- A RULE token directly followed by ACCEPT consumes both and enters the
-- header state (parse_header); any other RULE token enters the rule state
-- (parse_rule). Tokens of every other type are skipped.
--
-- @param parser table  token cursor providing done(), read(), here(), advance()
-- @param header table  array of header values, passed on to the other states
-- @param body table    array of rule opcodes, passed on to the other states
-- @return header, body  the two tables passed in, when input ends in this
--   state; otherwise whatever the state that takes over returns
function parse_rules(parser, header, body)
  while not parser:done() do
    local token = parser:read()
    if token.type == "RULE" then
      -- The lookahead is nil when RULE is the very last token; guard it so
      -- that case falls through to the rule state instead of raising.
      local lookahead = parser:here()
      if lookahead ~= nil and lookahead.type == "ACCEPT" then
        parser:advance()
        return parse_header(parser, header, body)
      end
      return parse_rule(parser, header, body)
    end
  end
  -- Return the body table rather than the undeclared global `rules`.
  return header, body
end
--- Parse a token array into one flat bytecode array.
--
-- The header values come first, followed by the rule-body opcodes.
--
-- @param tokens table  array of tokens to parse
-- @return table  array holding every header entry, then every body entry
local function parse(tokens)
  local header, body = {}, {}
  local parser = new_parser(tokens)

  -- The parser states append to `header` and `body` in place. Only the first
  -- result is kept: storing the second one in an undeclared `rules` variable
  -- created an accidental global.
  header = parse_rules(parser, header, body)

  local bytecode = {}
  for _, op in ipairs(header) do
    table.insert(bytecode, op)
  end
  for _, op in ipairs(body) do
    table.insert(bytecode, op)
  end
  return bytecode
end
1 | local function tokenize(string) |
2 | local tokens, current_symbol, current_token = {}, {}, {} |
3 | |
4 | local function push_token() |
5 | if #current_token ~= 0 then |
6 | table.insert(current_symbol, table.concat(current_token)) |
7 | current_token = {} |
8 | end |
9 | end |
10 | |
11 | local function push_symbol() |
12 | push_token() |
13 | if #current_symbol ~= 0 then |
14 | table.insert(tokens, {type = "SYMBOL", value = table.concat(current_symbol, " ")}) |
15 | current_symbol = {} |
16 | end |
17 | end |
18 | |
19 | for char in string:gmatch(".") do |
20 | if char == "-" then |
21 | push_symbol() |
22 | table.insert(tokens, {type = "RULE"}) |
23 | elseif char == ">" then |
24 | push_symbol() |
25 | table.insert(tokens, {type = "ACCEPT", value = "[accept]"}) |
26 | elseif char == ":" then |
27 | push_symbol() |
28 | table.insert(tokens, {type = "SWITCH", value = "[switch]"}) |
29 | elseif char == ";" then |
30 | push_symbol() |
31 | table.insert(tokens, {type = "DONE", value = "[done]"}) |
32 | elseif char == "," then |
33 | push_symbol() |
34 | elseif char == " " or char == "\n" or char == "\t" then |
35 | push_token() |
36 | else |
37 | table.insert(current_token, char) |
38 | end |
39 | end |
40 | |
41 | push_symbol() |
42 | return tokens |
43 | end |
44 | |
45 | local function here(self) |
46 | return self.tokens[self.current] |
47 | end |
48 | |
49 | local function advance(self) |
50 | self.current = self.current + 1 |
51 | end |
52 | |
53 | local function read(self) |
54 | return self:here(), self:advance() |
55 | end |
56 | |
57 | local function done(self) |
58 | return self.current > #self.tokens |
59 | end |
60 | |
61 | local function new_parser(tokens) |
62 | return {current = 1, tokens = tokens, here = here, advance = advance, read = read, done = done} |
63 | end |
64 | |
65 | local parse_rules, parse_rule, parse_header |
66 | |
67 | function parse_header(parser, header, body) |
68 | while not parser:done() do |
69 | local token = parser:read() |
70 | if token.type == "DONE" then |
71 | return parse_rules(parser, header, body) |
72 | elseif token.type == "RULE" then |
73 | return parse_rule(parser, header, body) |
74 | elseif token.type == "ACCEPT" then |
75 | -- skip this token in this context |
76 | else |
77 | table.insert(header, token.value) |
78 | end |
79 | end |
80 | return header, rules |
81 | end |
82 | |
83 | function parse_rule(parser, header, body) |
84 | -- Starts a new chunk of opcodes |
85 | while not parser:done() do |
86 | local token = parser:read() |
87 | if token.type == "DONE" then |
88 | table.insert(body, "[done]") |
89 | return parse_rules(parser, header, body) |
90 | elseif token.type == "RULE" then |
91 | table.insert(body, "[done]") |
92 | return parse_rule(parser, header, body) |
93 | elseif token.type == "ACCEPT" then |
94 | table.insert(body, "[accept]") |
95 | elseif token.type == "SYMBOL" and parser:here().type == "SWITCH" then |
96 | parser:advance() |
97 | table.insert(body, token.value) |
98 | table.insert(body, "[switch]") |
99 | else |
100 | table.insert(body, token.value) |
101 | table.insert(body, "[match]") |
102 | end |
103 | end |
104 | table.insert(rules, body) |
105 | return header, rules |
106 | end |
107 | |
108 | function parse_rules(parser, header, body) |
109 | while not parser:done() do |
110 | local token = parser:read() |
111 | if token.type == "RULE" and parser:here().type == "ACCEPT" then |
112 | parser:advance() |
113 | return parse_header(parser, header, body) |
114 | elseif token.type == "RULE" then |
115 | return parse_rule(parser, header, body) |
116 | end |
117 | end |
118 | return header, rules |
119 | end |
120 | |
121 | local function parse(tokens) |
122 | local header, body, bytecode = {}, {}, {} |
123 | local parser = new_parser(tokens) |
124 | header, rules = parse_rules(parser, header, body) |
125 | for _, op in ipairs(header) do |
126 | table.insert(bytecode, op) |
127 | end |
128 | |
129 | for _, op in ipairs(body) do |
130 | table.insert(bytecode, op) |
131 | end |
132 | return bytecode |
133 | end |