alta.lua
· 3.8 KiB · Lua
原始文件
Playground
--- Split rule source text into a flat list of tokens.
--
-- Punctuation characters become their own tokens:
--   "-" -> {type = "RULE"}
--   ">" -> {type = "ACCEPT", value = "[accept]"}
--   ":" -> {type = "SWITCH", value = "[switch]"}
--   ";" -> {type = "DONE",   value = "[done]"}
-- A "," only terminates the symbol being built and emits nothing itself.
-- Every other run of non-whitespace characters is a word; consecutive words
-- are joined with single spaces into one {type = "SYMBOL", value = ...} token.
--
-- Any whitespace character (Lua's `%s` class, which includes "\r") separates
-- words, so files with CRLF line endings no longer leak "\r" into symbols.
--
-- NOTE: the parameter name shadows the global `string` library inside this
-- function; only method-call syntax on the value itself is used here.
--
-- @param string source text to tokenize
-- @return array of token tables in source order
local function tokenize(string)
  local tokens, symbol_words, word_chars = {}, {}, {}

  -- Template for each punctuation token; a fresh table is built per
  -- occurrence so emitted tokens never share identity.
  local punctuation = {
    ["-"] = {type = "RULE"},
    [">"] = {type = "ACCEPT", value = "[accept]"},
    [":"] = {type = "SWITCH", value = "[switch]"},
    [";"] = {type = "DONE", value = "[done]"},
  }

  -- Finish the word being accumulated (if any) and add it to the symbol.
  local function push_token()
    if #word_chars ~= 0 then
      symbol_words[#symbol_words + 1] = table.concat(word_chars)
      word_chars = {}
    end
  end

  -- Finish the symbol being accumulated (if any) and emit a SYMBOL token.
  local function push_symbol()
    push_token()
    if #symbol_words ~= 0 then
      tokens[#tokens + 1] = {type = "SYMBOL", value = table.concat(symbol_words, " ")}
      symbol_words = {}
    end
  end

  for char in string:gmatch(".") do
    local template = punctuation[char]
    if template then
      push_symbol()
      tokens[#tokens + 1] = {type = template.type, value = template.value}
    elseif char == "," then
      push_symbol()
    elseif char:find("%s") then
      push_token()
    else
      word_chars[#word_chars + 1] = char
    end
  end

  -- Flush whatever is still pending at end of input.
  push_symbol()
  return tokens
end
--- Look at the token under the parser's cursor without consuming it.
-- @param self parser state table with `tokens` and `current` fields
-- @return the entry of `self.tokens` at index `self.current` (nil if absent)
local function here(self)
  local cursor = self.current
  return self.tokens[cursor]
end
--- Move the parser's cursor forward by one token.
-- @param self parser state table; its `current` field is incremented in place
local function advance(self)
  local next_index = self.current + 1
  self.current = next_index
end
--- Consume and return the token under the cursor.
-- The token is fetched first and the cursor advanced afterwards as two
-- separate statements, so the result does not depend on how a return
-- expression list happens to be evaluated.
-- @param self parser object providing `here` and `advance` methods
-- @return the token that was under the cursor before advancing
local function read(self)
  local token = self:here()
  self:advance()
  return token
end
--- Report whether the cursor has moved past the last token.
-- @param self parser state table with `tokens` and `current` fields
-- @return true when `self.current` exceeds the length of `self.tokens`
local function done(self)
  local token_count = #self.tokens
  return token_count < self.current
end
--- Build a parser object over a token list.
-- The cursor starts at index 1 and the cursor helpers (`here`, `advance`,
-- `read`, `done`) are attached as fields so they can be called with `:`.
-- @param tokens array of tokens to iterate over
-- @return a new parser table
local function new_parser(tokens)
  local parser = {}
  parser.tokens = tokens
  parser.current = 1
  parser.here = here
  parser.advance = advance
  parser.read = read
  parser.done = done
  return parser
end
-- Forward declarations: the three parsing functions below call one another,
-- so their names are declared as locals before any of them is defined.
local parse_rules, parse_rule, parse_header
--- Parse a header section, appending token values to `header`.
--
-- Consumes tokens until one of the following happens:
--   * DONE  -> continue with parse_rules
--   * RULE  -> continue with parse_rule
--   * input is exhausted -> return
-- ACCEPT tokens are skipped; the `value` of any other token is appended to
-- `header`.
--
-- @param parser parser object (see new_parser)
-- @param header array receiving header entries (mutated in place)
-- @param body   array receiving rule opcodes (passed through)
-- @return header, body  (previously the second value read an undefined
--         global `rules`)
function parse_header(parser, header, body)
  while not parser:done() do
    local token = parser:read()
    local kind = token.type
    if kind == "DONE" then
      return parse_rules(parser, header, body)
    elseif kind == "RULE" then
      return parse_rule(parser, header, body)
    elseif kind ~= "ACCEPT" then
      -- ACCEPT is skipped in this context; everything else is header content.
      table.insert(header, token.value)
    end
  end
  return header, body
end
--- Parse the body of one rule, appending opcodes to `body`.
--
-- Token handling:
--   * DONE   -> append "[done]", continue with parse_rules
--   * RULE   -> append "[done]", start the next rule
--   * ACCEPT -> append "[accept]"
--   * SYMBOL immediately followed by SWITCH -> consume the SWITCH and append
--     the symbol's value followed by "[switch]"
--   * anything else -> append the token's value followed by "[match]"
--
-- Fixes over the previous version:
--   * The look-ahead is nil-checked, so a SYMBOL that is the last token no
--     longer raises "attempt to index a nil value".
--   * Running out of input inside a rule no longer calls table.insert on an
--     undefined global `rules`; the function simply returns.
--
-- NOTE(review): when input ends inside a rule no trailing "[done]" is
-- appended, unlike the DONE/RULE paths — confirm whether that is intended.
--
-- @param parser parser object (see new_parser)
-- @param header array of header entries (passed through)
-- @param body   array receiving rule opcodes (mutated in place)
-- @return header, body
function parse_rule(parser, header, body)
  while not parser:done() do
    local token = parser:read()
    local kind = token.type
    if kind == "DONE" then
      table.insert(body, "[done]")
      return parse_rules(parser, header, body)
    elseif kind == "RULE" then
      table.insert(body, "[done]")
      return parse_rule(parser, header, body)
    elseif kind == "ACCEPT" then
      table.insert(body, "[accept]")
    else
      local opcode = "[match]"
      -- `following` is nil when this token is the last one in the stream.
      local following = parser:here()
      if kind == "SYMBOL" and following ~= nil and following.type == "SWITCH" then
        parser:advance()
        opcode = "[switch]"
      end
      table.insert(body, token.value)
      table.insert(body, opcode)
    end
  end
  return header, body
end
--- Scan for the start of the next section and dispatch to its parser.
--
-- Tokens are consumed until a RULE token is found; other token types are
-- skipped. A RULE immediately followed by ACCEPT introduces a header (the
-- ACCEPT is consumed and parse_header takes over); any other RULE starts a
-- rule body handled by parse_rule.
--
-- Fixes over the previous version:
--   * The look-ahead is nil-checked, so a RULE that is the last token no
--     longer raises "attempt to index a nil value".
--   * The second return value is `body` rather than an undefined global
--     `rules`.
--
-- @param parser parser object (see new_parser)
-- @param header array receiving header entries
-- @param body   array receiving rule opcodes
-- @return header, body
function parse_rules(parser, header, body)
  while not parser:done() do
    local token = parser:read()
    if token.type == "RULE" then
      -- `following` is nil when the RULE token ends the stream.
      local following = parser:here()
      if following ~= nil and following.type == "ACCEPT" then
        parser:advance()
        return parse_header(parser, header, body)
      end
      return parse_rule(parser, header, body)
    end
  end
  return header, body
end
--- Turn a token list into a flat bytecode array.
-- The tokens are parsed into a header list and a body list, which are then
-- concatenated: all header entries first, followed by all body opcodes.
--
-- The previous version assigned the parser's second result to an undeclared
-- name, creating a global `rules`; nothing is written to the global
-- environment any more.
--
-- @param tokens array of tokens as produced by tokenize
-- @return array of header entries followed by body opcodes
local function parse(tokens)
  local header, body, bytecode = {}, {}, {}
  local parser = new_parser(tokens)

  -- The parsing functions fill `body` in place; only the returned header is
  -- picked up here, matching what the original code consumed.
  header = parse_rules(parser, header, body)

  for _, op in ipairs(header) do
    bytecode[#bytecode + 1] = op
  end
  for _, op in ipairs(body) do
    bytecode[#bytecode + 1] = op
  end
  return bytecode
end
| 1 | local function tokenize(string) |
| 2 | local tokens, current_symbol, current_token = {}, {}, {} |
| 3 | |
| 4 | local function push_token() |
| 5 | if #current_token ~= 0 then |
| 6 | table.insert(current_symbol, table.concat(current_token)) |
| 7 | current_token = {} |
| 8 | end |
| 9 | end |
| 10 | |
| 11 | local function push_symbol() |
| 12 | push_token() |
| 13 | if #current_symbol ~= 0 then |
| 14 | table.insert(tokens, {type = "SYMBOL", value = table.concat(current_symbol, " ")}) |
| 15 | current_symbol = {} |
| 16 | end |
| 17 | end |
| 18 | |
| 19 | for char in string:gmatch(".") do |
| 20 | if char == "-" then |
| 21 | push_symbol() |
| 22 | table.insert(tokens, {type = "RULE"}) |
| 23 | elseif char == ">" then |
| 24 | push_symbol() |
| 25 | table.insert(tokens, {type = "ACCEPT", value = "[accept]"}) |
| 26 | elseif char == ":" then |
| 27 | push_symbol() |
| 28 | table.insert(tokens, {type = "SWITCH", value = "[switch]"}) |
| 29 | elseif char == ";" then |
| 30 | push_symbol() |
| 31 | table.insert(tokens, {type = "DONE", value = "[done]"}) |
| 32 | elseif char == "," then |
| 33 | push_symbol() |
| 34 | elseif char == " " or char == "\n" or char == "\t" then |
| 35 | push_token() |
| 36 | else |
| 37 | table.insert(current_token, char) |
| 38 | end |
| 39 | end |
| 40 | |
| 41 | push_symbol() |
| 42 | return tokens |
| 43 | end |
| 44 | |
| 45 | local function here(self) |
| 46 | return self.tokens[self.current] |
| 47 | end |
| 48 | |
| 49 | local function advance(self) |
| 50 | self.current = self.current + 1 |
| 51 | end |
| 52 | |
| 53 | local function read(self) |
| 54 | return self:here(), self:advance() |
| 55 | end |
| 56 | |
| 57 | local function done(self) |
| 58 | return self.current > #self.tokens |
| 59 | end |
| 60 | |
| 61 | local function new_parser(tokens) |
| 62 | return {current = 1, tokens = tokens, here = here, advance = advance, read = read, done = done} |
| 63 | end |
| 64 | |
| 65 | local parse_rules, parse_rule, parse_header |
| 66 | |
| 67 | function parse_header(parser, header, body) |
| 68 | while not parser:done() do |
| 69 | local token = parser:read() |
| 70 | if token.type == "DONE" then |
| 71 | return parse_rules(parser, header, body) |
| 72 | elseif token.type == "RULE" then |
| 73 | return parse_rule(parser, header, body) |
| 74 | elseif token.type == "ACCEPT" then |
| 75 | -- skip this token in this context |
| 76 | else |
| 77 | table.insert(header, token.value) |
| 78 | end |
| 79 | end |
| 80 | return header, rules |
| 81 | end |
| 82 | |
| 83 | function parse_rule(parser, header, body) |
| 84 | -- Starts a new chunk of opcodes |
| 85 | while not parser:done() do |
| 86 | local token = parser:read() |
| 87 | if token.type == "DONE" then |
| 88 | table.insert(body, "[done]") |
| 89 | return parse_rules(parser, header, body) |
| 90 | elseif token.type == "RULE" then |
| 91 | table.insert(body, "[done]") |
| 92 | return parse_rule(parser, header, body) |
| 93 | elseif token.type == "ACCEPT" then |
| 94 | table.insert(body, "[accept]") |
| 95 | elseif token.type == "SYMBOL" and parser:here().type == "SWITCH" then |
| 96 | parser:advance() |
| 97 | table.insert(body, token.value) |
| 98 | table.insert(body, "[switch]") |
| 99 | else |
| 100 | table.insert(body, token.value) |
| 101 | table.insert(body, "[match]") |
| 102 | end |
| 103 | end |
| 104 | table.insert(rules, body) |
| 105 | return header, rules |
| 106 | end |
| 107 | |
| 108 | function parse_rules(parser, header, body) |
| 109 | while not parser:done() do |
| 110 | local token = parser:read() |
| 111 | if token.type == "RULE" and parser:here().type == "ACCEPT" then |
| 112 | parser:advance() |
| 113 | return parse_header(parser, header, body) |
| 114 | elseif token.type == "RULE" then |
| 115 | return parse_rule(parser, header, body) |
| 116 | end |
| 117 | end |
| 118 | return header, rules |
| 119 | end |
| 120 | |
| 121 | local function parse(tokens) |
| 122 | local header, body, bytecode = {}, {}, {} |
| 123 | local parser = new_parser(tokens) |
| 124 | header, rules = parse_rules(parser, header, body) |
| 125 | for _, op in ipairs(header) do |
| 126 | table.insert(bytecode, op) |
| 127 | end |
| 128 | |
| 129 | for _, op in ipairs(body) do |
| 130 | table.insert(bytecode, op) |
| 131 | end |
| 132 | return bytecode |
| 133 | end |