--- Tokenize a grammar description into a flat token stream.
-- Recognized token types:
--   RULE   ("-")  -- introduces a rule
--   ACCEPT (">")  -- accept marker; "-" ">" together introduce the header
--   SWITCH (":")  -- switch marker following a symbol
--   DONE   (";")  -- terminates the header or a rule
--   SYMBOL        -- run of words; whitespace joins words within one symbol,
--                    "," separates adjacent symbols
-- @param source grammar text (renamed from `string` to avoid shadowing
--   the standard string library)
-- @return array of {type = ..., value = ...} tokens
local function tokenize(source)
  local tokens, current_symbol, current_token = {}, {}, {}

  -- Flush accumulated characters into the current symbol's word list.
  local function push_token()
    if #current_token ~= 0 then
      table.insert(current_symbol, table.concat(current_token))
      current_token = {}
    end
  end

  -- Flush the current symbol (words joined by single spaces) as a SYMBOL token.
  local function push_symbol()
    push_token()
    if #current_symbol ~= 0 then
      table.insert(tokens, {type = "SYMBOL", value = table.concat(current_symbol, " ")})
      current_symbol = {}
    end
  end

  for char in source:gmatch(".") do
    if char == "-" then
      push_symbol()
      table.insert(tokens, {type = "RULE"})
    elseif char == ">" then
      push_symbol()
      table.insert(tokens, {type = "ACCEPT", value = "[accept]"})
    elseif char == ":" then
      push_symbol()
      table.insert(tokens, {type = "SWITCH", value = "[switch]"})
    elseif char == ";" then
      push_symbol()
      table.insert(tokens, {type = "DONE", value = "[done]"})
    elseif char == "," then
      push_symbol() -- comma separates symbols without emitting a token
    elseif char == " " or char == "\n" or char == "\t" then
      push_token() -- whitespace separates words within one symbol
    else
      table.insert(current_token, char)
    end
  end
  push_symbol() -- flush any trailing symbol at end of input
  return tokens
end

-- Minimal cursor over the token array; each takes the parser table as self.
local function here(self) return self.tokens[self.current] end
local function advance(self) self.current = self.current + 1 end
local function read(self) return self:here(), self:advance() end
local function done(self) return self.current > #self.tokens end

--- Create a parser state over a token array.
local function new_parser(tokens)
  return {current = 1, tokens = tokens,
          here = here, advance = advance, read = read, done = done}
end

-- Mutually recursive parse states; all recursion is via proper tail calls,
-- so the call stack does not grow with input size.
local parse_rules, parse_rule, parse_header

--- Collect SYMBOL values into `header` until the header section ends.
-- NOTE: the original returned the undeclared global `rules` (always nil)
-- here; the actual opcode accumulator is `body`, which is returned instead.
-- @return header, body (the two opcode accumulators)
function parse_header(parser, header, body)
  while not parser:done() do
    local token = parser:read()
    if token.type == "DONE" then
      return parse_rules(parser, header, body)
    elseif token.type == "RULE" then
      return parse_rule(parser, header, body)
    elseif token.type == "ACCEPT" then
      -- ACCEPT carries no meaning inside a header; skip it.
    else
      table.insert(header, token.value)
    end
  end
  return header, body
end

--- Emit opcodes for one rule into `body`.
-- A SYMBOL immediately followed by SWITCH emits value .. "[switch]";
-- any other SYMBOL emits value .. "[match]".
function parse_rule(parser, header, body)
  while not parser:done() do
    local token = parser:read()
    if token.type == "DONE" then
      table.insert(body, "[done]")
      return parse_rules(parser, header, body)
    elseif token.type == "RULE" then
      -- A new rule begins; close the current one implicitly.
      table.insert(body, "[done]")
      return parse_rule(parser, header, body)
    elseif token.type == "ACCEPT" then
      table.insert(body, "[accept]")
    elseif token.type == "SYMBOL" and not parser:done()
        and parser:here().type == "SWITCH" then
      -- Guard parser:done(): the original dereferenced a nil lookahead
      -- when a SYMBOL was the final token.
      parser:advance() -- consume the SWITCH token
      table.insert(body, token.value)
      table.insert(body, "[switch]")
    else
      table.insert(body, token.value)
      table.insert(body, "[match]")
    end
  end
  -- Input ended without a terminating DONE. The original did
  -- table.insert(rules, body) here, which raised an error because the
  -- global `rules` was never initialized; the opcodes are already in
  -- `body`, so simply return the accumulators.
  return header, body
end

--- Dispatch between the header section ("- >") and ordinary rules ("-").
function parse_rules(parser, header, body)
  while not parser:done() do
    local token = parser:read()
    if token.type == "RULE" and not parser:done()
        and parser:here().type == "ACCEPT" then
      -- Guard parser:done(): a trailing bare "-" previously crashed on
      -- a nil lookahead.
      parser:advance() -- consume the ACCEPT token
      return parse_header(parser, header, body)
    elseif token.type == "RULE" then
      return parse_rule(parser, header, body)
    end
  end
  return header, body
end

--- Parse a token stream into a flat bytecode array: header opcodes first,
-- then rule-body opcodes.
-- @param tokens array produced by tokenize
-- @return flat array of opcode strings
local function parse(tokens)
  local header, body, bytecode = {}, {}, {}
  local parser = new_parser(tokens)
  -- The original wrote `header, rules = ...`, silently creating a global;
  -- both results are locals here.
  header, body = parse_rules(parser, header, body)
  for _, op in ipairs(header) do table.insert(bytecode, op) end
  for _, op in ipairs(body) do table.insert(bytecode, op) end
  return bytecode
end