tokenizer.js
· 2.5 KiB · JavaScript
Eredeti
Playground
/**
 * Splits `program` into a flat list of string tokens.
 *
 * The scan is a three-state machine:
 *
 *  1. Delimiter detection: leading whitespace is skipped and the first
 *     non-whitespace character becomes the program's delimiter. It is
 *     returned as `delimiter` and is also emitted as the first token.
 *  2. Token collection: characters accumulate into the current token until
 *     whitespace, a default delimiter, or the program's delimiter is met.
 *     The pending token (if any) is emitted; a delimiter is then emitted as
 *     its own one-character token, while whitespace is dropped.
 *  3. Raw string: everything between `[=[` and the next `]=]` is emitted
 *     verbatim as a single token, even when it is empty. No delimiter,
 *     whitespace, or nested `[=[` handling happens inside a raw string.
 *
 * A raw string that is never closed is not reported as an error: whatever
 * was collected is emitted as a final token if it is non-empty.
 *
 * Depends on `is_whitespace(ch)` and `is_default_delimiter(ch)`, which are
 * defined outside this function.
 *
 * @param {string} program - Text to tokenize. It is read one index at a
 *     time via `program[i]`, so for a string each step is one UTF-16 code
 *     unit. (Presumably always a string; verify against callers.)
 * @returns {{delimiter: (string|null), tokens: string[]}} The detected
 *     delimiter (`null` when `program` has no non-whitespace character) and
 *     the tokens in source order.
 */
function tokenize(program) {
    // The states are spelled out as descriptive strings.
    var STATE_DELIM = "detect the delimiter";
    var STATE_COLLECT_TOKENS = "collecting the tokens";
    var STATE_RAW_STRING = "collecting a raw string";

    var delimiter = null;
    var tokens = [];
    var current_token = [];
    var state = STATE_DELIM;

    // True when the three characters starting at idx are the raw-string opener "[=[".
    function is_raw_start(idx) {
        return program[idx] === "[" && program[idx + 1] === "=" && program[idx + 2] === "[";
    }

    // True when the three characters starting at idx are the raw-string closer "]=]".
    function is_raw_end(idx) {
        return program[idx] === "]" && program[idx + 1] === "=" && program[idx + 2] === "]";
    }

    // Emits the pending token and starts a new one. An empty pending token is
    // only emitted when allow_empty is true (used for raw strings, where ""
    // is a meaningful value).
    function flush_token(allow_empty) {
        if (allow_empty || current_token.length > 0) {
            tokens.push(current_token.join(""));
        }
        current_token = [];
    }

    for (var i = 0; i < program.length; i++) {
        var current_character = program[i];

        if (state === STATE_DELIM) {
            if (!is_whitespace(current_character)) {
                delimiter = current_character;
                tokens.push(delimiter);
                state = STATE_COLLECT_TOKENS;
            }
        } else if (state === STATE_COLLECT_TOKENS) {
            if (is_raw_start(i)) {
                // Text directly before the opener is its own token.
                flush_token(false);
                state = STATE_RAW_STRING;
                // Skip two of the opener's characters here; the loop's own
                // i++ steps over the third.
                i += 2;
                continue;
            }

            if (is_whitespace(current_character) || is_default_delimiter(current_character) || current_character === delimiter) {
                flush_token(false);
                // Delimiters are tokens in their own right; whitespace only separates.
                if (!is_whitespace(current_character)) {
                    tokens.push(current_character);
                }
            } else {
                current_token.push(current_character);
            }
        } else if (state === STATE_RAW_STRING) {
            if (is_raw_end(i)) {
                state = STATE_COLLECT_TOKENS;
                // Same skip as for the opener: two here, one from the loop.
                i += 2;
                flush_token(true);
                continue;
            }
            current_token.push(current_character);
        }
    }

    // Emit whatever was still being collected when the input ended.
    flush_token(false);

    return {delimiter: delimiter, tokens: tokens};
}
| 1 | function tokenize(program) { |
| 2 | var delimiter = null; |
| 3 | var tokens = []; |
| 4 | var current_token = []; |
| 5 | // Doing this for |
| 6 | var STATE_DELIM = "detect the delimiter"; |
| 7 | var STATE_COLLECT_TOKENS = "collecting the tokens"; |
| 8 | var STATE_RAW_STRING = "collecting a raw string"; |
| 9 | var state = "detect the delimiter"; |
| 10 | |
| 11 | function is_raw_start(idx) { |
| 12 | return program[idx] === "[" && program[idx+1] === "=" && program[idx+2] == "["; |
| 13 | } |
| 14 | |
| 15 | function is_raw_end(idx) { |
| 16 | return program[idx] === "]" && program[idx+1] === "=" && program[idx+2] == "]"; |
| 17 | } |
| 18 | |
| 19 | |
| 20 | |
| 21 | for (var i = 0; i < program.length; i++) { |
| 22 | var current_character = program[i]; |
| 23 | if (state == STATE_DELIM) { |
| 24 | if (!is_whitespace(current_character)) { |
| 25 | delimiter = current_character; |
| 26 | tokens.push(delimiter); |
| 27 | state = STATE_COLLECT_TOKENS; |
| 28 | } |
| 29 | } else if (state == STATE_COLLECT_TOKENS) { |
| 30 | if (is_raw_start(i)) { |
| 31 | i += 2; // The i++ at the top of the loop will take care of the rest |
| 32 | state = STATE_RAW_STRING; |
| 33 | if (current_token.length > 0) { |
| 34 | tokens.push(current_token.join("")) |
| 35 | current_token = []; |
| 36 | } |
| 37 | continue; |
| 38 | } |
| 39 | if (is_whitespace(current_character) || is_default_delimiter(current_character) || current_character == delimiter) { |
| 40 | if (current_token.length != 0) { |
| 41 | tokens.push(current_token.join("")); |
| 42 | current_token = []; |
| 43 | } |
| 44 | if (!is_whitespace(current_character)) { |
| 45 | tokens.push(current_character); |
| 46 | } |
| 47 | } else { |
| 48 | current_token.push(current_character); |
| 49 | } |
| 50 | } else if (state == STATE_RAW_STRING) { |
| 51 | if (is_raw_end(i)) { |
| 52 | state = STATE_COLLECT_TOKENS; |
| 53 | i+=2; |
| 54 | tokens.push(current_token.join("")); |
| 55 | current_token = []; |
| 56 | continue; |
| 57 | } |
| 58 | current_token.push(current_character); |
| 59 | } |
| 60 | } |
| 61 | if (current_token.length > 0) { |
| 62 | tokens.push(current_token.join("")); |
| 63 | } |
| 64 | |
| 65 | return {delimiter: delimiter, tokens: tokens}; |
| 66 | } |