tokenizer.js
· 2.5 KiB · JavaScript
原始檔案
Playground
/**
 * Splits a program string into a flat list of string tokens.
 *
 * The first character of `program` that is_whitespace() rejects is taken as
 * the program's delimiter; it is recorded and also emitted as the first
 * token. From then on:
 *   - a character accepted by is_whitespace() ends the pending token and is
 *     dropped;
 *   - the delimiter, or a character accepted by is_default_delimiter(), ends
 *     the pending token and is emitted as a one-character token of its own;
 *   - `[=[` opens a raw string whose characters are collected verbatim up to
 *     the closing `]=]`; the content is emitted as a single token, even when
 *     it is empty. The markers themselves are never emitted;
 *   - any other character is appended to the pending token.
 *
 * An unterminated raw string is not reported as an error: whatever was
 * collected is emitted as the last token, provided it is non-empty.
 *
 * Relies on is_whitespace() and is_default_delimiter(), which are defined
 * outside this function.
 *
 * @param {string} program - Source text to tokenize.
 * @returns {{delimiter: ?string, tokens: string[]}} The detected delimiter
 *     (null when every character is whitespace or the input is empty) and
 *     the tokens in source order.
 */
function tokenize(program) {
    var STATE_DELIM = "detect the delimiter";
    var STATE_COLLECT_TOKENS = "collecting the tokens";
    var STATE_RAW_STRING = "collecting a raw string";

    var delimiter = null;
    var tokens = [];
    var current_token = [];
    var state = STATE_DELIM;

    // True when the three characters starting at idx are the raw-string opener.
    function is_raw_start(idx) {
        return program[idx] === "[" && program[idx + 1] === "=" && program[idx + 2] === "[";
    }

    // True when the three characters starting at idx are the raw-string closer.
    function is_raw_end(idx) {
        return program[idx] === "]" && program[idx + 1] === "=" && program[idx + 2] === "]";
    }

    // Emits the pending token and starts a new one. Empty tokens are skipped
    // unless keep_empty is set (an empty raw string is still a token).
    function flush_token(keep_empty) {
        if (keep_empty || current_token.length > 0) {
            tokens.push(current_token.join(""));
            current_token = [];
        }
    }

    for (var i = 0; i < program.length; i++) {
        var current_character = program[i];

        if (state === STATE_DELIM) {
            if (!is_whitespace(current_character)) {
                delimiter = current_character;
                tokens.push(delimiter);
                state = STATE_COLLECT_TOKENS;
            }
        } else if (state === STATE_COLLECT_TOKENS) {
            // The raw-string opener is checked before the delimiter tests so
            // that "[" starting "[=[" is never emitted as a token by itself.
            if (is_raw_start(i)) {
                flush_token(false);
                state = STATE_RAW_STRING;
                i += 2; // Skip "[=" here; the loop's i++ skips the second "[".
                continue;
            }

            if (is_whitespace(current_character)) {
                flush_token(false);
            } else if (is_default_delimiter(current_character) || current_character === delimiter) {
                flush_token(false);
                tokens.push(current_character);
            } else {
                current_token.push(current_character);
            }
        } else if (state === STATE_RAW_STRING) {
            if (is_raw_end(i)) {
                flush_token(true);
                state = STATE_COLLECT_TOKENS;
                i += 2; // Skip "]=" here; the loop's i++ skips the final "]".
                continue;
            }
            current_token.push(current_character);
        }
    }

    // Emit whatever is still pending at end of input (this includes the
    // content of a raw string that was never closed).
    flush_token(false);

    return {delimiter: delimiter, tokens: tokens};
}
1 | function tokenize(program) { |
2 | var delimiter = null; |
3 | var tokens = []; |
4 | var current_token = []; |
5 | // Doing this for |
6 | var STATE_DELIM = "detect the delimiter"; |
7 | var STATE_COLLECT_TOKENS = "collecting the tokens"; |
8 | var STATE_RAW_STRING = "collecting a raw string"; |
9 | var state = "detect the delimiter"; |
10 | |
11 | function is_raw_start(idx) { |
12 | return program[idx] === "[" && program[idx+1] === "=" && program[idx+2] == "["; |
13 | } |
14 | |
15 | function is_raw_end(idx) { |
16 | return program[idx] === "]" && program[idx+1] === "=" && program[idx+2] == "]"; |
17 | } |
18 | |
19 | |
20 | |
21 | for (var i = 0; i < program.length; i++) { |
22 | var current_character = program[i]; |
23 | if (state == STATE_DELIM) { |
24 | if (!is_whitespace(current_character)) { |
25 | delimiter = current_character; |
26 | tokens.push(delimiter); |
27 | state = STATE_COLLECT_TOKENS; |
28 | } |
29 | } else if (state == STATE_COLLECT_TOKENS) { |
30 | if (is_raw_start(i)) { |
31 | i += 2; // The i++ at the top of the loop will take care of the rest |
32 | state = STATE_RAW_STRING; |
33 | if (current_token.length > 0) { |
34 | tokens.push(current_token.join("")) |
35 | current_token = []; |
36 | } |
37 | continue; |
38 | } |
39 | if (is_whitespace(current_character) || is_default_delimiter(current_character) || current_character == delimiter) { |
40 | if (current_token.length != 0) { |
41 | tokens.push(current_token.join("")); |
42 | current_token = []; |
43 | } |
44 | if (!is_whitespace(current_character)) { |
45 | tokens.push(current_character); |
46 | } |
47 | } else { |
48 | current_token.push(current_character); |
49 | } |
50 | } else if (state == STATE_RAW_STRING) { |
51 | if (is_raw_end(i)) { |
52 | state = STATE_COLLECT_TOKENS; |
53 | i+=2; |
54 | tokens.push(current_token.join("")); |
55 | current_token = []; |
56 | continue; |
57 | } |
58 | current_token.push(current_character); |
59 | } |
60 | } |
61 | if (current_token.length > 0) { |
62 | tokens.push(current_token.join("")); |
63 | } |
64 | |
65 | return {delimiter: delimiter, tokens: tokens}; |
66 | } |