yumaikas ha revisionato questo gist. Vai alla revisione
1 file changed, 1 insertion, 1 deletion
tokenizer.js
@@ -1,4 +1,4 @@ | |||
1 | - | okenize(program) { | |
1 | + | function tokenize(program) { | |
2 | 2 | var delimiter = null; | |
3 | 3 | var tokens = []; | |
4 | 4 | var current_token = []; |
yumaikas ha revisionato questo gist. Vai alla revisione
1 file changed, 66 insertions
tokenizer.js (file creato)
@@ -0,0 +1,66 @@ | |||
1 | + | okenize(program) { | |
2 | + | var delimiter = null; | |
3 | + | var tokens = []; | |
4 | + | var current_token = []; | |
5 | + | // Doing this for | |
6 | + | var STATE_DELIM = "detect the delimiter"; | |
7 | + | var STATE_COLLECT_TOKENS = "collecting the tokens"; | |
8 | + | var STATE_RAW_STRING = "collecting a raw string"; | |
9 | + | var state = "detect the delimiter"; | |
10 | + | ||
11 | + | function is_raw_start(idx) { | |
12 | + | return program[idx] === "[" && program[idx+1] === "=" && program[idx+2] == "["; | |
13 | + | } | |
14 | + | ||
15 | + | function is_raw_end(idx) { | |
16 | + | return program[idx] === "]" && program[idx+1] === "=" && program[idx+2] == "]"; | |
17 | + | } | |
18 | + | ||
19 | + | ||
20 | + | ||
21 | + | for (var i = 0; i < program.length; i++) { | |
22 | + | var current_character = program[i]; | |
23 | + | if (state == STATE_DELIM) { | |
24 | + | if (!is_whitespace(current_character)) { | |
25 | + | delimiter = current_character; | |
26 | + | tokens.push(delimiter); | |
27 | + | state = STATE_COLLECT_TOKENS; | |
28 | + | } | |
29 | + | } else if (state == STATE_COLLECT_TOKENS) { | |
30 | + | if (is_raw_start(i)) { | |
31 | + | i += 2; // The i++ at the top of the loop will take care of the rest | |
32 | + | state = STATE_RAW_STRING; | |
33 | + | if (current_token.length > 0) { | |
34 | + | tokens.push(current_token.join("")) | |
35 | + | current_token = []; | |
36 | + | } | |
37 | + | continue; | |
38 | + | } | |
39 | + | if (is_whitespace(current_character) || is_default_delimiter(current_character) || current_character == delimiter) { | |
40 | + | if (current_token.length != 0) { | |
41 | + | tokens.push(current_token.join("")); | |
42 | + | current_token = []; | |
43 | + | } | |
44 | + | if (!is_whitespace(current_character)) { | |
45 | + | tokens.push(current_character); | |
46 | + | } | |
47 | + | } else { | |
48 | + | current_token.push(current_character); | |
49 | + | } | |
50 | + | } else if (state == STATE_RAW_STRING) { | |
51 | + | if (is_raw_end(i)) { | |
52 | + | state = STATE_COLLECT_TOKENS; | |
53 | + | i+=2; | |
54 | + | tokens.push(current_token.join("")); | |
55 | + | current_token = []; | |
56 | + | continue; | |
57 | + | } | |
58 | + | current_token.push(current_character); | |
59 | + | } | |
60 | + | } | |
61 | + | if (current_token.length > 0) { | |
62 | + | tokens.push(current_token.join("")); | |
63 | + | } | |
64 | + | ||
65 | + | return {delimiter: delimiter, tokens: tokens}; | |
66 | + | } |