// tokenizer.js
// Last active 1734021598
/**
 * Splits a program string into tokens, using a delimiter that the program
 * itself declares.
 *
 * The first non-whitespace character of `program` becomes the delimiter and
 * is also emitted as the first token. After that, tokens are separated by
 * whitespace, by the delimiter, or by any character accepted by
 * `is_default_delimiter`; separators that are not whitespace are emitted as
 * single-character tokens of their own.
 *
 * Text between `[=[` and `]=]` is a raw string: it is copied verbatim
 * (whitespace and delimiters included) into a single token. An empty raw
 * string yields an empty-string token. A raw string that is never closed is
 * not reported as an error; whatever was collected is emitted as the final
 * token (and nothing is emitted if it was empty).
 *
 * Relies on `is_whitespace` and `is_default_delimiter`, which are defined
 * outside this function and are each called with one character.
 *
 * @param {string} program - Source text to tokenize.
 * @returns {{delimiter: (string|null), tokens: string[]}} The detected
 *     delimiter (`null` when `program` has no non-whitespace character) and
 *     the tokens in source order.
 */
function tokenize(program) {
    // State names are descriptive strings rather than numbers
    // (presumably so they read well when inspected while debugging).
    var STATE_DELIM = "detect the delimiter";
    var STATE_COLLECT_TOKENS = "collecting the tokens";
    var STATE_RAW_STRING = "collecting a raw string";

    var delimiter = null;
    var tokens = [];
    var current_token = [];
    var state = STATE_DELIM;

    // True when the raw-string opener `[=[` begins at idx.
    function is_raw_start(idx) {
        return program[idx] === "[" && program[idx + 1] === "=" && program[idx + 2] === "[";
    }

    // True when the raw-string closer `]=]` begins at idx.
    function is_raw_end(idx) {
        return program[idx] === "]" && program[idx + 1] === "=" && program[idx + 2] === "]";
    }

    // True when unit is a single UTF-16 code unit within [low, high].
    function is_unit_between(unit, low, high) {
        return typeof unit === "string" && unit.length === 1 && unit >= low && unit <= high;
    }

    // Emits the pending token and starts a new one. Empty tokens are only
    // emitted when keep_empty is set (used for empty raw strings).
    function flush_token(keep_empty) {
        if (keep_empty || current_token.length > 0) {
            tokens.push(current_token.join(""));
            current_token = [];
        }
    }

    for (var i = 0; i < program.length; i++) {
        var current_character = program[i];

        // Keep a surrogate pair together so that a character outside the
        // Basic Multilingual Plane (e.g. an emoji used as the delimiter) is
        // handled as one character instead of two broken halves. After the
        // merge i sits on the low surrogate, which can never match the ASCII
        // raw-string markers tested below.
        if (is_unit_between(current_character, "\uD800", "\uDBFF") &&
                is_unit_between(program[i + 1], "\uDC00", "\uDFFF")) {
            current_character += program[i + 1];
            i++;
        }

        if (state === STATE_DELIM) {
            if (!is_whitespace(current_character)) {
                delimiter = current_character;
                tokens.push(delimiter);
                state = STATE_COLLECT_TOKENS;
            }
        } else if (state === STATE_COLLECT_TOKENS) {
            if (is_raw_start(i)) {
                // Skip the two remaining opener characters; the loop's i++
                // then lands on the first character of the raw string.
                i += 2;
                state = STATE_RAW_STRING;
                flush_token(false);
                continue;
            }
            if (is_whitespace(current_character) ||
                    is_default_delimiter(current_character) ||
                    current_character === delimiter) {
                flush_token(false);
                // Whitespace only separates; other separators are tokens.
                if (!is_whitespace(current_character)) {
                    tokens.push(current_character);
                }
            } else {
                current_token.push(current_character);
            }
        } else if (state === STATE_RAW_STRING) {
            if (is_raw_end(i)) {
                state = STATE_COLLECT_TOKENS;
                // Skip the two remaining closer characters.
                i += 2;
                // A raw string is always emitted, even when it is empty.
                flush_token(true);
                continue;
            }
            current_token.push(current_character);
        }
    }

    // Emit whatever is still pending at end of input (this also covers a raw
    // string that was never closed).
    flush_token(false);

    return {delimiter: delimiter, tokens: tokens};
}