Skip to content

Commit d932563

Browse files
committed
unwrap tokenizer
1 parent 07135d1 commit d932563

File tree

1 file changed

+127
-130
lines changed

1 file changed

+127
-130
lines changed

src/intertyper.js

+127-130
Original file line numberDiff line numberDiff line change
@@ -4,151 +4,148 @@
44
// to be processed by the later stages.
55

66
// Line tokenizer
7-
var tokenizer = {
8-
processItem: function _tokenizer(item, inner) {
9-
//assert(item.lineNum != 40000);
10-
//if (item.lineNum) print(item.lineNum);
11-
var tokens = [];
12-
var quotes = 0;
13-
var lastToken = null;
14-
var CHUNKSIZE = 64; // How much forward to peek forward. Too much means too many string segments copied
15-
// Note: '{' is not an encloser, as its use in functions is split over many lines
16-
var enclosers = {
17-
'[': 0,
18-
']': '[',
19-
'(': 0,
20-
')': '(',
21-
'<': 0,
22-
'>': '<'
23-
};
24-
var totalEnclosing = 0;
25-
function makeToken(text) {
26-
if (text.length == 0) return;
27-
// merge certain tokens
28-
if (lastToken && ( (lastToken.text == '%' && text[0] == '"') || /^\**$/.test(text) ) ) {
29-
lastToken.text += text;
30-
return;
31-
}
7+
function tokenizer(item, inner) {
8+
//assert(item.lineNum != 40000);
9+
//if (item.lineNum) print(item.lineNum);
10+
var tokens = [];
11+
var quotes = 0;
12+
var lastToken = null;
13+
var CHUNKSIZE = 64; // How much forward to peek forward. Too much means too many string segments copied
14+
// Note: '{' is not an encloser, as its use in functions is split over many lines
15+
var enclosers = {
16+
'[': 0,
17+
']': '[',
18+
'(': 0,
19+
')': '(',
20+
'<': 0,
21+
'>': '<'
22+
};
23+
var totalEnclosing = 0;
24+
function makeToken(text) {
25+
if (text.length == 0) return;
26+
// merge certain tokens
27+
if (lastToken && ( (lastToken.text == '%' && text[0] == '"') || /^\**$/.test(text) ) ) {
28+
lastToken.text += text;
29+
return;
30+
}
3231

33-
var token = {
34-
text: text
35-
};
36-
if (text[0] in enclosers) {
37-
token.item = tokenizer.processItem({
38-
lineText: text.substr(1, text.length-2)
39-
}, true);
40-
token.type = text[0];
41-
}
42-
// merge certain tokens
43-
if (lastToken && isType(lastToken.text) && isFunctionDef(token)) {
44-
lastToken.text += ' ' + text;
45-
} else if (lastToken && text[0] == '}') { // }, }*, etc.
46-
var openBrace = tokens.length-1;
47-
while (tokens[openBrace].text.substr(-1) != '{') openBrace --;
48-
token = combineTokens(tokens.slice(openBrace+1));
49-
tokens.splice(openBrace, tokens.length-openBrace+1);
50-
tokens.push(token);
51-
token.type = '{';
52-
token.text = '{ ' + token.text + ' }';
53-
var pointingLevelsToAdd = pointingLevels(text) - pointingLevels(token.text);
54-
while (pointingLevelsToAdd > 0) {
55-
token.text += '*';
56-
pointingLevelsToAdd--;
57-
}
58-
lastToken = token;
59-
} else {
60-
tokens.push(token);
61-
lastToken = token;
32+
var token = {
33+
text: text
34+
};
35+
if (text[0] in enclosers) {
36+
token.item = tokenizer({
37+
lineText: text.substr(1, text.length-2)
38+
}, true);
39+
token.type = text[0];
40+
}
41+
// merge certain tokens
42+
if (lastToken && isType(lastToken.text) && isFunctionDef(token)) {
43+
lastToken.text += ' ' + text;
44+
} else if (lastToken && text[0] == '}') { // }, }*, etc.
45+
var openBrace = tokens.length-1;
46+
while (tokens[openBrace].text.substr(-1) != '{') openBrace --;
47+
token = combineTokens(tokens.slice(openBrace+1));
48+
tokens.splice(openBrace, tokens.length-openBrace+1);
49+
tokens.push(token);
50+
token.type = '{';
51+
token.text = '{ ' + token.text + ' }';
52+
var pointingLevelsToAdd = pointingLevels(text) - pointingLevels(token.text);
53+
while (pointingLevelsToAdd > 0) {
54+
token.text += '*';
55+
pointingLevelsToAdd--;
6256
}
57+
lastToken = token;
58+
} else {
59+
tokens.push(token);
60+
lastToken = token;
6361
}
64-
// Split using meaningful characters
65-
var lineText = item.lineText + ' ';
66-
var re = /[\[\]\(\)<>, "]/g;
67-
var segments = lineText.split(re);
68-
segments.pop();
69-
var len = segments.length;
70-
var i = -1;
71-
var curr = '';
72-
var segment, letter;
73-
for (var s = 0; s < len; s++) {
74-
segment = segments[s];
75-
i += segment.length + 1;
76-
letter = lineText[i];
77-
curr += segment;
78-
switch (letter) {
79-
case ' ':
80-
if (totalEnclosing == 0 && quotes == 0) {
81-
makeToken(curr);
82-
curr = '';
83-
} else {
84-
curr += ' ';
85-
}
86-
break;
87-
case '"':
88-
if (totalEnclosing == 0) {
89-
if (quotes == 0) {
90-
if (curr == '@' || curr == '%') {
91-
curr += '"';
92-
} else {
93-
makeToken(curr);
94-
curr = '"';
95-
}
62+
}
63+
// Split using meaningful characters
64+
var lineText = item.lineText + ' ';
65+
var re = /[\[\]\(\)<>, "]/g;
66+
var segments = lineText.split(re);
67+
segments.pop();
68+
var len = segments.length;
69+
var i = -1;
70+
var curr = '';
71+
var segment, letter;
72+
for (var s = 0; s < len; s++) {
73+
segment = segments[s];
74+
i += segment.length + 1;
75+
letter = lineText[i];
76+
curr += segment;
77+
switch (letter) {
78+
case ' ':
79+
if (totalEnclosing == 0 && quotes == 0) {
80+
makeToken(curr);
81+
curr = '';
82+
} else {
83+
curr += ' ';
84+
}
85+
break;
86+
case '"':
87+
if (totalEnclosing == 0) {
88+
if (quotes == 0) {
89+
if (curr == '@' || curr == '%') {
90+
curr += '"';
9691
} else {
97-
makeToken(curr + '"');
98-
curr = '';
92+
makeToken(curr);
93+
curr = '"';
9994
}
10095
} else {
101-
curr += '"';
96+
makeToken(curr + '"');
97+
curr = '';
10298
}
103-
quotes = 1-quotes;
99+
} else {
100+
curr += '"';
101+
}
102+
quotes = 1-quotes;
103+
break;
104+
case ',':
105+
if (totalEnclosing == 0 && quotes == 0) {
106+
makeToken(curr);
107+
curr = '';
108+
tokens.push({ text: ',' });
109+
} else {
110+
curr += ',';
111+
}
112+
break;
113+
default:
114+
assert(letter in enclosers);
115+
if (quotes) {
116+
curr += letter;
104117
break;
105-
case ',':
106-
if (totalEnclosing == 0 && quotes == 0) {
118+
}
119+
if (letter in ENCLOSER_STARTERS) {
120+
if (totalEnclosing == 0) {
107121
makeToken(curr);
108122
curr = '';
109-
tokens.push({ text: ',' });
110-
} else {
111-
curr += ',';
112-
}
113-
break;
114-
default:
115-
assert(letter in enclosers);
116-
if (quotes) {
117-
curr += letter;
118-
break;
119123
}
120-
if (letter in ENCLOSER_STARTERS) {
121-
if (totalEnclosing == 0) {
122-
makeToken(curr);
123-
curr = '';
124-
}
125-
curr += letter;
126-
enclosers[letter]++;
127-
totalEnclosing++;
124+
curr += letter;
125+
enclosers[letter]++;
126+
totalEnclosing++;
127+
} else {
128+
enclosers[enclosers[letter]]--;
129+
totalEnclosing--;
130+
if (totalEnclosing == 0) {
131+
makeToken(curr + letter);
132+
curr = '';
128133
} else {
129-
enclosers[enclosers[letter]]--;
130-
totalEnclosing--;
131-
if (totalEnclosing == 0) {
132-
makeToken(curr + letter);
133-
curr = '';
134-
} else {
135-
curr += letter;
136-
}
134+
curr += letter;
137135
}
138-
}
136+
}
139137
}
140-
var newItem = {
141-
tokens: tokens,
142-
indent: lineText.search(/[^ ]/),
143-
lineNum: item.lineNum
144-
};
145-
return newItem;
146-
return null;
147138
}
148-
};
139+
var newItem = {
140+
tokens: tokens,
141+
indent: lineText.search(/[^ ]/),
142+
lineNum: item.lineNum
143+
};
144+
return newItem;
145+
}
149146

150147
function tokenize(text) {
151-
return tokenizer.processItem({ lineText: text }, true);
148+
return tokenizer({ lineText: text }, true);
152149
}
153150

154151
// Handy sets
@@ -252,7 +249,7 @@ function intertyper(lines, sidePass, baseLineNums) {
252249
if (mainPass && /^}.*/.test(line)) {
253250
inFunction = false;
254251
if (mainPass) {
255-
var func = funcHeaderHandler(tokenizer.processItem({ lineText: currFunctionLines[0], lineNum: currFunctionLineNum }, true));
252+
var func = funcHeaderHandler(tokenizer({ lineText: currFunctionLines[0], lineNum: currFunctionLineNum }, true));
256253

257254
if (SKIP_STACK_IN_SMALL && /emscripten_autodebug/.exec(func.ident)) {
258255
warnOnce('Disabling SKIP_STACK_IN_SMALL because we are apparently processing autodebugger data');
@@ -991,7 +988,7 @@ function intertyper(lines, sidePass, baseLineNums) {
991988

992989
// Input
993990

994-
return lineSplitter().map(tokenizer.processItem).filter(function(item) { return item }).map(triager).filter(function(result) {
991+
return lineSplitter().map(tokenizer).filter(function(item) { return item }).map(triager).filter(function(result) {
995992
if (!result) return false;
996993
if (result.tokens) result.tokens = null; // We do not need tokens, past the intertyper. Clean them up as soon as possible here.
997994
return true;

0 commit comments

Comments
 (0)