blob: 49c246e82b13d21f834dee55eb14292d008075bf [file] [log] [blame]
Scott Baker999edc62014-06-18 15:40:56 -07001// tokens.js
2// 2009-05-17
3
4// (c) 2006 Douglas Crockford
5
6// Produce an array of simple token objects from a string.
7// A simple token object contains these members:
8// type: 'name', 'string', 'number', 'operator'
9// value: string or number value of the token
10// from: index of first character of the token
11// to: index of the last character + 1
12
13// Comments of the // type are ignored.
14
// Operators are by default single characters. Multicharacter
// operators can be made by supplying a string of prefix characters
// and a string of suffix characters: a prefix character followed by
// any run of suffix characters forms one operator. For example,
// '<>+-&', '=>&:'
// will match any of these:
// <= >> >>> <> >= +: -: &: &&: &&
22
23
24
// Split this string into an array of simple token objects.
//
// Parameters:
//   prefix  (optional string) characters that may begin a multicharacter
//           operator. Any non-string value selects the default '<>+-&'.
//   suffix  (optional string) characters that may continue an operator
//           begun by a prefix character. Any non-string value selects the
//           default '=>&:'.
//
// Returns an array of {type, value, from, to} objects. type is 'name',
// 'number', 'string' or 'operator'; value is a number for 'number' tokens
// and a string otherwise; from/to are the start index and the index one
// past the end of the token in the source. Whitespace and // comments
// produce no tokens.
//
// Errors: malformed numbers and strings are reported by calling
// token.error(message[, otherToken]). Tokens built here have no `error`
// member of their own, so that method must be supplied through the
// prototype chain by code outside this function. When none is reachable, a
// SyntaxError carrying the message plus type/value/from/to is thrown.
String.prototype.tokens = function (prefix, suffix) {
    var c;                      // The current character.
    var from;                   // The index of the start of the token.
    var i = 0;                  // The index of the current character.
    var length = this.length;
    var n;                      // The number value.
    var q;                      // The quote character.
    var str;                    // The string value.

    var result = [];            // An array to hold the results.

    // Make a token object spanning from..i as they stand at call time.
    var make = function (type, value) {
        return {
            type: type,
            value: value,
            from: from,
            to: i
        };
    };

    var isDigit = function (ch) {
        return ch >= '0' && ch <= '9';
    };

    var isLetter = function (ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    };

    // Report a lexical error on `token`. If an `error` method is reachable
    // on the token, delegate to it with the same arguments the call site
    // supplied and return whatever it returns. Otherwise throw a
    // SyntaxError instead of failing with "error is not a function".
    var fail = function (token, message, culprit) {
        var problem;
        var subject;
        if (typeof token.error === 'function') {
            if (culprit === undefined) {
                return token.error(message);
            }
            return token.error(message, culprit);
        }
        subject = culprit || token;
        problem = new SyntaxError(message);
        problem.type = subject.type;
        problem.value = subject.value;
        problem.from = subject.from;
        problem.to = subject.to;
        throw problem;
    };

    // Begin tokenization. In strict-mode code an empty source string is a
    // falsy primitive `this` and yields undefined here. In sloppy mode
    // `this` is a String wrapper object (always truthy), the guard never
    // fires, and the loop below simply produces an empty array.
    if (!this) {
        return;
    }

    // If prefix and suffix strings are not provided, supply defaults.
    if (typeof prefix !== 'string') {
        prefix = '<>+-&';
    }
    if (typeof suffix !== 'string') {
        suffix = '=>&:';
    }

    // Loop through this text, one character at a time. Every branch must
    // leave `c` equal to this.charAt(i) ('' at end of input) so that the
    // loop condition sees the real next character.
    c = this.charAt(i);
    while (c) {
        from = i;

        if (c <= ' ') {

            // Ignore whitespace and control characters.
            i += 1;
            c = this.charAt(i);

        } else if (isLetter(c)) {

            // name: a letter followed by letters, digits or underscores.
            str = c;
            i += 1;
            for (;;) {
                c = this.charAt(i);
                if (!isLetter(c) && !isDigit(c) && c !== '_') {
                    break;
                }
                str += c;
                i += 1;
            }
            result.push(make('name', str));

        } else if (isDigit(c)) {

            // number: must start with a digit (possibly '0'), never with
            // a decimal point.
            str = c;
            i += 1;

            // Look for more digits.
            for (;;) {
                c = this.charAt(i);
                if (!isDigit(c)) {
                    break;
                }
                i += 1;
                str += c;
            }

            // Look for a decimal fraction part.
            if (c === '.') {
                i += 1;
                str += c;
                for (;;) {
                    c = this.charAt(i);
                    if (!isDigit(c)) {
                        break;
                    }
                    i += 1;
                    str += c;
                }
            }

            // Look for an exponent part.
            if (c === 'e' || c === 'E') {
                i += 1;
                str += c;
                c = this.charAt(i);
                if (c === '-' || c === '+') {
                    i += 1;
                    str += c;
                    c = this.charAt(i);
                }
                if (!isDigit(c)) {
                    fail(make('number', str), "Bad exponent");
                }
                do {
                    i += 1;
                    str += c;
                    c = this.charAt(i);
                } while (isDigit(c));
            }

            // Make sure the next character is not a (lowercase) letter.
            if (c >= 'a' && c <= 'z') {
                str += c;
                i += 1;
                fail(make('number', str), "Bad number");
            }

            // Convert the string value to a number. If it is finite, then
            // it is a good token.
            n = +str;
            if (isFinite(n)) {
                result.push(make('number', n));
            } else {
                fail(make('number', str), "Bad number");
            }

        } else if (c === '\'' || c === '"') {

            // string: runs to the matching quote character.
            str = '';
            q = c;
            i += 1;
            for (;;) {
                c = this.charAt(i);

                // '' (end of input) also compares below ' ', so running
                // off the end is reported as an unterminated string.
                if (c < ' ') {
                    fail(
                        make('string', str),
                        c === '\n' || c === '\r' || c === '' ?
                            "Unterminated string." :
                            "Control character in string.",
                        make('', str)
                    );
                }

                // Look for the closing quote.
                if (c === q) {
                    break;
                }

                // Look for escapement.
                if (c === '\\') {
                    i += 1;
                    if (i >= length) {
                        fail(make('string', str), "Unterminated string");
                    }
                    c = this.charAt(i);
                    switch (c) {
                    case 'b':
                        c = '\b';
                        break;
                    case 'f':
                        c = '\f';
                        break;
                    case 'n':
                        c = '\n';
                        break;
                    case 'r':
                        c = '\r';
                        break;
                    case 't':
                        c = '\t';
                        break;
                    case 'u':

                        // Up to four hex digits follow the 'u'.
                        c = parseInt(this.slice(i + 1, i + 5), 16);
                        if (!isFinite(c) || c < 0) {
                            fail(make('string', str), "Unterminated string");
                        }
                        c = String.fromCharCode(c);
                        i += 4;
                        break;
                    }
                }
                str += c;
                i += 1;
            }

            // Step over the closing quote before recording the token so
            // that `to` lands one past it.
            i += 1;
            result.push(make('string', str));
            c = this.charAt(i);

        } else if (c === '/' && this.charAt(i + 1) === '/') {

            // comment: skip to the end of the line or of the input.
            i += 1;
            for (;;) {
                c = this.charAt(i);
                if (c === '\n' || c === '\r' || c === '') {
                    break;
                }
                i += 1;
            }

        } else if (prefix.indexOf(c) >= 0) {

            // combining: a prefix character followed by suffix characters.
            // `c` is re-read before every test, so it is '' (not a stale
            // operator character) when the operator ends the input. The
            // explicit '' test is required because indexOf('') is 0.
            str = c;
            i += 1;
            for (;;) {
                c = this.charAt(i);
                if (c === '' || suffix.indexOf(c) < 0) {
                    break;
                }
                str += c;
                i += 1;
            }
            result.push(make('operator', str));

        } else {

            // single-character operator
            i += 1;
            result.push(make('operator', c));
            c = this.charAt(i);
        }
    }
    return result;
};
268