1 | "use strict";
|
/**
 * CommonJS/ES-module interop helper: reuses an existing `__importDefault`
 * found on `this`, otherwise wraps plain modules as `{ default: mod }` and
 * passes modules flagged `__esModule` through untouched.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
5 | Object.defineProperty(exports, "__esModule", { value: true });
|
6 | var decode_codepoint_1 = __importDefault(require("entities/lib/decode_codepoint"));
|
7 | var decode_1 = require("entities/lib/decode");
|
/**
 * Tells whether a character code is one of the five whitespace characters
 * the tokenizer recognises.
 *
 * @param {number} c Character code to classify.
 * @returns {boolean} `true` for space, line feed, tab, form feed or carriage return.
 */
function whitespace(c) {
    switch (c) {
        case 32: // " "
        case 10: // "\n"
        case 9: // "\t"
        case 12: // "\f"
        case 13: // "\r"
            return true;
        default:
            return false;
    }
}
|
/**
 * Tells whether a character code is an ASCII letter.
 *
 * @param {number} c Character code to classify.
 * @returns {boolean} `true` for `a`-`z` (97-122) or `A`-`Z` (65-90).
 */
function isASCIIAlpha(c) {
    var isLowerCase = c >= 97 && c <= 122;
    var isUpperCase = c >= 65 && c <= 90;
    return isLowerCase || isUpperCase;
}
|
/**
 * Builds a state handler that matches a single letter case-insensitively.
 *
 * The letter may be given in either case: both the upper- and lower-case
 * codes are derived from it, so a lowercase argument no longer produces a
 * case-sensitive matcher.
 *
 * @param {string} upper Letter to match (only its first character is used).
 * @param {number} SUCCESS State assigned to `t._state` when the letter matches.
 * @param {number} FAILURE State assigned to `t._state` otherwise.
 * @returns {function(object, number): void} Handler taking the tokenizer `t`
 *     and the current character code `c`.
 */
function ifElseState(upper, SUCCESS, FAILURE) {
    var upperCode = upper.toUpperCase().charCodeAt(0);
    var lowerCode = upper.toLowerCase().charCodeAt(0);
    return function (t, c) {
        if (c === lowerCode || c === upperCode) {
            t._state = SUCCESS;
            return;
        }
        t._state = FAILURE;
        // Step back so the same character is read again in the FAILURE state.
        t._index--;
    };
}
|
// Single-letter matchers built with ifElseState(letter, onMatch, onMismatch).
// A match (either case) moves to the first state; a mismatch moves to the
// second state and re-reads the same character there.

// "<![" followed by C-D-A-T-A; a mismatch falls back to state 16.
var stateBeforeCdata1 = ifElseState("C", 24, 16);
var stateBeforeCdata2 = ifElseState("D", 25, 16);
var stateBeforeCdata3 = ifElseState("A", 26, 16);
var stateBeforeCdata4 = ifElseState("T", 27, 16);
var stateBeforeCdata5 = ifElseState("A", 28, 16);

// Opening "script" after "sc"; a mismatch falls back to state 3.
var stateBeforeScript1 = ifElseState("R", 35, 3);
var stateBeforeScript2 = ifElseState("I", 36, 3);
var stateBeforeScript3 = ifElseState("P", 37, 3);
var stateBeforeScript4 = ifElseState("T", 38, 3);

// Closing "script" after "sc"; a mismatch falls back to state 1.
var stateAfterScript1 = ifElseState("R", 40, 1);
var stateAfterScript2 = ifElseState("I", 41, 1);
var stateAfterScript3 = ifElseState("P", 42, 1);
var stateAfterScript4 = ifElseState("T", 43, 1);

// Opening "style" after "st".
var stateBeforeStyle1 = ifElseState("Y", 45, 3);
var stateBeforeStyle2 = ifElseState("L", 46, 3);
var stateBeforeStyle3 = ifElseState("E", 47, 3);

// Closing "style" after "st".
var stateAfterStyle1 = ifElseState("Y", 49, 1);
var stateAfterStyle2 = ifElseState("L", 50, 1);
var stateAfterStyle3 = ifElseState("E", 51, 1);

// Opening "title" after "t".
var stateBeforeSpecialT = ifElseState("I", 54, 3);
var stateBeforeTitle1 = ifElseState("T", 55, 3);
var stateBeforeTitle2 = ifElseState("L", 56, 3);
var stateBeforeTitle3 = ifElseState("E", 57, 3);

// Closing "title" after "t".
var stateBeforeSpecialTEnd = ifElseState("I", 58, 1);
var stateAfterTitle1 = ifElseState("T", 59, 1);
var stateAfterTitle2 = ifElseState("L", 60, 1);
var stateAfterTitle3 = ifElseState("E", 61, 1);

// After "&#": "x"/"X" selects state 66, anything else state 65.
var stateBeforeNumericEntity = ifElseState("X", 66, 65);
|
60 | var Tokenizer = (function () {
|
/**
 * Creates a tokenizer in its initial (text) state.
 *
 * @param {object} _a Options object.
 * @param {boolean} [_a.xmlMode=false] Stored as `this.xmlMode`; also selects
 *     the XML entity trie instead of the HTML one.
 * @param {boolean} [_a.decodeEntities=true] Stored as `this.decodeEntities`.
 * @param {object} cbs Callback object the tokenizer reports to (`ontext`,
 *     `onopentagname`, `onerror`, `onend`, ...).
 */
function Tokenizer(_a, cbs) {
    var xmlModeOption = _a.xmlMode;
    var xmlMode = xmlModeOption === void 0 ? false : xmlModeOption;
    var decodeEntitiesOption = _a.decodeEntities;
    var decodeEntities = decodeEntitiesOption === void 0 ? true : decodeEntitiesOption;
    this.cbs = cbs;
    // Current state of the state machine (1 is the text state).
    this._state = 1;
    // Input that has been written but not yet discarded.
    this.buffer = "";
    // Index in `buffer` where the section currently being read starts.
    this.sectionStart = 0;
    // Index in `buffer` of the character currently being examined.
    this._index = 0;
    // Number of characters already dropped from the front of `buffer`.
    this.bufferOffset = 0;
    // State to return to once an entity has been handled.
    this.baseState = 1;
    // Special-tag mode: 1 none, 2 script, 3 style, 4 title.
    this.special = 1;
    // Cleared by pause(), set again by resume().
    this.running = true;
    // Set once end() has been called.
    this.ended = false;
    // Bookkeeping for walking the named-entity trie.
    this.trieIndex = 0;
    this.trieCurrent = 0;
    this.trieResult = null;
    this.trieExcess = 0;
    this.xmlMode = xmlMode;
    this.decodeEntities = decodeEntities;
    this.entityTrie = xmlMode ? decode_1.xmlDecodeTree : decode_1.htmlDecodeTree;
}
|
/**
 * Returns the tokenizer to its initial state so it can be reused: empties the
 * buffer, rewinds every position counter and clears the paused/ended flags.
 * Options, callbacks and entity-trie bookkeeping are left as they are.
 */
Tokenizer.prototype.reset = function () {
    // Buffered input and positions within it.
    this.buffer = "";
    this.bufferOffset = 0;
    this.sectionStart = 0;
    this._index = 0;
    // State machine back to plain text, outside any special tag.
    this._state = 1;
    this.baseState = 1;
    this.special = 1;
    // Lifecycle flags.
    this.running = true;
    this.ended = false;
};
|
/**
 * Appends a chunk of input to the buffer and parses as far as possible.
 * After `end()` has been called, reports an error through `cbs.onerror`
 * instead and returns that callback's result.
 *
 * @param {string} chunk Input to append.
 */
Tokenizer.prototype.write = function (chunk) {
    if (this.ended) {
        return this.cbs.onerror(Error(".write() after done!"));
    }
    // An empty buffer is replaced outright rather than concatenated.
    this.buffer = this.buffer.length ? this.buffer + chunk : chunk;
    this.parse();
};
|
/**
 * Signals the end of input, optionally writing one last chunk first.
 * A second call reports an error through `cbs.onerror`. While the tokenizer
 * is paused, finishing is left to `resume()`.
 *
 * @param {string} [chunk] Final input to write before ending.
 */
Tokenizer.prototype.end = function (chunk) {
    if (this.ended) {
        return this.cbs.onerror(Error(".end() after done!"));
    }
    if (chunk) {
        this.write(chunk);
    }
    this.ended = true;
    if (this.running) {
        this.finish();
    }
};
|
/**
 * Pauses tokenizing: clears `running`, which makes the parse loop stop
 * before the next character.
 */
Tokenizer.prototype.pause = function () {
    this.running = false;
};
|
/**
 * Resumes after `pause()`: parses any buffered input that is still unread
 * and, if `end()` was called in the meantime, finishes the stream.
 */
Tokenizer.prototype.resume = function () {
    this.running = true;
    var hasUnreadInput = this._index < this.buffer.length;
    if (hasUnreadInput) {
        this.parse();
    }
    if (this.ended) {
        this.finish();
    }
};
|
134 | |
135 |
|
136 |
|
/**
 * Start of the current section measured from the beginning of all input,
 * i.e. including the characters already dropped from the buffer.
 *
 * @returns {number} `bufferOffset + sectionStart`.
 */
Tokenizer.prototype.getAbsoluteSectionStart = function () {
    var droppedSoFar = this.bufferOffset;
    return this.sectionStart + droppedSoFar;
};
|
140 | |
141 |
|
142 |
|
/**
 * Current read position measured from the beginning of all input, i.e.
 * including the characters already dropped from the buffer.
 *
 * @returns {number} `bufferOffset + _index`.
 */
Tokenizer.prototype.getAbsoluteIndex = function () {
    var droppedSoFar = this.bufferOffset;
    return droppedSoFar + this._index;
};
|
/**
 * Text state (1). A "<" starts a tag; an "&" starts an entity when entity
 * decoding is on and the tokenizer is outside special tags or inside a
 * title. In both cases the text read so far is emitted first.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateText = function (c) {
    var opensTag = c === 60; // "<"
    var opensEntity = !opensTag &&
        this.decodeEntities &&
        c === 38 && // "&"
        (this.special === 1 || this.special === 4);
    if (!opensTag && !opensEntity) {
        return;
    }
    // Flush pending text, if any, before leaving the text state.
    if (this._index > this.sectionStart) {
        this.cbs.ontext(this.getSection());
    }
    if (opensTag) {
        this._state = 2;
    }
    else {
        this.baseState = 1;
        this._state = 62;
    }
    this.sectionStart = this._index;
};
|
165 | |
166 |
|
167 |
|
168 |
|
169 |
|
170 |
|
/**
 * Decides whether a character may begin a tag name. ASCII letters always
 * may; in XML mode so may anything that is not whitespace, "/" or ">".
 *
 * @param {number} c Character code to test.
 * @returns {boolean} Whether `c` can start a tag name.
 */
Tokenizer.prototype.isTagStartChar = function (c) {
    if (isASCIIAlpha(c)) {
        return true;
    }
    return (this.xmlMode &&
        !whitespace(c) &&
        c !== 47 && // "/"
        c !== 62); // ">"
};
|
/**
 * State 2, entered right after a "<". Chooses between a closing tag, a
 * declaration/comment, a processing instruction, an opening tag name, or
 * falling back to text.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateBeforeTagName = function (c) {
    if (c === 47) { // "/"
        this._state = 5;
        return;
    }
    if (c === 60) { // another "<": emit what was read and start over here
        this.cbs.ontext(this.getSection());
        this.sectionStart = this._index;
        return;
    }
    if (c === 62 || this.special !== 1 || whitespace(c)) {
        // ">" or whitespace, or any character while inside a special tag.
        this._state = 1;
        return;
    }
    if (c === 33 || c === 63) { // "!" or "?"
        this._state = c === 33 ? 15 : 17;
        this.sectionStart = this._index + 1;
        return;
    }
    if (!this.isTagStartChar(c)) {
        this._state = 1;
        return;
    }
    // Outside XML mode, names starting with s/S or t/T take the branches
    // that look for script/style and title respectively.
    var nextState = 3;
    if (!this.xmlMode) {
        if (c === 115 || c === 83) {
            nextState = 32;
        }
        else if (c === 116 || c === 84) {
            nextState = 52;
        }
    }
    this._state = nextState;
    this.sectionStart = this._index;
};
|
/**
 * State 3, inside an opening tag name. The name ends at "/", ">" or
 * whitespace; it is reported through `cbs.onopentagname` and the same
 * character is then handled as the start of the attribute list.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInTagName = function (c) {
    if (c !== 47 && c !== 62 && !whitespace(c)) {
        return;
    }
    this.cbs.onopentagname(this.getSection());
    this.sectionStart = -1;
    this._state = 8;
    this.stateBeforeAttributeName(c);
};
|
/**
 * State 5, entered after "</". Whitespace is skipped and ">" returns to
 * text. Inside a special tag only the first letter of the matching end tag
 * is accepted; anything else is handed back to the text state.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateBeforeClosingTagName = function (c) {
    if (whitespace(c)) {
        return;
    }
    if (c === 62) { // ">"
        this._state = 1;
        return;
    }
    if (this.special !== 1) {
        // Title end tags start with t/T, script and style end tags with s/S.
        var insideTitle = this.special === 4;
        var lowerCode = insideTitle ? 116 : 115;
        var upperCode = insideTitle ? 84 : 83;
        if (c === lowerCode || c === upperCode) {
            this._state = insideTitle ? 53 : 33;
        }
        else {
            this._state = 1;
            this.stateText(c);
        }
        return;
    }
    // A character that cannot start a tag name leads to state 20.
    this._state = this.isTagStartChar(c) ? 6 : 20;
    this.sectionStart = this._index;
};
|
/**
 * State 6, inside a closing tag name. The name ends at ">" or whitespace;
 * it is reported through `cbs.onclosetag` and the same character is then
 * handled by the after-closing-tag-name state.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInClosingTagName = function (c) {
    if (c !== 62 && !whitespace(c)) {
        return;
    }
    this.cbs.onclosetag(this.getSection());
    this.sectionStart = -1;
    this._state = 7;
    this.stateAfterClosingTagName(c);
};
|
/**
 * State 7, after a closing tag name. Everything up to the next ">" is
 * skipped; text resumes right after it.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateAfterClosingTagName = function (c) {
    if (c !== 62) { // not ">"
        return;
    }
    this._state = 1;
    this.sectionStart = this._index + 1;
};
|
/**
 * State 8, between attributes of an opening tag. ">" ends the tag, "/" may
 * begin a self-closing tag, whitespace is skipped and any other character
 * starts an attribute name.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateBeforeAttributeName = function (c) {
    if (c === 62) { // ">"
        this.cbs.onopentagend();
        this._state = 1;
        this.sectionStart = this._index + 1;
        return;
    }
    if (c === 47) { // "/"
        this._state = 4;
        return;
    }
    if (whitespace(c)) {
        return;
    }
    this._state = 9;
    this.sectionStart = this._index;
};
|
/**
 * State 4, after a "/" inside an opening tag. ">" completes a self-closing
 * tag and clears any special-tag mode; whitespace is skipped; any other
 * character is handled as the start of the next attribute.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInSelfClosingTag = function (c) {
    if (c === 62) { // ">"
        this.cbs.onselfclosingtag();
        this._state = 1;
        this.sectionStart = this._index + 1;
        this.special = 1;
        return;
    }
    if (whitespace(c)) {
        return;
    }
    this._state = 8;
    this.stateBeforeAttributeName(c);
};
|
/**
 * State 9, inside an attribute name. The name ends at "=", "/", ">" or
 * whitespace; it is reported through `cbs.onattribname` and the same
 * character is then handled by the after-attribute-name state.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInAttributeName = function (c) {
    var endsName = c === 61 || // "="
        c === 47 || // "/"
        c === 62 || // ">"
        whitespace(c);
    if (!endsName) {
        return;
    }
    this.cbs.onattribname(this.getSection());
    this.sectionStart = -1;
    this._state = 10;
    this.stateAfterAttributeName(c);
};
|
/**
 * State 10, after an attribute name. "=" introduces a value and whitespace
 * is skipped. Anything else means the attribute has no value: it is closed
 * with `cbs.onattribend(undefined)`, after which "/" and ">" are handled as
 * tag syntax and any other character starts the next attribute name.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateAfterAttributeName = function (c) {
    if (c === 61) { // "="
        this._state = 11;
        return;
    }
    var endsTag = c === 47 || c === 62; // "/" or ">"
    if (!endsTag && whitespace(c)) {
        return;
    }
    this.cbs.onattribend(undefined);
    if (endsTag) {
        this._state = 8;
        this.stateBeforeAttributeName(c);
    }
    else {
        this._state = 9;
        this.sectionStart = this._index;
    }
};
|
/**
 * State 11, after the "=" of an attribute. A quote opens a quoted value
 * that starts on the next character, whitespace is skipped, and any other
 * character is already the first character of an unquoted value.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateBeforeAttributeValue = function (c) {
    if (c === 34 || c === 39) { // '"' or "'"
        this._state = c === 34 ? 12 : 13;
        this.sectionStart = this._index + 1;
        return;
    }
    if (whitespace(c)) {
        return;
    }
    this.sectionStart = this._index;
    this._state = 14;
    this.stateInAttributeValueNoQuotes(c);
};
|
/**
 * Shared handler for quoted attribute values. The closing quote ends the
 * attribute; an "&" (when entity decoding is on) switches to entity parsing
 * and remembers the current state to come back to. Either way the value
 * text read so far is reported through `cbs.onattribdata` first.
 *
 * @param {number} c Current character code.
 * @param {number} quote Character code of the quote that delimits the value.
 */
Tokenizer.prototype.handleInAttributeValue = function (c, quote) {
    var closesValue = c === quote;
    if (!closesValue && !(this.decodeEntities && c === 38)) {
        return;
    }
    this.cbs.onattribdata(this.getSection());
    if (closesValue) {
        this.sectionStart = -1;
        this.cbs.onattribend(String.fromCharCode(quote));
        this._state = 8;
    }
    else {
        this.baseState = this._state;
        this._state = 62;
        this.sectionStart = this._index;
    }
};
|
/**
 * State 12, inside a double-quoted attribute value.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInAttributeValueDoubleQuotes = function (c) {
    var doubleQuote = 34; // '"'
    this.handleInAttributeValue(c, doubleQuote);
};
|
/**
 * State 13, inside a single-quoted attribute value.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInAttributeValueSingleQuotes = function (c) {
    var singleQuote = 39; // "'"
    this.handleInAttributeValue(c, singleQuote);
};
|
/**
 * State 14, inside an unquoted attribute value. Whitespace or ">" ends the
 * value (closed with `cbs.onattribend(null)`) and is then handled as tag
 * syntax; an "&" (when entity decoding is on) switches to entity parsing.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInAttributeValueNoQuotes = function (c) {
    var endsValue = whitespace(c) || c === 62; // ">"
    if (!endsValue && !(this.decodeEntities && c === 38)) {
        return;
    }
    this.cbs.onattribdata(this.getSection());
    if (endsValue) {
        this.sectionStart = -1;
        this.cbs.onattribend(null);
        this._state = 8;
        this.stateBeforeAttributeName(c);
    }
    else {
        this.baseState = this._state;
        this._state = 62;
        this.sectionStart = this._index;
    }
};
|
/**
 * State 15, entered after "<!". "[" may begin a CDATA section, "-" may
 * begin a comment, and anything else is read as a declaration.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateBeforeDeclaration = function (c) {
    switch (c) {
        case 91: // "["
            this._state = 23;
            break;
        case 45: // "-"
            this._state = 18;
            break;
        default:
            this._state = 16;
    }
};
|
/**
 * State 16, inside a declaration. At ">" the collected section is reported
 * through `cbs.ondeclaration` and text resumes after it.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInDeclaration = function (c) {
    if (c !== 62) { // not ">"
        return;
    }
    this.cbs.ondeclaration(this.getSection());
    this._state = 1;
    this.sectionStart = this._index + 1;
};
|
/**
 * State 17, inside a processing instruction. At ">" the collected section
 * is reported through `cbs.onprocessinginstruction` and text resumes.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInProcessingInstruction = function (c) {
    if (c !== 62) { // not ">"
        return;
    }
    this.cbs.onprocessinginstruction(this.getSection());
    this._state = 1;
    this.sectionStart = this._index + 1;
};
|
/**
 * State 18, entered after "<!-". A second "-" opens a comment whose content
 * starts on the next character; anything else is read as a declaration.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateBeforeComment = function (c) {
    if (c !== 45) { // not "-"
        this._state = 16;
        return;
    }
    this._state = 19;
    this.sectionStart = this._index + 1;
};
|
/**
 * State 19, inside a comment. A "-" may be the start of the closing "-->".
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInComment = function (c) {
    var isDash = c === 45; // "-"
    if (isDash) {
        this._state = 21;
    }
};
|
/**
 * State 20, reached when "</" is followed by a character that cannot start
 * a tag name. Everything up to the next ">" is reported through
 * `cbs.oncomment`, then text resumes.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInSpecialComment = function (c) {
    if (c !== 62) { // not ">"
        return;
    }
    var commentText = this.buffer.substring(this.sectionStart, this._index);
    this.cbs.oncomment(commentText);
    this._state = 1;
    this.sectionStart = this._index + 1;
};
|
/**
 * State 21, after one "-" inside a comment. A second "-" moves on to state
 * 22; anything else goes back to the comment body.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateAfterComment1 = function (c) {
    this._state = c === 45 ? 22 : 19; // 45 is "-"
};
|
/**
 * State 22, after "--" inside a comment. ">" closes the comment, further
 * dashes keep waiting for it, and anything else goes back to the comment
 * body.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateAfterComment2 = function (c) {
    if (c === 45) { // another "-": stay here
        return;
    }
    if (c !== 62) { // not ">"
        this._state = 19;
        return;
    }
    // Leave the two trailing dashes out of the reported text.
    var commentText = this.buffer.substring(this.sectionStart, this._index - 2);
    this.cbs.oncomment(commentText);
    this._state = 1;
    this.sectionStart = this._index + 1;
};
|
/**
 * State 28, after "<![CDATA". A "[" opens the CDATA body, which starts on
 * the next character; anything else is handled as part of a declaration.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateBeforeCdata6 = function (c) {
    if (c !== 91) { // not "["
        this._state = 16;
        this.stateInDeclaration(c);
        return;
    }
    this._state = 29;
    this.sectionStart = this._index + 1;
};
|
/**
 * State 29, inside a CDATA body. A "]" may be the start of the closing "]]>".
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInCdata = function (c) {
    var isClosingBracket = c === 93; // "]"
    if (isClosingBracket) {
        this._state = 30;
    }
};
|
/**
 * State 30, after one "]" inside CDATA. A second "]" moves on to state 31;
 * anything else goes back to the CDATA body.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateAfterCdata1 = function (c) {
    this._state = c === 93 ? 31 : 29; // 93 is "]"
};
|
/**
 * State 31, after "]]" inside CDATA. ">" closes the section, further "]"
 * keep waiting for it, and anything else goes back to the CDATA body.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateAfterCdata2 = function (c) {
    if (c === 93) { // another "]": stay here
        return;
    }
    if (c !== 62) { // not ">"
        this._state = 29;
        return;
    }
    // Leave the two trailing brackets out of the reported text.
    var cdataText = this.buffer.substring(this.sectionStart, this._index - 2);
    this.cbs.oncdata(cdataText);
    this._state = 1;
    this.sectionStart = this._index + 1;
};
|
/**
 * State 32, second character of an opening tag name that began with s/S.
 * c/C continues towards "script", t/T towards "style"; anything else is
 * handled as an ordinary tag name.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateBeforeSpecialS = function (c) {
    switch (c) {
        case 99: // "c"
        case 67: // "C"
            this._state = 34;
            break;
        case 116: // "t"
        case 84: // "T"
            this._state = 44;
            break;
        default:
            this._state = 3;
            this.stateInTagName(c);
    }
};
|
/**
 * State 33, second character of a closing tag name that began with s/S
 * while inside a script (special 2) or style (special 3) element.
 * c/C continues towards "</script" and t/T towards "</style".
 *
 * Any other character returns to the text state and is re-examined there,
 * so a "<" at this position can still open the real end tag (for example
 * the second "<" in "</s</script>"). This matches what
 * stateBeforeClosingTagName and the ifElseState matchers do on a mismatch.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateBeforeSpecialSEnd = function (c) {
    if (this.special === 2 && (c === 99 || c === 67)) {
        this._state = 39;
    }
    else if (this.special === 3 && (c === 116 || c === 84)) {
        this._state = 48;
    }
    else {
        this._state = 1;
        this.stateText(c);
    }
};
|
/**
 * Runs on the character after a complete "script", "style" or "title"
 * opening tag name. If the name really ends here ("/", ">" or whitespace)
 * the special-tag mode is switched on; in every case the character is then
 * handled as part of an ordinary tag name.
 *
 * @param {number} c Current character code.
 * @param {number} special Special-tag mode to enter (2 script, 3 style, 4 title).
 */
Tokenizer.prototype.stateBeforeSpecialLast = function (c, special) {
    var nameEndsHere = c === 47 || c === 62 || whitespace(c);
    if (nameEndsHere) {
        this.special = special;
    }
    this._state = 3;
    this.stateInTagName(c);
};
|
/**
 * Runs on the character after a complete "</script", "</style" or "</title"
 * while inside the matching special element.
 *
 * If the name ends here (">" or whitespace) the special-tag mode is cleared,
 * the section is rewound to the start of the tag name and the character is
 * handled as the end of a closing tag name.
 *
 * Otherwise the tokenizer returns to the text state and the character is
 * re-examined there, so a "<" at this position can still open the real end
 * tag (for example the second "<" in "</script</script>").
 *
 * @param {number} c Current character code.
 * @param {number} sectionStartOffset Distance from the current index back to
 *     the first character of the tag name.
 */
Tokenizer.prototype.stateAfterSpecialLast = function (c, sectionStartOffset) {
    if (c === 62 || whitespace(c)) {
        this.sectionStart = this._index - sectionStartOffset;
        this.special = 1;
        this._state = 6;
        this.stateInClosingTagName(c);
    }
    else {
        this._state = 1;
        this.stateText(c);
    }
};
|
/**
 * State 62, entered after an "&". "#" begins a numeric entity; another "&"
 * emits what was read so far and starts over at this one; anything else
 * begins a named-entity lookup, with the character re-read in that state.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateBeforeEntity = function (c) {
    if (c === 35) { // "#"
        this._state = 63;
        return;
    }
    if (c === 38) { // "&"
        this.emitPartial(this.getSection());
        this.sectionStart = this._index;
        return;
    }
    // Start walking the entity trie from its root.
    this._state = 64;
    this.trieIndex = 0;
    this.trieCurrent = this.entityTrie[0];
    this.trieResult = null;
    // Starts at 1; together with the rewind below this keeps the "&" in
    // the section if no entity is ever matched.
    this.trieExcess = 1;
    this._index--;
};
|
/**
 * State 64, inside a named entity. Advances one step through the entity
 * trie; when no branch matches, whatever was matched so far is emitted and
 * the character is re-read in the base state.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInNamedEntity = function (c) {
    var trie = this.entityTrie;
    this.trieExcess += 1;
    var nextIndex = (0, decode_1.determineBranch)(trie, this.trieCurrent, this.trieIndex + 1, c);
    this.trieIndex = nextIndex;
    if (nextIndex < 0) {
        this.emitNamedEntity();
        this._index--;
        return;
    }
    var node = trie[nextIndex];
    this.trieCurrent = node;
    if (!(node & decode_1.BinTrieFlags.HAS_VALUE)) {
        return;
    }
    if (!this.allowLegacyEntity() && c !== 59) {
        // Entities without a closing ";" are not accepted here; step over
        // the next trie slot (presumably the stored value; confirm against
        // the trie layout in entities/lib/decode).
        this.trieIndex += 1;
        return;
    }
    // Read the decoded value: one code unit, or two for MULTI_BYTE nodes.
    var firstUnit = trie[++this.trieIndex];
    if (node & decode_1.BinTrieFlags.MULTI_BYTE) {
        this.trieResult = String.fromCharCode(firstUnit, trie[++this.trieIndex]);
    }
    else {
        this.trieResult = String.fromCharCode(firstUnit);
    }
    // Everything read so far now belongs to the matched entity.
    this.trieExcess = 0;
};
|
/**
 * Ends a named-entity lookup: emits the decoded value if one was found,
 * places the section start just after the matched part (the `trieExcess`
 * characters read since the last match stay in the section), and returns
 * to the base state.
 */
Tokenizer.prototype.emitNamedEntity = function () {
    var decoded = this.trieResult;
    if (decoded) {
        this.emitPartial(decoded);
    }
    this.sectionStart = this._index - this.trieExcess + 1;
    this._state = this.baseState;
};
|
/**
 * Decodes the numeric entity that ends at the current index and returns to
 * the base state. Nothing is emitted when the entity has no digits.
 *
 * @param {number} base Radix of the digits: 10 or 16.
 * @param {boolean} strict `true` when the entity ended with ";", which is
 *     then left out of the following section.
 */
Tokenizer.prototype.decodeNumericEntity = function (base, strict) {
    // Digits start after "&#", plus one more character ("x") for base 16:
    // `base >> 4` is 0 for 10 and 1 for 16.
    var digitsStart = this.sectionStart + 2 + (base >> 4);
    if (digitsStart !== this._index) {
        var digits = this.buffer.substring(digitsStart, this._index);
        var codePoint = parseInt(digits, base);
        this.emitPartial((0, decode_codepoint_1.default)(codePoint));
        this.sectionStart = this._index + Number(strict);
    }
    this._state = this.baseState;
};
|
/**
 * State 65, inside a decimal numeric entity. ";" ends it; any other
 * non-digit ends it too (decoded only where legacy entities are allowed)
 * and is re-read in the base state.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInNumericEntity = function (c) {
    if (c === 59) { // ";"
        this.decodeNumericEntity(10, true);
        return;
    }
    var isOutsideDigits = c < 48 || c > 57; // not "0"-"9"
    if (!isOutsideDigits) {
        return;
    }
    if (this.allowLegacyEntity()) {
        this.decodeNumericEntity(10, false);
    }
    else {
        this._state = this.baseState;
    }
    this._index--;
};
|
/**
 * State 66, inside a hexadecimal numeric entity. ";" ends it; any other
 * non-hex character ends it too (decoded only where legacy entities are
 * allowed) and is re-read in the base state.
 *
 * @param {number} c Current character code.
 */
Tokenizer.prototype.stateInHexEntity = function (c) {
    if (c === 59) { // ";"
        this.decodeNumericEntity(16, true);
        return;
    }
    var isOutsideHexDigits = (c < 97 || c > 102) && // not "a"-"f"
        (c < 65 || c > 70) && // not "A"-"F"
        (c < 48 || c > 57); // not "0"-"9"
    if (!isOutsideHexDigits) {
        return;
    }
    if (this.allowLegacyEntity()) {
        this.decodeNumericEntity(16, false);
    }
    else {
        this._state = this.baseState;
    }
    this._index--;
};
|
/**
 * Whether entities without a terminating ";" are accepted: only outside
 * XML mode and only when the entity was started from the text state.
 *
 * @returns {boolean}
 */
Tokenizer.prototype.allowLegacyEntity = function () {
    if (this.xmlMode) {
        return false;
    }
    return this.baseState === 1;
};
|
600 | |
601 |
|
602 |
|
/**
 * Drops the part of the buffer that is no longer needed. Pending text is
 * emitted first (only while running and in the text state); the buffer is
 * then cut at the start of the current section and the position counters
 * are shifted to match.
 */
Tokenizer.prototype.cleanup = function () {
    var hasPendingText = this.running &&
        this._state === 1 &&
        this.sectionStart !== this._index;
    if (hasPendingText) {
        this.cbs.ontext(this.buffer.slice(this.sectionStart));
        this.sectionStart = this._index;
    }
    // A negative sectionStart means no section is open, so everything up
    // to the current index can go.
    var cutAt = this.sectionStart < 0 ? this._index : this.sectionStart;
    if (cutAt === this.buffer.length) {
        this.buffer = "";
    }
    else {
        this.buffer = this.buffer.slice(cutAt);
    }
    this._index -= cutAt;
    this.bufferOffset += cutAt;
    if (this.sectionStart > 0) {
        this.sectionStart = 0;
    }
};
|
621 | |
622 |
|
623 |
|
624 |
|
625 |
|
/**
 * Main loop: feeds buffered characters one at a time to the handler for the
 * current state until the buffer is exhausted or the tokenizer is paused,
 * then trims the buffer. Handlers may decrement `_index` to have a
 * character read again in the state they switched to.
 */
Tokenizer.prototype.parse = function () {
    while (this._index < this.buffer.length && this.running) {
        var c = this.buffer.charCodeAt(this._index);
        switch (this._state) {
            // Text and tag names
            case 1:
                this.stateText(c);
                break;
            case 2:
                this.stateBeforeTagName(c);
                break;
            case 3:
                this.stateInTagName(c);
                break;
            case 4:
                this.stateInSelfClosingTag(c);
                break;
            case 5:
                this.stateBeforeClosingTagName(c);
                break;
            case 6:
                this.stateInClosingTagName(c);
                break;
            case 7:
                this.stateAfterClosingTagName(c);
                break;
            // Attributes
            case 8:
                this.stateBeforeAttributeName(c);
                break;
            case 9:
                this.stateInAttributeName(c);
                break;
            case 10:
                this.stateAfterAttributeName(c);
                break;
            case 11:
                this.stateBeforeAttributeValue(c);
                break;
            case 12:
                this.stateInAttributeValueDoubleQuotes(c);
                break;
            case 13:
                this.stateInAttributeValueSingleQuotes(c);
                break;
            case 14:
                this.stateInAttributeValueNoQuotes(c);
                break;
            // Declarations and processing instructions
            case 15:
                this.stateBeforeDeclaration(c);
                break;
            case 16:
                this.stateInDeclaration(c);
                break;
            case 17:
                this.stateInProcessingInstruction(c);
                break;
            // Comments
            case 18:
                this.stateBeforeComment(c);
                break;
            case 19:
                this.stateInComment(c);
                break;
            case 20:
                this.stateInSpecialComment(c);
                break;
            case 21:
                this.stateAfterComment1(c);
                break;
            case 22:
                this.stateAfterComment2(c);
                break;
            // CDATA
            case 23:
                stateBeforeCdata1(this, c);
                break;
            case 24:
                stateBeforeCdata2(this, c);
                break;
            case 25:
                stateBeforeCdata3(this, c);
                break;
            case 26:
                stateBeforeCdata4(this, c);
                break;
            case 27:
                stateBeforeCdata5(this, c);
                break;
            case 28:
                this.stateBeforeCdata6(c);
                break;
            case 29:
                this.stateInCdata(c);
                break;
            case 30:
                this.stateAfterCdata1(c);
                break;
            case 31:
                this.stateAfterCdata2(c);
                break;
            // Tag names starting with "s": script and style
            case 32:
                this.stateBeforeSpecialS(c);
                break;
            case 33:
                this.stateBeforeSpecialSEnd(c);
                break;
            case 34:
                stateBeforeScript1(this, c);
                break;
            case 35:
                stateBeforeScript2(this, c);
                break;
            case 36:
                stateBeforeScript3(this, c);
                break;
            case 37:
                stateBeforeScript4(this, c);
                break;
            case 38:
                this.stateBeforeSpecialLast(c, 2);
                break;
            case 39:
                stateAfterScript1(this, c);
                break;
            case 40:
                stateAfterScript2(this, c);
                break;
            case 41:
                stateAfterScript3(this, c);
                break;
            case 42:
                stateAfterScript4(this, c);
                break;
            case 43:
                this.stateAfterSpecialLast(c, 6);
                break;
            case 44:
                stateBeforeStyle1(this, c);
                break;
            case 45:
                stateBeforeStyle2(this, c);
                break;
            case 46:
                stateBeforeStyle3(this, c);
                break;
            case 47:
                this.stateBeforeSpecialLast(c, 3);
                break;
            case 48:
                stateAfterStyle1(this, c);
                break;
            case 49:
                stateAfterStyle2(this, c);
                break;
            case 50:
                stateAfterStyle3(this, c);
                break;
            case 51:
                this.stateAfterSpecialLast(c, 5);
                break;
            // Tag names starting with "t": title
            case 52:
                stateBeforeSpecialT(this, c);
                break;
            case 53:
                stateBeforeSpecialTEnd(this, c);
                break;
            case 54:
                stateBeforeTitle1(this, c);
                break;
            case 55:
                stateBeforeTitle2(this, c);
                break;
            case 56:
                stateBeforeTitle3(this, c);
                break;
            case 57:
                this.stateBeforeSpecialLast(c, 4);
                break;
            case 58:
                stateAfterTitle1(this, c);
                break;
            case 59:
                stateAfterTitle2(this, c);
                break;
            case 60:
                stateAfterTitle3(this, c);
                break;
            case 61:
                this.stateAfterSpecialLast(c, 5);
                break;
            // Entities
            case 62:
                this.stateBeforeEntity(c);
                break;
            case 64:
                this.stateInNamedEntity(c);
                break;
            case 65:
                this.stateInNumericEntity(c);
                break;
            case 66:
                this.stateInHexEntity(c);
                break;
            default:
                // Every remaining state value (63 is the one assigned
                // elsewhere) is the state right after "&#".
                stateBeforeNumericEntity(this, c);
        }
        this._index++;
    }
    this.cleanup();
};
|
/**
 * Completes tokenizing: flushes any data still pending in the current
 * section, then signals the end through `cbs.onend`.
 */
Tokenizer.prototype.finish = function () {
    var hasPendingData = this.sectionStart < this._index;
    if (hasPendingData) {
        this.handleTrailingData();
    }
    this.cbs.onend();
};
|
839 |
|
/**
 * Flushes what is left in the current section once the input has ended.
 * What happens depends on the state the tokenizer stopped in:
 *
 * - inside CDATA or a comment, the remainder is reported as such;
 * - inside an entity (outside XML mode), the entity is resolved as far as
 *   possible and whatever remains is flushed again from the base state;
 * - inside an unfinished tag or attribute, the remainder is dropped;
 * - otherwise the remainder is reported as text.
 *
 * The numeric-entity branches re-flush leftovers just like the named-entity
 * branch: when no digits were read (input ending in "&#x"), nothing is
 * decoded and the pending characters are emitted as text rather than lost.
 */
Tokenizer.prototype.handleTrailingData = function () {
    var state = this._state;
    var data = this.buffer.slice(this.sectionStart);
    if (state === 29 || state === 30 || state === 31) {
        this.cbs.oncdata(data);
    }
    else if (state === 19 || state === 21 || state === 22) {
        this.cbs.oncomment(data);
    }
    else if (state === 64 && !this.xmlMode) {
        // Account for the end of input as one more unmatched character.
        this.trieExcess++;
        this.emitNamedEntity();
        if (this.sectionStart < this._index) {
            this._state = this.baseState;
            this.handleTrailingData();
        }
    }
    else if ((state === 65 || state === 66) && !this.xmlMode) {
        this.decodeNumericEntity(state === 65 ? 10 : 16, false);
        // A successful decode moves sectionStart up to the index; if it did
        // not move, nothing was decoded and the raw characters still need
        // to be flushed from the base state.
        if (this.sectionStart < this._index) {
            this._state = this.baseState;
            this.handleTrailingData();
        }
    }
    else if (state === 3 ||
        state === 8 ||
        state === 11 ||
        state === 10 ||
        state === 9 ||
        state === 13 ||
        state === 12 ||
        state === 14 ||
        state === 6) {
        // Input ended inside a tag name, attribute or closing tag name:
        // the incomplete markup is dropped.
    }
    else {
        this.cbs.ontext(data);
    }
};
|
/**
 * Text of the current section: the buffer contents from `sectionStart` up
 * to, but not including, the current index.
 *
 * @returns {string}
 */
Tokenizer.prototype.getSection = function () {
    var from = this.sectionStart;
    var to = this._index;
    return this.buffer.substring(from, to);
};
|
/**
 * Reports a piece of decoded content to the right callback: `cbs.ontext`
 * when the entity was started from the text state, `cbs.onattribdata`
 * otherwise.
 *
 * @param {string} value Content to report.
 */
Tokenizer.prototype.emitPartial = function (value) {
    if (this.baseState === 1) {
        this.cbs.ontext(value);
        return;
    }
    this.cbs.onattribdata(value);
};
|
898 | return Tokenizer;
|
899 | }());
|
900 | exports.default = Tokenizer;
|