UNPKG

145 kBJavaScriptView Raw
1/**
2 * @license
3 * Copyright Google LLC All Rights Reserved.
4 *
5 * Use of this source code is governed by an MIT-style license that can be
6 * found in the LICENSE file at https://angular.io/license
7 */
8import * as chars from '../chars';
9import { ParseError, ParseLocation, ParseSourceFile, ParseSourceSpan } from '../parse_util';
10import { NAMED_ENTITIES } from './entities';
11import { DEFAULT_INTERPOLATION_CONFIG } from './interpolation_config';
12import { TagContentType } from './tags';
/**
 * A parse error produced by the tokenizer.
 *
 * In addition to what `ParseError` stores, it records the type of the token that was being
 * built when the problem was detected.
 */
export class TokenError extends ParseError {
    /**
     * @param errorMsg Description of the problem.
     * @param tokenType Type of the token in progress; callers in this file may pass `null`.
     * @param span Source span the error refers to (forwarded to `ParseError`).
     */
    constructor(errorMsg, tokenType, span) {
        super(span, errorMsg);
        this.tokenType = tokenType;
    }
}
/**
 * Plain container for everything a tokenization run produces.
 */
export class TokenizeResult {
    /**
     * @param tokens The tokens that were produced.
     * @param errors The errors collected while tokenizing.
     * @param nonNormalizedIcuExpressions Tokens of ICU expressions whose line endings were left
     *     un-normalized.
     */
    constructor(tokens, errors, nonNormalizedIcuExpressions) {
        this.tokens = tokens;
        this.errors = errors;
        this.nonNormalizedIcuExpressions = nonNormalizedIcuExpressions;
    }
}
/**
 * Tokenizes `source` and returns the resulting tokens and errors.
 *
 * Adjacent text tokens are merged (via `mergeTextTokens`) before the result is returned.
 *
 * @param source The text to tokenize.
 * @param url Identifier of the source, passed on to `ParseSourceFile`.
 * @param getTagDefinition Function that returns the tag definition for a tag name.
 * @param options Tokenizer configuration; defaults to an empty object.
 * @returns A `TokenizeResult` holding the merged tokens, the errors and the non-normalized ICU
 *     expression tokens.
 */
export function tokenize(source, url, getTagDefinition, options = {}) {
    const file = new ParseSourceFile(source, url);
    const tokenizer = new _Tokenizer(file, getTagDefinition, options);
    tokenizer.tokenize();
    return new TokenizeResult(mergeTextTokens(tokenizer.tokens), tokenizer.errors, tokenizer.nonNormalizedIcuExpressions);
}
// Matches a carriage return, optionally followed by a line feed (i.e. `\r` or `\r\n`).
const _CR_OR_CRLF_REGEXP = /\r\n?/g;
/**
 * Builds the "unexpected character" error message for a character code.
 *
 * @param charCode The offending code; `chars.$EOF` is rendered as the literal text `EOF`.
 * @returns The error message.
 */
function _unexpectedCharacterErrorMsg(charCode) {
    if (charCode === chars.$EOF) {
        return `Unexpected character "EOF"`;
    }
    // `String.fromCharCode` truncates values above 0xFFFF, so prefer `fromCodePoint`. It throws
    // on values that are not valid code points, in which case we keep the old conversion so
    // that building an error message can never itself fail.
    const isValidCodePoint = Number.isInteger(charCode) && charCode >= 0 && charCode <= 0x10ffff;
    const char = isValidCodePoint ? String.fromCodePoint(charCode) : String.fromCharCode(charCode);
    return `Unexpected character "${char}"`;
}
/**
 * Builds the error message reported for an entity that could not be resolved.
 *
 * @param entitySrc The entity text to quote in the message.
 * @returns The error message.
 */
function _unknownEntityErrorMsg(entitySrc) {
    return `Unknown entity "${entitySrc}" - use the "&#<decimal>;" or "&#x<hex>;" syntax`;
}
/**
 * Builds the error message reported for a numeric character reference that is not terminated
 * by a semicolon.
 *
 * @param type Kind of reference, as text (see `CharacterReferenceType`).
 * @param entityStr The entity text to quote in the message.
 * @returns The error message.
 */
function _unparsableEntityErrorMsg(type, entityStr) {
    return `Unable to parse entity "${entityStr}" - ${type} character reference entities must end with ";"`;
}
/**
 * Kinds of numeric character reference, keyed by name and valued by the word used for them in
 * error messages.
 */
const CharacterReferenceType = Object.freeze({
    HEX: 'hexadecimal',
    DEC: 'decimal',
});
/**
 * Wrapper that is thrown to unwind out of the tokenizer's consume methods.
 *
 * The wrapped error is what ends up in the tokenizer's `errors` list (see
 * `_Tokenizer.handleError`).
 */
class _ControlFlowError {
    /**
     * @param error The error being carried.
     */
    constructor(error) {
        this.error = error;
    }
}
// See https://www.w3.org/TR/html51/syntax.html#writing-html-documents
/**
 * Walks a character cursor over the source and produces a flat list of tokens.
 *
 * Tokens accumulate in `tokens`, recoverable problems in `errors`, and condition tokens of ICU
 * expressions whose line endings were left un-normalized in `nonNormalizedIcuExpressions`.
 * Token types are numeric; the inline comments next to each literal name them.
 */
class _Tokenizer {
    /**
     * @param _file The html source file being tokenized.
     * @param _getTagDefinition A function that will retrieve a tag definition for a given tag name.
     * @param options Configuration of the tokenization.
     */
    constructor(_file, _getTagDefinition, options) {
        this._getTagDefinition = _getTagDefinition;
        this._currentTokenStart = null;
        this._currentTokenType = null;
        this._expansionCaseStack = [];
        this._inInterpolation = false;
        this.tokens = [];
        this.errors = [];
        this.nonNormalizedIcuExpressions = [];
        this._tokenizeIcu = options.tokenizeExpansionForms || false;
        this._interpolationConfig = options.interpolationConfig || DEFAULT_INTERPOLATION_CONFIG;
        this._leadingTriviaCodePoints =
            options.leadingTriviaChars && options.leadingTriviaChars.map(c => c.codePointAt(0) || 0);
        // Without an explicit range the whole file content is tokenized.
        const range = options.range || { endPos: _file.content.length, startPos: 0, startLine: 0, startCol: 0 };
        this._cursor = options.escapedString ? new EscapedCharacterCursor(_file, range) :
            new PlainCharacterCursor(_file, range);
        this._preserveLineEndings = options.preserveLineEndings || false;
        this._escapedString = options.escapedString || false;
        this._i18nNormalizeLineEndingsInICUs = options.i18nNormalizeLineEndingsInICUs || false;
        try {
            this._cursor.init();
        }
        catch (e) {
            this.handleError(e);
        }
    }
    /**
     * Replaces `\r` and `\r\n` with `\n`, unless line endings are to be preserved.
     *
     * @param content The text to process.
     * @returns The processed text.
     */
    _processCarriageReturns(content) {
        if (this._preserveLineEndings) {
            return content;
        }
        // https://www.w3.org/TR/html51/syntax.html#preprocessing-the-input-stream
        // In order to keep the original position in the source, we can not
        // pre-process it.
        // Instead CRs are processed right before instantiating the tokens.
        return content.replace(_CR_OR_CRLF_REGEXP, '\n');
    }
    /**
     * Runs the tokenizer until the cursor reaches EOF, then emits the EOF token.
     *
     * Errors handled by `handleError` are recorded and tokenization continues from wherever the
     * cursor was left.
     */
    tokenize() {
        while (this._cursor.peek() !== chars.$EOF) {
            const start = this._cursor.clone();
            try {
                if (this._attemptCharCode(chars.$LT)) {
                    if (this._attemptCharCode(chars.$BANG)) {
                        if (this._attemptCharCode(chars.$LBRACKET)) {
                            this._consumeCdata(start);
                        }
                        else if (this._attemptCharCode(chars.$MINUS)) {
                            this._consumeComment(start);
                        }
                        else {
                            this._consumeDocType(start);
                        }
                    }
                    else if (this._attemptCharCode(chars.$SLASH)) {
                        this._consumeTagClose(start);
                    }
                    else {
                        this._consumeTagOpen(start);
                    }
                }
                else if (!(this._tokenizeIcu && this._tokenizeExpansionForm())) {
                    // In (possibly interpolated) text the end of the text is given by `isTextEnd()`, while
                    // the premature end of an interpolation is given by the start of a new HTML element.
                    this._consumeWithInterpolation(5 /* TEXT */, 8 /* INTERPOLATION */, () => this._isTextEnd(), () => this._isTagStart());
                }
            }
            catch (e) {
                this.handleError(e);
            }
        }
        this._beginToken(24 /* EOF */);
        this._endToken([]);
    }
    /**
     * @returns whether an ICU token has been created
     * @internal
     */
    _tokenizeExpansionForm() {
        if (this.isExpansionFormStart()) {
            this._consumeExpansionFormStart();
            return true;
        }
        if (isExpansionCaseStart(this._cursor.peek()) && this._isInExpansionForm()) {
            this._consumeExpansionCaseStart();
            return true;
        }
        if (this._cursor.peek() === chars.$RBRACE) {
            if (this._isInExpansionCase()) {
                this._consumeExpansionCaseEnd();
                return true;
            }
            if (this._isInExpansionForm()) {
                this._consumeExpansionFormEnd();
                return true;
            }
        }
        return false;
    }
    /**
     * Records the type and start position of the token that is about to be consumed.
     *
     * @param type The token type.
     * @param start Cursor marking the token start; defaults to a clone of the current cursor.
     */
    _beginToken(type, start = this._cursor.clone()) {
        this._currentTokenStart = start;
        this._currentTokenType = type;
    }
    /**
     * Completes the token started by `_beginToken()`, appends it to `tokens` and resets the
     * "current token" state.
     *
     * @param parts The parts to store on the token.
     * @param end Optional cursor marking the token end; the current cursor is used when omitted.
     * @returns The token that was created.
     * @throws TokenError if no token start or no token type has been recorded.
     */
    _endToken(parts, end) {
        if (this._currentTokenStart === null) {
            throw new TokenError('Programming error - attempted to end a token when there was no start to the token', this._currentTokenType, this._cursor.getSpan(end));
        }
        if (this._currentTokenType === null) {
            throw new TokenError('Programming error - attempted to end a token which has no token type', null, this._cursor.getSpan(this._currentTokenStart));
        }
        const token = {
            type: this._currentTokenType,
            parts,
            sourceSpan: (end ?? this._cursor).getSpan(this._currentTokenStart, this._leadingTriviaCodePoints),
        };
        this.tokens.push(token);
        this._currentTokenStart = null;
        this._currentTokenType = null;
        return token;
    }
    /**
     * Creates a throwable error for the token in progress and abandons that token.
     *
     * @param msg The error message; a hint about unescaped `{` is appended while inside an
     *     expansion form.
     * @param span The source span of the error.
     * @returns A `_ControlFlowError` wrapping the new `TokenError`.
     */
    _createError(msg, span) {
        if (this._isInExpansionForm()) {
            msg += ` (Do you have an unescaped "{" in your template? Use "{{ '{' }}") to escape it.)`;
        }
        const error = new TokenError(msg, this._currentTokenType, span);
        this._currentTokenStart = null;
        this._currentTokenType = null;
        return new _ControlFlowError(error);
    }
    /**
     * Records an error thrown while tokenizing.
     *
     * `CursorError`s are first converted with `_createError()`. Anything that is not (or does not
     * become) a `_ControlFlowError` is rethrown.
     *
     * @param e The caught value.
     */
    handleError(e) {
        if (e instanceof CursorError) {
            e = this._createError(e.msg, this._cursor.getSpan(e.cursor));
        }
        if (e instanceof _ControlFlowError) {
            this.errors.push(e.error);
        }
        else {
            throw e;
        }
    }
    /**
     * Advances past the next character if it equals `charCode`.
     *
     * @returns whether the character was consumed.
     */
    _attemptCharCode(charCode) {
        if (this._cursor.peek() === charCode) {
            this._cursor.advance();
            return true;
        }
        return false;
    }
    /**
     * Like `_attemptCharCode()` but compares with `compareCharCodeCaseInsensitive()`.
     *
     * @returns whether the character was consumed.
     */
    _attemptCharCodeCaseInsensitive(charCode) {
        if (compareCharCodeCaseInsensitive(this._cursor.peek(), charCode)) {
            this._cursor.advance();
            return true;
        }
        return false;
    }
    /**
     * Consumes `charCode` or throws an "unexpected character" error.
     */
    _requireCharCode(charCode) {
        const location = this._cursor.clone();
        if (!this._attemptCharCode(charCode)) {
            throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(location));
        }
    }
    /**
     * Consumes the given string if the upcoming characters match it exactly.
     *
     * On a mismatch the cursor is restored to where it was before the attempt.
     *
     * @param chars The string to match (note: this parameter shadows the `chars` module).
     * @returns whether the string was consumed.
     */
    _attemptStr(chars) {
        const len = chars.length;
        if (this._cursor.charsLeft() < len) {
            return false;
        }
        const initialPosition = this._cursor.clone();
        for (let i = 0; i < len; i++) {
            if (!this._attemptCharCode(chars.charCodeAt(i))) {
                // If attempting to parse the string fails, we want to reset the parser
                // to where it was before the attempt
                this._cursor = initialPosition;
                return false;
            }
        }
        return true;
    }
    /**
     * Consumes the given string, comparing characters case-insensitively.
     *
     * Unlike `_attemptStr()` this does NOT restore the cursor on a mismatch.
     *
     * @param chars The string to match (note: this parameter shadows the `chars` module).
     * @returns whether the whole string was consumed.
     */
    _attemptStrCaseInsensitive(chars) {
        for (let i = 0; i < chars.length; i++) {
            if (!this._attemptCharCodeCaseInsensitive(chars.charCodeAt(i))) {
                return false;
            }
        }
        return true;
    }
    /**
     * Consumes the given string or throws an "unexpected character" error.
     *
     * @param chars The string to match (note: this parameter shadows the `chars` module).
     */
    _requireStr(chars) {
        const location = this._cursor.clone();
        if (!this._attemptStr(chars)) {
            throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(location));
        }
    }
    /**
     * Advances the cursor until `predicate` returns true for the character under it.
     */
    _attemptCharCodeUntilFn(predicate) {
        while (!predicate(this._cursor.peek())) {
            this._cursor.advance();
        }
    }
    /**
     * Like `_attemptCharCodeUntilFn()`, but throws if the cursor moved by fewer than `len`.
     */
    _requireCharCodeUntilFn(predicate, len) {
        const start = this._cursor.clone();
        this._attemptCharCodeUntilFn(predicate);
        if (this._cursor.diff(start) < len) {
            throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(start));
        }
    }
    /**
     * Advances the cursor until the character under it equals `char`.
     */
    _attemptUntilChar(char) {
        while (this._cursor.peek() !== char) {
            this._cursor.advance();
        }
    }
    /**
     * Reads the character under the cursor and advances past it.
     *
     * @returns The character as a string.
     */
    _readChar() {
        // Don't rely upon reading directly from `_input` as the actual char value
        // may have been generated from an escape sequence.
        const char = String.fromCodePoint(this._cursor.peek());
        this._cursor.advance();
        return char;
    }
    /**
     * Consumes a character reference starting at the `&` under the cursor.
     *
     * Numeric references (`&#...;` / `&#x...;`) and named references found in `NAMED_ENTITIES`
     * produce an ENCODED_ENTITY token whose parts are `[decoded, source]`. A name that is not
     * followed by `;` is not an entity: a token of `textTokenType` containing just `&` is emitted
     * and the cursor is placed right after the `&`.
     *
     * @param textTokenType Token type to use when the `&` turns out to be plain text.
     * @throws A control flow error for unterminated or undecodable numeric references and for
     *     unknown named references.
     */
    _consumeEntity(textTokenType) {
        this._beginToken(9 /* ENCODED_ENTITY */);
        const start = this._cursor.clone();
        this._cursor.advance();
        if (this._attemptCharCode(chars.$HASH)) {
            const isHex = this._attemptCharCode(chars.$x) || this._attemptCharCode(chars.$X);
            const codeStart = this._cursor.clone();
            this._attemptCharCodeUntilFn(isDigitEntityEnd);
            if (this._cursor.peek() !== chars.$SEMICOLON) {
                // Advance cursor to include the peeked character in the string provided to the error
                // message.
                this._cursor.advance();
                const entityType = isHex ? CharacterReferenceType.HEX : CharacterReferenceType.DEC;
                throw this._createError(_unparsableEntityErrorMsg(entityType, this._cursor.getChars(start)), this._cursor.getSpan());
            }
            const strNum = this._cursor.getChars(codeStart);
            this._cursor.advance();
            try {
                const charCode = parseInt(strNum, isHex ? 16 : 10);
                // `fromCodePoint` decodes code points above 0xFFFF correctly and throws a
                // `RangeError` for `NaN` or out-of-range values, which is reported below.
                // (`fromCharCode` would silently truncate or yield `\0` instead.)
                this._endToken([String.fromCodePoint(charCode), this._cursor.getChars(start)]);
            }
            catch {
                throw this._createError(_unknownEntityErrorMsg(this._cursor.getChars(start)), this._cursor.getSpan());
            }
        }
        else {
            const nameStart = this._cursor.clone();
            this._attemptCharCodeUntilFn(isNamedEntityEnd);
            if (this._cursor.peek() !== chars.$SEMICOLON) {
                // No semicolon was found so abort the encoded entity token that was in progress, and treat
                // this as a text token
                this._beginToken(textTokenType, start);
                this._cursor = nameStart;
                this._endToken(['&']);
            }
            else {
                const name = this._cursor.getChars(nameStart);
                this._cursor.advance();
                // Only consider own properties so that names such as `constructor` or `toString`
                // are not resolved to members inherited from `Object.prototype`.
                const char = Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, name) ?
                    NAMED_ENTITIES[name] :
                    undefined;
                if (!char) {
                    throw this._createError(_unknownEntityErrorMsg(name), this._cursor.getSpan(start));
                }
                this._endToken([char, `&${name};`]);
            }
        }
    }
    /**
     * Consumes raw text up to (but not including) an end marker.
     *
     * @param consumeEntities Whether `&` starts an entity (ESCAPABLE_RAW_TEXT tokens) or is plain
     *     text (RAW_TEXT tokens).
     * @param endMarkerPredicate Called at each position; returns true when the end marker starts
     *     there. It may move the cursor - the cursor is restored after every call.
     */
    _consumeRawText(consumeEntities, endMarkerPredicate) {
        this._beginToken(consumeEntities ? 6 /* ESCAPABLE_RAW_TEXT */ : 7 /* RAW_TEXT */);
        const parts = [];
        while (true) {
            const tagCloseStart = this._cursor.clone();
            const foundEndMarker = endMarkerPredicate();
            this._cursor = tagCloseStart;
            if (foundEndMarker) {
                break;
            }
            if (consumeEntities && this._cursor.peek() === chars.$AMPERSAND) {
                this._endToken([this._processCarriageReturns(parts.join(''))]);
                parts.length = 0;
                this._consumeEntity(6 /* ESCAPABLE_RAW_TEXT */);
                this._beginToken(6 /* ESCAPABLE_RAW_TEXT */);
            }
            else {
                parts.push(this._readChar());
            }
        }
        this._endToken([this._processCarriageReturns(parts.join(''))]);
    }
    /**
     * Consumes a comment. `start` points at the `<`; `<!-` has already been consumed.
     */
    _consumeComment(start) {
        this._beginToken(10 /* COMMENT_START */, start);
        this._requireCharCode(chars.$MINUS);
        this._endToken([]);
        this._consumeRawText(false, () => this._attemptStr('-->'));
        this._beginToken(11 /* COMMENT_END */);
        this._requireStr('-->');
        this._endToken([]);
    }
    /**
     * Consumes a CDATA section. `start` points at the `<`; `<![` has already been consumed.
     */
    _consumeCdata(start) {
        this._beginToken(12 /* CDATA_START */, start);
        this._requireStr('CDATA[');
        this._endToken([]);
        this._consumeRawText(false, () => this._attemptStr(']]>'));
        this._beginToken(13 /* CDATA_END */);
        this._requireStr(']]>');
        this._endToken([]);
    }
    /**
     * Consumes a doc type. `start` points at the `<`; `<!` has already been consumed. The token's
     * single part is the text between `<!` and the closing `>`.
     */
    _consumeDocType(start) {
        this._beginToken(18 /* DOC_TYPE */, start);
        const contentStart = this._cursor.clone();
        this._attemptUntilChar(chars.$GT);
        const content = this._cursor.getChars(contentStart);
        this._cursor.advance();
        this._endToken([content]);
    }
    /**
     * Consumes an optionally prefixed name (`prefix:name`).
     *
     * @returns `[prefix, name]`; `prefix` is the empty string when there is none.
     */
    _consumePrefixAndName() {
        const nameOrPrefixStart = this._cursor.clone();
        let prefix = '';
        while (this._cursor.peek() !== chars.$COLON && !isPrefixEnd(this._cursor.peek())) {
            this._cursor.advance();
        }
        let nameStart;
        if (this._cursor.peek() === chars.$COLON) {
            prefix = this._cursor.getChars(nameOrPrefixStart);
            this._cursor.advance();
            nameStart = this._cursor.clone();
        }
        else {
            nameStart = nameOrPrefixStart;
        }
        // After a prefix at least one more character is required.
        this._requireCharCodeUntilFn(isNameEnd, prefix === '' ? 0 : 1);
        const name = this._cursor.getChars(nameStart);
        return [prefix, name];
    }
    /**
     * Consumes an opening tag with its attributes and, for raw-text elements, the element's
     * content and closing tag. `start` points at the `<`, which has already been consumed.
     */
    _consumeTagOpen(start) {
        let tagName;
        let prefix;
        let openTagToken;
        try {
            if (!chars.isAsciiLetter(this._cursor.peek())) {
                throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(start));
            }
            openTagToken = this._consumeTagOpenStart(start);
            prefix = openTagToken.parts[0];
            tagName = openTagToken.parts[1];
            this._attemptCharCodeUntilFn(isNotWhitespace);
            while (this._cursor.peek() !== chars.$SLASH && this._cursor.peek() !== chars.$GT &&
                this._cursor.peek() !== chars.$LT && this._cursor.peek() !== chars.$EOF) {
                this._consumeAttributeName();
                this._attemptCharCodeUntilFn(isNotWhitespace);
                if (this._attemptCharCode(chars.$EQ)) {
                    this._attemptCharCodeUntilFn(isNotWhitespace);
                    this._consumeAttributeValue();
                }
                this._attemptCharCodeUntilFn(isNotWhitespace);
            }
            this._consumeTagOpenEnd();
        }
        catch (e) {
            if (e instanceof _ControlFlowError) {
                if (openTagToken) {
                    // We errored before we could close the opening tag, so it is incomplete.
                    openTagToken.type = 4 /* INCOMPLETE_TAG_OPEN */;
                }
                else {
                    // When the start tag is invalid, assume we want a "<" as text.
                    // Back to back text tokens are merged at the end.
                    this._beginToken(5 /* TEXT */, start);
                    this._endToken(['<']);
                }
                return;
            }
            throw e;
        }
        const contentTokenType = this._getTagDefinition(tagName).getContentType(prefix);
        if (contentTokenType === TagContentType.RAW_TEXT) {
            this._consumeRawTextWithTagClose(prefix, tagName, false);
        }
        else if (contentTokenType === TagContentType.ESCAPABLE_RAW_TEXT) {
            this._consumeRawTextWithTagClose(prefix, tagName, true);
        }
    }
    /**
     * Consumes raw text up to the closing tag for `tagName`, then the closing tag itself.
     *
     * The closing tag name is matched case-insensitively and may be surrounded by whitespace.
     */
    _consumeRawTextWithTagClose(prefix, tagName, consumeEntities) {
        this._consumeRawText(consumeEntities, () => {
            if (!this._attemptCharCode(chars.$LT))
                return false;
            if (!this._attemptCharCode(chars.$SLASH))
                return false;
            this._attemptCharCodeUntilFn(isNotWhitespace);
            if (!this._attemptStrCaseInsensitive(tagName))
                return false;
            this._attemptCharCodeUntilFn(isNotWhitespace);
            return this._attemptCharCode(chars.$GT);
        });
        this._beginToken(3 /* TAG_CLOSE */);
        this._requireCharCodeUntilFn(code => code === chars.$GT, 3);
        this._cursor.advance(); // Consume the `>`
        this._endToken([prefix, tagName]);
    }
    /**
     * Emits the TAG_OPEN_START token; its parts are `[prefix, name]`.
     *
     * @returns The token that was created.
     */
    _consumeTagOpenStart(start) {
        this._beginToken(0 /* TAG_OPEN_START */, start);
        const parts = this._consumePrefixAndName();
        return this._endToken(parts);
    }
    /**
     * Emits an ATTR_NAME token; throws if the name would start with a quote character.
     */
    _consumeAttributeName() {
        const attrNameStart = this._cursor.peek();
        if (attrNameStart === chars.$SQ || attrNameStart === chars.$DQ) {
            throw this._createError(_unexpectedCharacterErrorMsg(attrNameStart), this._cursor.getSpan());
        }
        this._beginToken(14 /* ATTR_NAME */);
        const prefixAndName = this._consumePrefixAndName();
        this._endToken(prefixAndName);
    }
    /**
     * Consumes a quoted or unquoted attribute value, including any interpolations in it.
     */
    _consumeAttributeValue() {
        if (this._cursor.peek() === chars.$SQ || this._cursor.peek() === chars.$DQ) {
            const quoteChar = this._cursor.peek();
            this._consumeQuote(quoteChar);
            // In an attribute then end of the attribute value and the premature end to an interpolation
            // are both triggered by the `quoteChar`.
            const endPredicate = () => this._cursor.peek() === quoteChar;
            this._consumeWithInterpolation(16 /* ATTR_VALUE_TEXT */, 17 /* ATTR_VALUE_INTERPOLATION */, endPredicate, endPredicate);
            this._consumeQuote(quoteChar);
        }
        else {
            const endPredicate = () => isNameEnd(this._cursor.peek());
            this._consumeWithInterpolation(16 /* ATTR_VALUE_TEXT */, 17 /* ATTR_VALUE_INTERPOLATION */, endPredicate, endPredicate);
        }
    }
    /**
     * Emits an ATTR_QUOTE token for the required quote character.
     */
    _consumeQuote(quoteChar) {
        this._beginToken(15 /* ATTR_QUOTE */);
        this._requireCharCode(quoteChar);
        this._endToken([String.fromCodePoint(quoteChar)]);
    }
    /**
     * Emits the token that ends an opening tag: TAG_OPEN_END_VOID for `/>`, TAG_OPEN_END for `>`.
     */
    _consumeTagOpenEnd() {
        const tokenType = this._attemptCharCode(chars.$SLASH) ? 2 /* TAG_OPEN_END_VOID */ : 1 /* TAG_OPEN_END */;
        this._beginToken(tokenType);
        this._requireCharCode(chars.$GT);
        this._endToken([]);
    }
    /**
     * Consumes a closing tag. `start` points at the `<`; `</` has already been consumed.
     */
    _consumeTagClose(start) {
        this._beginToken(3 /* TAG_CLOSE */, start);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        const prefixAndName = this._consumePrefixAndName();
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._requireCharCode(chars.$GT);
        this._endToken(prefixAndName);
    }
    /**
     * Consumes the head of an expansion form: `{`, the condition up to the first comma and the
     * type up to the second comma.
     */
    _consumeExpansionFormStart() {
        this._beginToken(19 /* EXPANSION_FORM_START */);
        this._requireCharCode(chars.$LBRACE);
        this._endToken([]);
        this._expansionCaseStack.push(19 /* EXPANSION_FORM_START */);
        this._beginToken(7 /* RAW_TEXT */);
        const condition = this._readUntil(chars.$COMMA);
        const normalizedCondition = this._processCarriageReturns(condition);
        if (this._i18nNormalizeLineEndingsInICUs) {
            // We explicitly want to normalize line endings for this text.
            this._endToken([normalizedCondition]);
        }
        else {
            // We are not normalizing line endings.
            const conditionToken = this._endToken([condition]);
            if (normalizedCondition !== condition) {
                this.nonNormalizedIcuExpressions.push(conditionToken);
            }
        }
        this._requireCharCode(chars.$COMMA);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._beginToken(7 /* RAW_TEXT */);
        const type = this._readUntil(chars.$COMMA);
        this._endToken([type]);
        this._requireCharCode(chars.$COMMA);
        this._attemptCharCodeUntilFn(isNotWhitespace);
    }
    /**
     * Consumes an expansion case value and the `{` that opens the case expression.
     */
    _consumeExpansionCaseStart() {
        this._beginToken(20 /* EXPANSION_CASE_VALUE */);
        const value = this._readUntil(chars.$LBRACE).trim();
        this._endToken([value]);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._beginToken(21 /* EXPANSION_CASE_EXP_START */);
        this._requireCharCode(chars.$LBRACE);
        this._endToken([]);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._expansionCaseStack.push(21 /* EXPANSION_CASE_EXP_START */);
    }
    /**
     * Consumes the `}` that closes an expansion case expression.
     */
    _consumeExpansionCaseEnd() {
        this._beginToken(22 /* EXPANSION_CASE_EXP_END */);
        this._requireCharCode(chars.$RBRACE);
        this._endToken([]);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._expansionCaseStack.pop();
    }
    /**
     * Consumes the `}` that closes an expansion form.
     */
    _consumeExpansionFormEnd() {
        this._beginToken(23 /* EXPANSION_FORM_END */);
        this._requireCharCode(chars.$RBRACE);
        this._endToken([]);
        this._expansionCaseStack.pop();
    }
    /**
     * Consume a string that may contain interpolation expressions.
     *
     * The first token consumed will be of `tokenType` and then there will be alternating
     * `interpolationTokenType` and `tokenType` tokens until the `endPredicate()` returns true.
     *
     * If an interpolation token ends prematurely it will have no end marker in its `parts` array.
     *
     * @param textTokenType the kind of tokens to interleave around interpolation tokens.
     * @param interpolationTokenType the kind of tokens that contain interpolation.
     * @param endPredicate a function that should return true when we should stop consuming.
     * @param endInterpolation a function that should return true if there is a premature end to an
     *     interpolation expression - i.e. before we get to the normal interpolation closing marker.
     */
    _consumeWithInterpolation(textTokenType, interpolationTokenType, endPredicate, endInterpolation) {
        this._beginToken(textTokenType);
        const parts = [];
        while (!endPredicate()) {
            const current = this._cursor.clone();
            if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) {
                // The text token ends where the interpolation start marker began.
                this._endToken([this._processCarriageReturns(parts.join(''))], current);
                parts.length = 0;
                this._consumeInterpolation(interpolationTokenType, current, endInterpolation);
                this._beginToken(textTokenType);
            }
            else if (this._cursor.peek() === chars.$AMPERSAND) {
                this._endToken([this._processCarriageReturns(parts.join(''))]);
                parts.length = 0;
                this._consumeEntity(textTokenType);
                this._beginToken(textTokenType);
            }
            else {
                parts.push(this._readChar());
            }
        }
        // It is possible that an interpolation was started but not ended inside this text token.
        // Make sure that we reset the state of the lexer correctly.
        this._inInterpolation = false;
        this._endToken([this._processCarriageReturns(parts.join(''))]);
    }
    /**
     * Consume a block of text that has been interpreted as an Angular interpolation.
     *
     * @param interpolationTokenType the type of the interpolation token to generate.
     * @param interpolationStart a cursor that points to the start of this interpolation.
     * @param prematureEndPredicate a function that should return true if the next characters indicate
     *     an end to the interpolation before its normal closing marker.
     */
    _consumeInterpolation(interpolationTokenType, interpolationStart, prematureEndPredicate) {
        const parts = [];
        this._beginToken(interpolationTokenType, interpolationStart);
        parts.push(this._interpolationConfig.start);
        // Find the end of the interpolation, ignoring content inside quotes.
        const expressionStart = this._cursor.clone();
        let inQuote = null;
        let inComment = false;
        while (this._cursor.peek() !== chars.$EOF &&
            (prematureEndPredicate === null || !prematureEndPredicate())) {
            const current = this._cursor.clone();
            if (this._isTagStart()) {
                // We are starting what looks like an HTML element in the middle of this interpolation.
                // Reset the cursor to before the `<` character and end the interpolation token.
                // (This is actually wrong but here for backward compatibility).
                this._cursor = current;
                parts.push(this._getProcessedChars(expressionStart, current));
                this._endToken(parts);
                return;
            }
            if (inQuote === null) {
                if (this._attemptStr(this._interpolationConfig.end)) {
                    // We are not in a string, and we hit the end interpolation marker
                    parts.push(this._getProcessedChars(expressionStart, current));
                    parts.push(this._interpolationConfig.end);
                    this._endToken(parts);
                    return;
                }
                else if (this._attemptStr('//')) {
                    // Once we are in a comment we ignore any quotes
                    inComment = true;
                }
            }
            const char = this._cursor.peek();
            this._cursor.advance();
            if (char === chars.$BACKSLASH) {
                // Skip the next character because it was escaped.
                this._cursor.advance();
            }
            else if (char === inQuote) {
                // Exiting the current quoted string
                inQuote = null;
            }
            else if (!inComment && inQuote === null && chars.isQuote(char)) {
                // Entering a new quoted string
                inQuote = char;
            }
        }
        // We hit EOF without finding a closing interpolation marker
        parts.push(this._getProcessedChars(expressionStart, this._cursor));
        this._endToken(parts);
    }
    /**
     * Returns the text between two cursors after carriage-return processing.
     */
    _getProcessedChars(start, end) {
        return this._processCarriageReturns(end.getChars(start));
    }
    /**
     * Returns true if the cursor is at a position where a text token must end.
     */
    _isTextEnd() {
        if (this._isTagStart() || this._cursor.peek() === chars.$EOF) {
            return true;
        }
        if (this._tokenizeIcu && !this._inInterpolation) {
            if (this.isExpansionFormStart()) {
                // start of an expansion form
                return true;
            }
            if (this._cursor.peek() === chars.$RBRACE && this._isInExpansionCase()) {
                // end of an expansion case
                return true;
            }
        }
        return false;
    }
    /**
     * Returns true if the current cursor is pointing to the start of a tag
     * (opening/closing/comments/cdata/etc).
     */
    _isTagStart() {
        if (this._cursor.peek() === chars.$LT) {
            // We assume that `<` followed by whitespace is not the start of an HTML element.
            const tmp = this._cursor.clone();
            tmp.advance();
            // If the next character is alphabetic, `!` or `/` then it is a tag start
            const code = tmp.peek();
            if ((chars.$a <= code && code <= chars.$z) || (chars.$A <= code && code <= chars.$Z) ||
                code === chars.$SLASH || code === chars.$BANG) {
                return true;
            }
        }
        return false;
    }
    /**
     * Advances until `char` is under the cursor and returns the text that was skipped.
     */
    _readUntil(char) {
        const start = this._cursor.clone();
        this._attemptUntilChar(char);
        return this._cursor.getChars(start);
    }
    /**
     * Returns true if the innermost open ICU construct is an expansion case expression.
     */
    _isInExpansionCase() {
        return this._expansionCaseStack.length > 0 &&
            this._expansionCaseStack[this._expansionCaseStack.length - 1] ===
                21 /* EXPANSION_CASE_EXP_START */;
    }
    /**
     * Returns true if the innermost open ICU construct is an expansion form.
     */
    _isInExpansionForm() {
        return this._expansionCaseStack.length > 0 &&
            this._expansionCaseStack[this._expansionCaseStack.length - 1] ===
                19 /* EXPANSION_FORM_START */;
    }
    /**
     * Returns true if the cursor is at a `{` that does not begin the interpolation start marker.
     * The cursor is left where it was.
     */
    isExpansionFormStart() {
        if (this._cursor.peek() !== chars.$LBRACE) {
            return false;
        }
        if (this._interpolationConfig) {
            const start = this._cursor.clone();
            const isInterpolation = this._attemptStr(this._interpolationConfig.start);
            this._cursor = start;
            return !isInterpolation;
        }
        return true;
    }
}
/**
 * Returns true for EOF and for any code for which `chars.isWhitespace` is false.
 *
 * Used as a stop predicate when skipping whitespace; treating EOF as a stop prevents advancing
 * past the end of the input.
 */
function isNotWhitespace(code) {
    return !chars.isWhitespace(code) || code === chars.$EOF;
}
/**
 * Returns true if `code` terminates a name: whitespace, `>`, `<`, `/`, `'`, `"`, `=` or EOF.
 */
function isNameEnd(code) {
    return chars.isWhitespace(code) || code === chars.$GT || code === chars.$LT ||
        code === chars.$SLASH || code === chars.$SQ || code === chars.$DQ || code === chars.$EQ ||
        code === chars.$EOF;
}
/**
 * Returns true if `code` cannot be part of a name prefix, i.e. it is neither an ASCII letter
 * (`a`-`z`, `A`-`Z`) nor an ASCII digit (`0`-`9`).
 */
function isPrefixEnd(code) {
    return (code < chars.$a || chars.$z < code) && (code < chars.$A || chars.$Z < code) &&
        (code < chars.$0 || code > chars.$9);
}
/**
 * Returns true if `code` ends the digits of a numeric character reference: `;`, EOF or anything
 * that is not an ASCII hex digit.
 *
 * Note that hex digits are accepted regardless of whether the reference is decimal or hex.
 */
function isDigitEntityEnd(code) {
    return code === chars.$SEMICOLON || code === chars.$EOF || !chars.isAsciiHexDigit(code);
}
/**
 * Returns true if `code` ends the name of a named character reference: `;`, EOF or anything
 * that is neither an ASCII letter nor an ASCII digit.
 *
 * Digits must be accepted because HTML defines named references that contain them
 * (e.g. `&frac12;`, `&sup2;`); stopping at the digit would make those impossible to recognise.
 */
function isNamedEntityEnd(code) {
    if (code === chars.$SEMICOLON || code === chars.$EOF) {
        return true;
    }
    const isDigit = chars.$0 <= code && code <= chars.$9;
    return !(chars.isAsciiLetter(code) || isDigit);
}
/**
 * Returns true if `peek` can start an expansion case, i.e. it is anything other than `}`.
 */
function isExpansionCaseStart(peek) {
    return peek !== chars.$RBRACE;
}
/**
 * Returns true if the two character codes are equal once both have been passed through
 * `toUpperCaseCharCode()`.
 */
function compareCharCodeCaseInsensitive(code1, code2) {
    return toUpperCaseCharCode(code1) === toUpperCaseCharCode(code2);
}
/**
 * Maps the codes of `a`-`z` to the codes of `A`-`Z`; every other code is returned unchanged.
 */
function toUpperCaseCharCode(code) {
    const isLowerCaseLetter = code >= chars.$a && code <= chars.$z;
    return isLowerCaseLetter ? code - chars.$a + chars.$A : code;
}
/**
 * Merges runs of adjacent TEXT tokens, and runs of adjacent ATTR_VALUE_TEXT tokens, into one
 * token each.
 *
 * The first token of a run is mutated in place: the first part of each following token is
 * appended to its first part and its source span is extended to the end of that token. All
 * other tokens are passed through untouched.
 *
 * @param srcTokens The tokens to merge.
 * @returns A new array containing the merged tokens.
 */
function mergeTextTokens(srcTokens) {
    const dstTokens = [];
    let lastDstToken = undefined;
    for (const token of srcTokens) {
        const isTextLike = token.type === 5 /* TEXT */ || token.type === 16 /* ATTR_VALUE_TEXT */;
        if (lastDstToken && isTextLike && lastDstToken.type === token.type) {
            lastDstToken.parts[0] += token.parts[0];
            lastDstToken.sourceSpan.end = token.sourceSpan.end;
        }
        else {
            lastDstToken = token;
            dstTokens.push(lastDstToken);
        }
    }
    return dstTokens;
}
/**
 * A cursor over the characters of a source file, restricted to a range of that file.
 *
 * The cursor's `state` tracks the current offset, line and column together with `peek`, the
 * code of the character at the current offset (or `chars.$EOF` at the end of the range).
 */
class PlainCharacterCursor {
    /**
     * @param fileOrCursor Either the file to read from, or another cursor to copy.
     * @param range Required together with a file: `startPos`, `endPos`, `startLine` and
     *     `startCol` of the region to read. Ignored when copying a cursor.
     * @throws Error if a file is given without a range.
     */
    constructor(fileOrCursor, range) {
        if (fileOrCursor instanceof PlainCharacterCursor) {
            this.file = fileOrCursor.file;
            this.input = fileOrCursor.input;
            this.end = fileOrCursor.end;
            const state = fileOrCursor.state;
            // Note: avoid using `{...fileOrCursor.state}` here as that has a severe performance penalty.
            // In ES5 bundles the object spread operator is translated into the `__assign` helper, which
            // is not optimized by VMs as efficiently as a raw object literal. Since this constructor is
            // called in tight loops, this difference matters.
            this.state = {
                peek: state.peek,
                offset: state.offset,
                line: state.line,
                column: state.column,
            };
        }
        else {
            if (!range) {
                throw new Error('Programming error: the range argument must be provided with a file argument.');
            }
            this.file = fileOrCursor;
            this.input = fileOrCursor.content;
            this.end = range.endPos;
            // `peek` stays at -1 until `init()` is called.
            this.state = {
                peek: -1,
                offset: range.startPos,
                line: range.startLine,
                column: range.startCol,
            };
        }
    }
    /** Returns an independent copy of this cursor. */
    clone() {
        return new PlainCharacterCursor(this);
    }
    /** Returns the code of the character at the current position. */
    peek() {
        return this.state.peek;
    }
    /** Returns the number of characters between the current position and the end of the range. */
    charsLeft() {
        return this.end - this.state.offset;
    }
    /** Returns how many characters this cursor is ahead of `other` (negative if behind). */
    diff(other) {
        return this.state.offset - other.state.offset;
    }
    /**
     * Moves one character forward.
     *
     * @throws CursorError when already at the end of the range.
     */
    advance() {
        this.advanceState(this.state);
    }
    /** Loads `peek` for the starting position. */
    init() {
        this.updatePeek(this.state);
    }
    /**
     * Builds the source span that runs from `start` to this cursor.
     *
     * @param start Cursor marking the start of the span; defaults to this cursor.
     * @param leadingTriviaCodePoints Optional list of code points to skip at the start of the
     *     span. When any are skipped, the original start is kept as the span's third argument
     *     (the "full start").
     * @returns A new `ParseSourceSpan`.
     */
    getSpan(start, leadingTriviaCodePoints) {
        start = start || this;
        let fullStart = start;
        if (leadingTriviaCodePoints) {
            while (this.diff(start) > 0 && leadingTriviaCodePoints.includes(start.peek())) {
                if (fullStart === start) {
                    // Clone lazily so that the caller's cursor is never moved.
                    start = start.clone();
                }
                start.advance();
            }
        }
        const startLocation = this.locationFromCursor(start);
        const endLocation = this.locationFromCursor(this);
        const fullStartLocation = fullStart !== start ? this.locationFromCursor(fullStart) : startLocation;
        return new ParseSourceSpan(startLocation, endLocation, fullStartLocation);
    }
    /** Returns the input text between `start` and this cursor. */
    getChars(start) {
        return this.input.substring(start.state.offset, this.state.offset);
    }
    /** Returns the UTF-16 code unit of the input at `pos`. */
    charAt(pos) {
        return this.input.charCodeAt(pos);
    }
    /**
     * Moves `state` one character forward, updating its line, column and peek.
     *
     * A line feed starts a new line; other characters for which `chars.isNewLine` is true leave
     * the column unchanged.
     *
     * @throws CursorError when `state` is already at the end of the range; `state` is installed
     *     as this cursor's state first so that the error points at that position.
     */
    advanceState(state) {
        if (state.offset >= this.end) {
            this.state = state;
            throw new CursorError('Unexpected character "EOF"', this);
        }
        const currentChar = this.charAt(state.offset);
        if (currentChar === chars.$LF) {
            state.line++;
            state.column = 0;
        }
        else if (!chars.isNewLine(currentChar)) {
            state.column++;
        }
        state.offset++;
        this.updatePeek(state);
    }
    /** Sets `state.peek` to the character at `state.offset`, or to EOF at the end of the range. */
    updatePeek(state) {
        state.peek = state.offset >= this.end ? chars.$EOF : this.charAt(state.offset);
    }
    /** Converts a cursor's position into a `ParseLocation`. */
    locationFromCursor(cursor) {
        return new ParseLocation(cursor.file, cursor.state.offset, cursor.state.line, cursor.state.column);
    }
}
/**
 * A cursor that decodes backslash escape sequences while reading.
 *
 * `state` describes the position of the (possibly escaped) character as seen by callers, with
 * `state.peek` holding the decoded value. `internalState` tracks how far the raw input has been
 * read in order to decode that character. Outside of an escape sequence both refer to the same
 * object.
 */
class EscapedCharacterCursor extends PlainCharacterCursor {
    /**
     * @param fileOrCursor Either the file to read from, or another cursor to copy.
     * @param range The range to read; required together with a file.
     */
    constructor(fileOrCursor, range) {
        if (fileOrCursor instanceof EscapedCharacterCursor) {
            super(fileOrCursor);
            // Copy field by field rather than with object spread: cursors are cloned in tight
            // loops and the base class avoids spread for performance reasons.
            const internal = fileOrCursor.internalState;
            this.internalState = {
                peek: internal.peek,
                offset: internal.offset,
                line: internal.line,
                column: internal.column,
            };
        }
        else {
            super(fileOrCursor, range);
            this.internalState = this.state;
        }
    }
    /** Moves past the current (possibly escaped) character and decodes the next one. */
    advance() {
        this.state = this.internalState;
        super.advance();
        this.processEscapeSequence();
    }
    /** Loads the first character, decoding it if it is an escape sequence. */
    init() {
        super.init();
        this.processEscapeSequence();
    }
    /** Returns an independent copy of this cursor. */
    clone() {
        return new EscapedCharacterCursor(this);
    }
    /** Returns the decoded text between `start` and this cursor. */
    getChars(start) {
        const cursor = start.clone();
        let chars = '';
        while (cursor.internalState.offset < this.internalState.offset) {
            chars += String.fromCodePoint(cursor.peek());
            cursor.advance();
        }
        return chars;
    }
    /**
     * Process the escape sequence that starts at the current position in the text.
     *
     * This method is called to ensure that `peek` has the unescaped value of escape sequences.
     */
    processEscapeSequence() {
        const peek = () => this.internalState.peek;
        if (peek() === chars.$BACKSLASH) {
            // We have hit an escape sequence so we need the internal state to become independent
            // of the external state.
            const external = this.state;
            this.internalState = {
                peek: external.peek,
                offset: external.offset,
                line: external.line,
                column: external.column,
            };
            // Move past the backslash
            this.advanceState(this.internalState);
            // First check for standard control char sequences
            if (peek() === chars.$n) {
                this.state.peek = chars.$LF;
            }
            else if (peek() === chars.$r) {
                this.state.peek = chars.$CR;
            }
            else if (peek() === chars.$v) {
                this.state.peek = chars.$VTAB;
            }
            else if (peek() === chars.$t) {
                this.state.peek = chars.$TAB;
            }
            else if (peek() === chars.$b) {
                this.state.peek = chars.$BSPACE;
            }
            else if (peek() === chars.$f) {
                this.state.peek = chars.$FF;
            }
            // Now consider more complex sequences
            else if (peek() === chars.$u) {
                // Unicode code-point sequence
                this.advanceState(this.internalState); // advance past the `u` char
                if (peek() === chars.$LBRACE) {
                    // Variable length Unicode, e.g. `\u{123}`
                    this.advanceState(this.internalState); // advance past the `{` char
                    // Advance past the variable number of hex digits until we hit a `}` char
                    const digitStart = this.clone();
                    let length = 0;
                    while (peek() !== chars.$RBRACE) {
                        this.advanceState(this.internalState);
                        length++;
                    }
                    this.state.peek = this.decodeHexDigits(digitStart, length);
                }
                else {
                    // Fixed length Unicode, e.g. `\u1234`
                    // Only three advances: the internal state is left on the last digit.
                    const digitStart = this.clone();
                    this.advanceState(this.internalState);
                    this.advanceState(this.internalState);
                    this.advanceState(this.internalState);
                    this.state.peek = this.decodeHexDigits(digitStart, 4);
                }
            }
            else if (peek() === chars.$x) {
                // Hex char code, e.g. `\x2F`
                this.advanceState(this.internalState); // advance past the `x` char
                const digitStart = this.clone();
                this.advanceState(this.internalState);
                this.state.peek = this.decodeHexDigits(digitStart, 2);
            }
            else if (chars.isOctalDigit(peek())) {
                // Octal char code, e.g. `\012`,
                let octal = '';
                let length = 0;
                let previous = this.clone();
                while (chars.isOctalDigit(peek()) && length < 3) {
                    previous = this.clone();
                    octal += String.fromCodePoint(peek());
                    this.advanceState(this.internalState);
                    length++;
                }
                this.state.peek = parseInt(octal, 8);
                // Backup one char
                this.internalState = previous.internalState;
            }
            else if (chars.isNewLine(this.internalState.peek)) {
                // Line continuation `\` followed by a new line
                this.advanceState(this.internalState); // advance over the newline
                this.state = this.internalState;
            }
            else {
                // If none of the `if` blocks were executed then we just have an escaped normal character.
                // In that case we just, effectively, skip the backslash from the character.
                this.state.peek = this.internalState.peek;
            }
        }
    }
    /**
     * Parses `length` characters of raw input, starting at `start`'s internal offset, as a
     * hexadecimal number.
     *
     * @param start Cursor positioned on the first digit.
     * @param length Number of characters to read.
     * @returns The parsed number.
     * @throws CursorError if `parseInt` cannot parse the characters.
     */
    decodeHexDigits(start, length) {
        const from = start.internalState.offset;
        const hex = this.input.substring(from, from + length);
        const charCode = parseInt(hex, 16);
        if (!isNaN(charCode)) {
            return charCode;
        }
        else {
            start.state = start.internalState;
            throw new CursorError('Invalid hexadecimal escape sequence', start);
        }
    }
}
/**
 * Error thrown by character cursors; it carries the cursor at which the problem occurred.
 *
 * Note that this is a plain class - it does not extend `Error`.
 */
export class CursorError {
    /**
     * @param msg Description of the problem.
     * @param cursor The cursor positioned where the problem occurred.
     */
    constructor(msg, cursor) {
        this.msg = msg;
        this.cursor = cursor;
    }
}
996//# sourceMappingURL=data:application/json;base64,
\No newline at end of file