UNPKG

65 kBJavaScriptView Raw
1/**
2 * @license
3 * Copyright Google LLC All Rights Reserved.
4 *
5 * Use of this source code is governed by an MIT-style license that can be
6 * found in the LICENSE file at https://angular.io/license
7 */
8import { ParseError, ParseSourceSpan } from '../parse_util';
9import * as html from './ast';
10import { NAMED_ENTITIES } from './entities';
11import { tokenize } from './lexer';
12import { getNsPrefix, mergeNsAndName, splitNsName } from './tags';
/**
 * Parse error raised while assembling the node tree. In addition to what the
 * `ParseError` base class receives (span and message), it remembers the name
 * of the element the problem relates to.
 */
export class TreeError extends ParseError {
    /**
     * Factory equivalent of calling the constructor directly.
     *
     * @param elementName Name of the element the error relates to (callers may pass `null`).
     * @param span Source span forwarded to `ParseError`.
     * @param msg Error message forwarded to `ParseError`.
     * @returns A new `TreeError`.
     */
    static create(elementName, span, msg) {
        const error = new TreeError(elementName, span, msg);
        return error;
    }

    /**
     * @param elementName Name of the element the error relates to (callers may pass `null`).
     * @param span Source span forwarded to `ParseError`.
     * @param msg Error message forwarded to `ParseError`.
     */
    constructor(elementName, span, msg) {
        super(span, msg);
        this.elementName = elementName;
    }
}
/**
 * Plain holder for the outcome of a parse: the top-level nodes that were
 * produced and the errors collected along the way.
 */
export class ParseTreeResult {
    /**
     * @param rootNodes Top-level nodes of the parsed tree.
     * @param errors Errors reported while producing the tree.
     */
    constructor(rootNodes, errors) {
        Object.assign(this, { rootNodes, errors });
    }
}
/**
 * Entry point that turns source text into a `ParseTreeResult` by tokenizing it
 * and feeding the tokens to a `_TreeBuilder`.
 */
export class Parser {
    /**
     * @param getTagDefinition Function handed to both the tokenizer and the tree builder
     *     to look up the definition of a tag.
     */
    constructor(getTagDefinition) {
        this.getTagDefinition = getTagDefinition;
    }

    /**
     * Tokenizes `source` and builds the node tree from the resulting tokens.
     *
     * @param source Text to parse.
     * @param url Passed through to `tokenize` unchanged.
     * @param options Passed through to `tokenize` unchanged.
     * @returns A `ParseTreeResult` whose errors are the tokenizer errors followed by
     *     the tree builder errors.
     */
    parse(source, url, options) {
        const lexed = tokenize(source, url, this.getTagDefinition, options);
        const builder = new _TreeBuilder(lexed.tokens, this.getTagDefinition);
        builder.build();
        const allErrors = lexed.errors.concat(builder.errors);
        return new ParseTreeResult(builder.rootNodes, allErrors);
    }
}
/**
 * Turns a flat token list into a tree of `html` AST nodes.
 *
 * The builder walks the tokens with a one-token lookahead (`_peek`), keeps a stack of
 * currently open elements (`_elementStack`), appends finished nodes either to the
 * innermost open element or to `rootNodes`, and records problems in `errors` instead of
 * throwing.
 */
class _TreeBuilder {
    /**
     * @param tokens Tokens to consume. The list is expected to end with an EOF token
     *     (type 24); `_advance` relies on that to never run past the end.
     * @param getTagDefinition Function returning the tag definition for a tag name.
     */
    constructor(tokens, getTagDefinition) {
        this.tokens = tokens;
        this.getTagDefinition = getTagDefinition;
        this._index = -1;
        this._elementStack = [];
        this.rootNodes = [];
        this.errors = [];
        // Prime `_peek` with the first token.
        this._advance();
    }

    /**
     * Consumes every token up to EOF, dispatching on the type of the lookahead token.
     * Results are left in `rootNodes` and `errors`.
     */
    build() {
        while (this._peek.type !== 24 /* EOF */) {
            if (this._peek.type === 0 /* TAG_OPEN_START */ ||
                this._peek.type === 4 /* INCOMPLETE_TAG_OPEN */) {
                this._consumeStartTag(this._advance());
            }
            else if (this._peek.type === 3 /* TAG_CLOSE */) {
                this._consumeEndTag(this._advance());
            }
            else if (this._peek.type === 12 /* CDATA_START */) {
                this._closeVoidElement();
                this._consumeCdata(this._advance());
            }
            else if (this._peek.type === 10 /* COMMENT_START */) {
                this._closeVoidElement();
                this._consumeComment(this._advance());
            }
            else if (this._peek.type === 5 /* TEXT */ || this._peek.type === 7 /* RAW_TEXT */ ||
                this._peek.type === 6 /* ESCAPABLE_RAW_TEXT */) {
                this._closeVoidElement();
                this._consumeText(this._advance());
            }
            else if (this._peek.type === 19 /* EXPANSION_FORM_START */) {
                this._consumeExpansion(this._advance());
            }
            else {
                // Skip all other tokens...
                this._advance();
            }
        }
    }

    /**
     * Moves the lookahead one token forward.
     *
     * @returns The token that was the lookahead before the call.
     */
    _advance() {
        const prev = this._peek;
        if (this._index < this.tokens.length - 1) {
            // Note: there is always an EOF token at the end
            this._index++;
        }
        this._peek = this.tokens[this._index];
        return prev;
    }

    /**
     * Advances only when the lookahead has the given type.
     *
     * @param type Numeric token type to match.
     * @returns The consumed token, or `null` when the lookahead did not match.
     */
    _advanceIf(type) {
        if (this._peek.type === type) {
            return this._advance();
        }
        return null;
    }

    /**
     * Consumes the content of a CDATA section (the CDATA_START token itself has already
     * been consumed by the caller) and the optional CDATA_END token.
     *
     * The content is only consumed when the lookahead is a text-type token. If the section
     * is followed directly by EOF or CDATA_END, those tokens carry no `parts[0]` and handing
     * them to `_consumeText` would throw, so they are left in place.
     */
    _consumeCdata(_startToken) {
        const nextType = this._peek.type;
        if (nextType === 5 /* TEXT */ || nextType === 7 /* RAW_TEXT */ ||
            nextType === 6 /* ESCAPABLE_RAW_TEXT */) {
            this._consumeText(this._advance());
        }
        this._advanceIf(13 /* CDATA_END */);
    }

    /**
     * Consumes an optional RAW_TEXT token and an optional COMMENT_END token and adds an
     * `html.Comment` node whose value is the trimmed text (or `null` when there was no text).
     *
     * @param token The COMMENT_START token; its span becomes the comment's span.
     */
    _consumeComment(token) {
        const text = this._advanceIf(7 /* RAW_TEXT */);
        this._advanceIf(11 /* COMMENT_END */);
        const value = text != null ? text.parts[0].trim() : null;
        this._addToParent(new html.Comment(value, token.sourceSpan));
    }

    /**
     * Consumes an ICU expansion form: switch value, type, any number of cases and the
     * closing EXPANSION_FORM_END. On a malformed form an error is recorded (here or while
     * parsing a case) and no node is added.
     *
     * @param token The EXPANSION_FORM_START token.
     */
    _consumeExpansion(token) {
        const switchValue = this._advance();
        const type = this._advance();
        const cases = [];
        // read =
        while (this._peek.type === 20 /* EXPANSION_CASE_VALUE */) {
            const expCase = this._parseExpansionCase();
            if (!expCase)
                return; // error
            cases.push(expCase);
        }
        // read the final }
        if (this._peek.type !== 23 /* EXPANSION_FORM_END */) {
            this.errors.push(TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '}'.`));
            return;
        }
        const sourceSpan = new ParseSourceSpan(token.sourceSpan.start, this._peek.sourceSpan.end, token.sourceSpan.fullStart);
        this._addToParent(new html.Expansion(switchValue.parts[0], type.parts[0], cases, sourceSpan, switchValue.sourceSpan));
        this._advance();
    }

    /**
     * Parses one expansion case: its value token followed by a `{ ... }` body. The body
     * tokens are parsed by a nested `_TreeBuilder`.
     *
     * @returns An `html.ExpansionCase`, or `null` when the case is malformed or its body
     *     produced errors (those errors are appended to `this.errors`).
     */
    _parseExpansionCase() {
        const value = this._advance();
        // read {
        if (this._peek.type !== 21 /* EXPANSION_CASE_EXP_START */) {
            this.errors.push(TreeError.create(null, this._peek.sourceSpan, `Invalid ICU message. Missing '{'.`));
            return null;
        }
        // read until }
        const start = this._advance();
        const exp = this._collectExpansionExpTokens(start);
        if (!exp)
            return null;
        const end = this._advance();
        // The nested builder needs its own EOF terminator.
        exp.push({ type: 24 /* EOF */, parts: [], sourceSpan: end.sourceSpan });
        // parse everything in between { and }
        const expansionCaseParser = new _TreeBuilder(exp, this.getTagDefinition);
        expansionCaseParser.build();
        if (expansionCaseParser.errors.length > 0) {
            this.errors = this.errors.concat(expansionCaseParser.errors);
            return null;
        }
        const sourceSpan = new ParseSourceSpan(value.sourceSpan.start, end.sourceSpan.end, value.sourceSpan.fullStart);
        const expSourceSpan = new ParseSourceSpan(start.sourceSpan.start, end.sourceSpan.end, start.sourceSpan.fullStart);
        return new html.ExpansionCase(value.parts[0], expansionCaseParser.rootNodes, sourceSpan, value.sourceSpan, expSourceSpan);
    }

    /**
     * Collects the tokens of an expansion case body, up to (but not including) the
     * EXPANSION_CASE_EXP_END that matches the already consumed opening token. Nested
     * forms and cases are tracked on a small stack so that only the matching end stops
     * the collection.
     *
     * @param start The consumed EXPANSION_CASE_EXP_START token; its span is used for errors.
     * @returns The collected tokens, or `null` after recording an error when the nesting
     *     is unbalanced or EOF is reached first.
     */
    _collectExpansionExpTokens(start) {
        const exp = [];
        const expansionFormStack = [21 /* EXPANSION_CASE_EXP_START */];
        while (true) {
            if (this._peek.type === 19 /* EXPANSION_FORM_START */ ||
                this._peek.type === 21 /* EXPANSION_CASE_EXP_START */) {
                expansionFormStack.push(this._peek.type);
            }
            if (this._peek.type === 22 /* EXPANSION_CASE_EXP_END */) {
                if (lastOnStack(expansionFormStack, 21 /* EXPANSION_CASE_EXP_START */)) {
                    expansionFormStack.pop();
                    // The matching end is intentionally not consumed; the caller does that.
                    if (expansionFormStack.length === 0)
                        return exp;
                }
                else {
                    this.errors.push(TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
                    return null;
                }
            }
            if (this._peek.type === 23 /* EXPANSION_FORM_END */) {
                if (lastOnStack(expansionFormStack, 19 /* EXPANSION_FORM_START */)) {
                    expansionFormStack.pop();
                }
                else {
                    this.errors.push(TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
                    return null;
                }
            }
            if (this._peek.type === 24 /* EOF */) {
                this.errors.push(TreeError.create(null, start.sourceSpan, `Invalid ICU message. Missing '}'.`));
                return null;
            }
            exp.push(this._advance());
        }
    }

    /**
     * Merges `token` and any directly following INTERPOLATION, TEXT and ENCODED_ENTITY
     * tokens into a single `html.Text` node. Nothing is added when the merged text is empty.
     *
     * A leading line feed is dropped when the text is the first child of an element whose
     * tag definition sets `ignoreFirstLf`.
     *
     * @param token First text token; `token.parts[0]` must be a string.
     */
    _consumeText(token) {
        const tokens = [token];
        const startSpan = token.sourceSpan;
        let text = token.parts[0];
        if (text.length > 0 && text[0] === '\n') {
            const parent = this._getParentElement();
            if (parent != null && parent.children.length === 0 &&
                this.getTagDefinition(parent.name).ignoreFirstLf) {
                text = text.substring(1);
                // Replace rather than mutate, so the original token object stays untouched.
                tokens[0] = { type: token.type, sourceSpan: token.sourceSpan, parts: [text] };
            }
        }
        while (this._peek.type === 8 /* INTERPOLATION */ || this._peek.type === 5 /* TEXT */ ||
            this._peek.type === 9 /* ENCODED_ENTITY */) {
            token = this._advance();
            tokens.push(token);
            if (token.type === 8 /* INTERPOLATION */) {
                // For backward compatibility we decode HTML entities that appear in interpolation
                // expressions. This is arguably a bug, but it could be a considerable breaking change to
                // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
                // chain after View Engine has been removed.
                text += token.parts.join('').replace(/&([^;]+);/g, decodeEntity);
            }
            else if (token.type === 9 /* ENCODED_ENTITY */) {
                text += token.parts[0];
            }
            else {
                text += token.parts.join('');
            }
        }
        if (text.length > 0) {
            // `token` is now the last token that was merged in.
            const endSpan = token.sourceSpan;
            this._addToParent(new html.Text(text, new ParseSourceSpan(startSpan.start, endSpan.end, startSpan.fullStart, startSpan.details), tokens));
        }
    }

    /**
     * Pops the innermost open element when its tag definition marks it as void, so that
     * following content is not attached to it as a child.
     */
    _closeVoidElement() {
        const el = this._getParentElement();
        if (el && this.getTagDefinition(el.name).isVoid) {
            this._elementStack.pop();
        }
    }

    /**
     * Consumes a start tag: its attributes and the tag end (`>` or `/>`), creates the
     * `html.Element` and pushes it onto the element stack. Self-closed elements and
     * incomplete opening tags are popped again immediately; the latter also record an error.
     *
     * @param startTagToken The TAG_OPEN_START or INCOMPLETE_TAG_OPEN token, whose parts are
     *     `[prefix, name]`.
     */
    _consumeStartTag(startTagToken) {
        const [prefix, name] = startTagToken.parts;
        const attrs = [];
        while (this._peek.type === 14 /* ATTR_NAME */) {
            attrs.push(this._consumeAttr(this._advance()));
        }
        const fullName = this._getElementFullName(prefix, name, this._getParentElement());
        let selfClosing = false;
        // Note: There could have been a tokenizer error
        // so that we don't get a token for the end tag...
        if (this._peek.type === 2 /* TAG_OPEN_END_VOID */) {
            this._advance();
            selfClosing = true;
            const tagDef = this.getTagDefinition(fullName);
            if (!(tagDef.canSelfClose || getNsPrefix(fullName) !== null || tagDef.isVoid)) {
                this.errors.push(TreeError.create(fullName, startTagToken.sourceSpan, `Only void and foreign elements can be self closed "${startTagToken.parts[1]}"`));
            }
        }
        else if (this._peek.type === 1 /* TAG_OPEN_END */) {
            this._advance();
            selfClosing = false;
        }
        // The start tag ends where the next token's full span begins.
        const end = this._peek.sourceSpan.fullStart;
        const span = new ParseSourceSpan(startTagToken.sourceSpan.start, end, startTagToken.sourceSpan.fullStart);
        // Create a separate `startSpan` because `span` will be modified when there is an `end` span.
        const startSpan = new ParseSourceSpan(startTagToken.sourceSpan.start, end, startTagToken.sourceSpan.fullStart);
        const el = new html.Element(fullName, attrs, [], span, startSpan, undefined);
        this._pushElement(el);
        if (selfClosing) {
            // Elements that are self-closed have their `endSourceSpan` set to the full span, as the
            // element start tag also represents the end tag.
            this._popElement(fullName, span);
        }
        else if (startTagToken.type === 4 /* INCOMPLETE_TAG_OPEN */) {
            // We already know the opening tag is not complete, so it is unlikely it has a corresponding
            // close tag. Let's optimistically parse it as a full element and emit an error.
            this._popElement(fullName, null);
            this.errors.push(TreeError.create(fullName, span, `Opening tag "${fullName}" not terminated.`));
        }
    }

    /**
     * Adds `el` to the tree and makes it the innermost open element. If the current
     * innermost element reports (via `isClosedByChild`) that `el` closes it, that element
     * is popped first so `el` becomes its sibling.
     *
     * @param el The element to open.
     */
    _pushElement(el) {
        const parentEl = this._getParentElement();
        if (parentEl && this.getTagDefinition(parentEl.name).isClosedByChild(el.name)) {
            this._elementStack.pop();
        }
        this._addToParent(el);
        this._elementStack.push(el);
    }

    /**
     * Consumes an end tag. Records an error when the tag names a void element or when no
     * matching open element could be closed cleanly.
     *
     * @param endTagToken The TAG_CLOSE token, whose parts are `[prefix, name]`.
     */
    _consumeEndTag(endTagToken) {
        const fullName = this._getElementFullName(endTagToken.parts[0], endTagToken.parts[1], this._getParentElement());
        if (this.getTagDefinition(fullName).isVoid) {
            this.errors.push(TreeError.create(fullName, endTagToken.sourceSpan, `Void elements do not have end tags "${endTagToken.parts[1]}"`));
        }
        else if (!this._popElement(fullName, endTagToken.sourceSpan)) {
            const errMsg = `Unexpected closing tag "${fullName}". It may happen when the tag has already been closed by another tag. For more info see https://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags`;
            this.errors.push(TreeError.create(fullName, endTagToken.sourceSpan, errMsg));
        }
    }

    /**
     * Closes the nearest element with the tag name `fullName` in the parse tree.
     * `endSourceSpan` is the span of the closing tag, or null if the element does
     * not have a closing tag (for example, this happens when an incomplete
     * opening tag is recovered).
     *
     * @returns `true` when a matching element was found and every element skipped on the
     *     way is `closedByParent`; `false` when nothing matched or when an element that
     *     is not `closedByParent` had to be closed implicitly (the match is still popped).
     */
    _popElement(fullName, endSourceSpan) {
        let unexpectedCloseTagDetected = false;
        for (let stackIndex = this._elementStack.length - 1; stackIndex >= 0; stackIndex--) {
            const el = this._elementStack[stackIndex];
            if (el.name === fullName) {
                // Record the parse span with the element that is being closed. Any elements that are
                // removed from the element stack at this point are closed implicitly, so they won't get
                // an end source span (as there is no explicit closing element).
                el.endSourceSpan = endSourceSpan;
                el.sourceSpan.end = endSourceSpan !== null ? endSourceSpan.end : el.sourceSpan.end;
                this._elementStack.splice(stackIndex, this._elementStack.length - stackIndex);
                return !unexpectedCloseTagDetected;
            }
            if (!this.getTagDefinition(el.name).closedByParent) {
                // Note that we encountered an unexpected close tag but continue processing the element
                // stack so we can assign an `endSourceSpan` if there is a corresponding start tag for this
                // end tag in the stack.
                unexpectedCloseTagDetected = true;
            }
        }
        return false;
    }

    /**
     * Consumes an attribute: optional opening quote, the value tokens (text, interpolation
     * and encoded entities) and optional closing quote.
     *
     * @param attrName The consumed ATTR_NAME token, whose parts are `[prefix, name]`.
     * @returns An `html.Attribute`. Its value is `''` and its value span / value tokens are
     *     `undefined` when no value text follows the name.
     */
    _consumeAttr(attrName) {
        const fullName = mergeNsAndName(attrName.parts[0], attrName.parts[1]);
        let attrEnd = attrName.sourceSpan.end;
        // Consume any quote
        if (this._peek.type === 15 /* ATTR_QUOTE */) {
            this._advance();
        }
        // Consume the attribute value
        let value = '';
        const valueTokens = [];
        let valueStartSpan = undefined;
        let valueEnd = undefined;
        // NOTE: We need to use a new variable `nextTokenType` here to hide the actual type of
        // `_peek.type` from TS. Otherwise TS will narrow the type of `_peek.type` preventing it from
        // being able to consider `ATTR_VALUE_INTERPOLATION` as an option. This is because TS is not
        // able to see that `_advance()` will actually mutate `_peek`.
        const nextTokenType = this._peek.type;
        if (nextTokenType === 16 /* ATTR_VALUE_TEXT */) {
            valueStartSpan = this._peek.sourceSpan;
            valueEnd = this._peek.sourceSpan.end;
            while (this._peek.type === 16 /* ATTR_VALUE_TEXT */ ||
                this._peek.type === 17 /* ATTR_VALUE_INTERPOLATION */ ||
                this._peek.type === 9 /* ENCODED_ENTITY */) {
                const valueToken = this._advance();
                valueTokens.push(valueToken);
                if (valueToken.type === 17 /* ATTR_VALUE_INTERPOLATION */) {
                    // For backward compatibility we decode HTML entities that appear in interpolation
                    // expressions. This is arguably a bug, but it could be a considerable breaking change to
                    // fix it. It should be addressed in a larger project to refactor the entire parser/lexer
                    // chain after View Engine has been removed.
                    value += valueToken.parts.join('').replace(/&([^;]+);/g, decodeEntity);
                }
                else if (valueToken.type === 9 /* ENCODED_ENTITY */) {
                    value += valueToken.parts[0];
                }
                else {
                    value += valueToken.parts.join('');
                }
                valueEnd = attrEnd = valueToken.sourceSpan.end;
            }
        }
        // Consume any quote
        if (this._peek.type === 15 /* ATTR_QUOTE */) {
            const quoteToken = this._advance();
            // The closing quote belongs to the attribute span but not to the value span.
            attrEnd = quoteToken.sourceSpan.end;
        }
        const valueSpan = valueStartSpan && valueEnd &&
            new ParseSourceSpan(valueStartSpan.start, valueEnd, valueStartSpan.fullStart);
        return new html.Attribute(fullName, value, new ParseSourceSpan(attrName.sourceSpan.start, attrEnd, attrName.sourceSpan.fullStart), attrName.sourceSpan, valueSpan, valueTokens.length > 0 ? valueTokens : undefined, undefined);
    }

    /**
     * @returns The innermost open element, or `null` when the element stack is empty.
     */
    _getParentElement() {
        return this._elementStack.length > 0 ? this._elementStack[this._elementStack.length - 1] : null;
    }

    /**
     * Appends `node` to the innermost open element's children, or to `rootNodes` when no
     * element is open.
     *
     * @param node The node to add.
     */
    _addToParent(node) {
        const parent = this._getParentElement();
        if (parent != null) {
            parent.children.push(node);
        }
        else {
            this.rootNodes.push(node);
        }
    }

    /**
     * Computes the namespaced name of an element. When the tag has no explicit prefix, the
     * tag definition's `implicitNamespacePrefix` is used; failing that, the parent's
     * namespace prefix is inherited unless the parent's tag definition sets
     * `preventNamespaceInheritance`.
     *
     * @param prefix Explicit namespace prefix from the token (`''` when absent).
     * @param localName Local tag name.
     * @param parentElement Innermost open element, or `null`.
     * @returns The result of `mergeNsAndName(prefix, localName)` with the resolved prefix.
     */
    _getElementFullName(prefix, localName, parentElement) {
        if (prefix === '') {
            prefix = this.getTagDefinition(localName).implicitNamespacePrefix || '';
            if (prefix === '' && parentElement != null) {
                const parentTagName = splitNsName(parentElement.name)[1];
                const parentTagDefinition = this.getTagDefinition(parentTagName);
                if (!parentTagDefinition.preventNamespaceInheritance) {
                    prefix = getNsPrefix(parentElement.name);
                }
            }
        }
        return mergeNsAndName(prefix, localName);
    }
}
/**
 * Tells whether the top (last) entry of `stack` is strictly equal to `element`.
 *
 * @param stack Array used as a stack.
 * @param element Value to compare the top entry against.
 * @returns `false` for an empty stack, otherwise the result of the `===` comparison.
 */
function lastOnStack(stack, element) {
    if (stack.length === 0) {
        return false;
    }
    const top = stack[stack.length - 1];
    return top === element;
}
/**
 * Decode the `entity` string, which we believe is the contents of an HTML entity.
 *
 * If the string is not actually a valid/known entity then just return the original `match` string.
 *
 * The signature matches a `String.prototype.replace` replacer for a pattern with one
 * capture group.
 *
 * @param match The full matched text, e.g. `&amp;`.
 * @param entity The text between `&` and `;`, e.g. `amp`, `#65` or `#x41`.
 * @returns The decoded character(s), or `match` unchanged when `entity` is not a known
 *     name, maps to an empty value, or is a numeric reference outside the Unicode range.
 */
function decodeEntity(match, entity) {
    // Only consult own properties: a plain-object lookup would otherwise resolve names such
    // as `constructor` or `toString` to inherited members and return a non-string.
    const named = Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, entity) ?
        NAMED_ENTITIES[entity] :
        undefined;
    if (named !== undefined) {
        return named || match;
    }
    let codePoint = NaN;
    if (/^#x[a-f0-9]+$/i.test(entity)) {
        codePoint = parseInt(entity.slice(2), 16);
    }
    else if (/^#\d+$/.test(entity)) {
        codePoint = parseInt(entity.slice(1), 10);
    }
    // `String.fromCodePoint` throws a RangeError above U+10FFFF; such references are not
    // valid entities, so they are left as written. `NaN` fails the comparison as well.
    if (codePoint <= 0x10ffff) {
        return String.fromCodePoint(codePoint);
    }
    return match;
}
408//# sourceMappingURL=data:application/json;base64,
\No newline at end of file