UNPKG

13 kBJavaScriptView Raw
/**
 * @licstart The following is the entire license notice for the
 * JavaScript code in this page
 *
 * Copyright 2022 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @licend The above is the entire license notice for the
 * JavaScript code in this page
 */
22"use strict";
23
24Object.defineProperty(exports, "__esModule", {
25 value: true
26});
27exports.XMLParserErrorCode = exports.XMLParserBase = exports.SimpleXMLParser = exports.SimpleDOMNode = void 0;
28
29var _core_utils = require("./core_utils.js");
30
/**
 * Status codes passed to `XMLParserBase#onError`.
 *
 * `NoError` is 0; every failure code is a distinct negative integer.
 * The table is frozen so that the shared constants cannot be reassigned
 * by accident at runtime.
 */
const XMLParserErrorCode = Object.freeze({
  NoError: 0,
  EndOfDocument: -1,
  UnterminatedCdat: -2,
  UnterminatedXmlDeclaration: -3,
  UnterminatedDoctypeDeclaration: -4,
  UnterminatedComment: -5,
  MalformedElement: -6,
  OutOfMemory: -7,
  UnterminatedAttributeValue: -8,
  UnterminatedElement: -9,
  ElementNeverBegun: -10
});
// Publish the error-code table on the CommonJS exports object.
exports.XMLParserErrorCode = XMLParserErrorCode;
45
/**
 * Tell whether the character at `index` in `s` is a space, line feed,
 * carriage return or tab.
 *
 * @param {string} s - Text being scanned.
 * @param {number} index - Position to inspect; an out-of-range index
 *   yields `false` because `s[index]` is then `undefined`.
 * @returns {boolean}
 */
function isWhitespace(s, index) {
  const ch = s[index];
  return ch === " " || ch === "\n" || ch === "\r" || ch === "\t";
}
50
/**
 * Tell whether `s` consists solely of characters accepted by
 * `isWhitespace`. The empty string counts as whitespace-only.
 *
 * @param {string} s - Text to test.
 * @returns {boolean}
 */
function isWhitespaceString(s) {
  for (let i = 0, ii = s.length; i < ii; i++) {
    if (!isWhitespace(s, i)) {
      return false;
    }
  }

  return true;
}
60
/**
 * Event-driven scanner for XML text.
 *
 * `parseXml` walks the input once and reports what it finds through the
 * `on*` hooks, all of which are no-ops here (except `onResolveEntity`,
 * which echoes the reference back). Subclasses override the hooks they
 * care about. Errors are reported through `onError` and stop the scan;
 * nothing is thrown for malformed markup.
 */
class XMLParserBase {
  /**
   * Expand entity references of the form `&name;` in `s`.
   *
   * Decimal (`&#65;`) and hexadecimal (`&#x41;`) character references and
   * the five names `lt`, `gt`, `amp`, `quot`, `apos` are decoded here;
   * any other name is delegated to `onResolveEntity`.
   *
   * @param {string} s - Raw text or attribute value.
   * @returns {string} Text with the references replaced.
   */
  _resolveEntities(s) {
    // `String.fromCodePoint` throws a RangeError for NaN, negative values
    // and anything above U+10FFFF. A malformed numeric reference must not
    // abort the whole parse, so it is left in the output untouched.
    const decodeNumeric = (reference, digits, radix) => {
      const codePoint = Number.parseInt(digits, radix);
      const isValid =
        Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff;
      return isValid ? String.fromCodePoint(codePoint) : reference;
    };

    return s.replace(/&([^;]+);/g, (all, entity) => {
      if (entity.substring(0, 2) === "#x") {
        return decodeNumeric(all, entity.substring(2), 16);
      } else if (entity.substring(0, 1) === "#") {
        return decodeNumeric(all, entity.substring(1), 10);
      }

      switch (entity) {
        case "lt":
          return "<";

        case "gt":
          return ">";

        case "amp":
          return "&";

        case "quot":
          return '"';

        case "apos":
          return "'";
      }

      return this.onResolveEntity(entity);
    });
  }

  /**
   * Parse an element name and its attributes, starting just after `<`.
   *
   * @param {string} s - Full document text.
   * @param {number} start - Index of the first character of the name.
   * @returns {{name: string, attributes: Array<{name: string, value: string}>,
   *   parsed: number} | null} The tag name, its attributes (values already
   *   entity-decoded) and the number of characters consumed from `start`;
   *   `null` when an attribute lacks `=` or a quoted, terminated value.
   */
  _parseContent(s, start) {
    const attributes = [];
    let pos = start;

    function skipWs() {
      while (pos < s.length && isWhitespace(s, pos)) {
        ++pos;
      }
    }

    // The name runs up to the first whitespace, ">" or "/".
    while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== ">" && s[pos] !== "/") {
      ++pos;
    }

    const name = s.substring(start, pos);
    skipWs();

    while (pos < s.length && s[pos] !== ">" && s[pos] !== "/" && s[pos] !== "?") {
      skipWs();

      const attrNameStart = pos;

      while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== "=") {
        ++pos;
      }

      const attrName = s.substring(attrNameStart, pos);
      skipWs();

      if (s[pos] !== "=") {
        return null;
      }

      ++pos;
      skipWs();

      // The value must be quoted; the opening quote is also the terminator.
      const attrEndChar = s[pos];

      if (attrEndChar !== '"' && attrEndChar !== "'") {
        return null;
      }

      const attrEndIndex = s.indexOf(attrEndChar, ++pos);

      if (attrEndIndex < 0) {
        return null;
      }

      const attrValue = s.substring(pos, attrEndIndex);
      attributes.push({
        name: attrName,
        value: this._resolveEntities(attrValue)
      });
      pos = attrEndIndex + 1;
      skipWs();
    }

    return {
      name,
      attributes,
      parsed: pos - start
    };
  }

  /**
   * Parse a processing instruction, starting just after `<?`.
   *
   * @param {string} s - Full document text.
   * @param {number} start - Index of the first character of the target name.
   * @returns {{name: string, value: string, parsed: number}} The target
   *   name, the raw text up to (not including) `?>` or the end of input,
   *   and the number of characters consumed from `start`.
   */
  _parseProcessingInstruction(s, start) {
    let pos = start;

    function skipWs() {
      while (pos < s.length && isWhitespace(s, pos)) {
        ++pos;
      }
    }

    while (pos < s.length && !isWhitespace(s, pos) && s[pos] !== ">" && s[pos] !== "?" && s[pos] !== "/") {
      ++pos;
    }

    const name = s.substring(start, pos);
    skipWs();
    const attrStart = pos;

    while (pos < s.length && (s[pos] !== "?" || s[pos + 1] !== ">")) {
      ++pos;
    }

    const value = s.substring(attrStart, pos);
    return {
      name,
      value,
      parsed: pos - start
    };
  }

  /**
   * Scan `s` from start to end, invoking the `on*` hooks in document order.
   * On the first error `onError` is called with an `XMLParserErrorCode`
   * value and scanning stops.
   *
   * @param {string} s - XML text.
   */
  parseXml(s) {
    let i = 0;

    while (i < s.length) {
      const ch = s[i];
      let j = i;

      if (ch === "<") {
        ++j;
        const ch2 = s[j];
        let q;

        switch (ch2) {
          case "/":
            // End tag: everything up to the next ">" is reported as the name.
            ++j;
            q = s.indexOf(">", j);

            if (q < 0) {
              this.onError(XMLParserErrorCode.UnterminatedElement);
              return;
            }

            this.onEndElement(s.substring(j, q));
            j = q + 1;
            break;

          case "?": {
            ++j;

            const pi = this._parseProcessingInstruction(s, j);

            if (s.substring(j + pi.parsed, j + pi.parsed + 2) !== "?>") {
              this.onError(XMLParserErrorCode.UnterminatedXmlDeclaration);
              return;
            }

            this.onPi(pi.name, pi.value);
            j += pi.parsed + 2;
            break;
          }

          case "!":
            if (s.substring(j + 1, j + 3) === "--") {
              q = s.indexOf("-->", j + 3);

              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedComment);
                return;
              }

              this.onComment(s.substring(j + 3, q));
              j = q + 3;
            } else if (s.substring(j + 1, j + 8) === "[CDATA[") {
              q = s.indexOf("]]>", j + 8);

              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedCdat);
                return;
              }

              this.onCdata(s.substring(j + 8, q));
              j = q + 3;
            } else if (s.substring(j + 1, j + 8) === "DOCTYPE") {
              const q2 = s.indexOf("[", j + 8);
              let complexDoctype = false;
              q = s.indexOf(">", j + 8);

              if (q < 0) {
                this.onError(XMLParserErrorCode.UnterminatedDoctypeDeclaration);
                return;
              }

              // A "[" before the first ">" means an internal subset follows,
              // so the declaration ends at "]>" instead.
              if (q2 > 0 && q > q2) {
                q = s.indexOf("]>", j + 8);

                if (q < 0) {
                  this.onError(XMLParserErrorCode.UnterminatedDoctypeDeclaration);
                  return;
                }

                complexDoctype = true;
              }

              const doctypeContent = s.substring(j + 8, q + (complexDoctype ? 1 : 0));
              this.onDoctype(doctypeContent);
              j = q + (complexDoctype ? 2 : 1);
            } else {
              this.onError(XMLParserErrorCode.MalformedElement);
              return;
            }

            break;

          default: {
            const content = this._parseContent(s, j);

            if (content === null) {
              this.onError(XMLParserErrorCode.MalformedElement);
              return;
            }

            let isClosed = false;

            if (s.substring(j + content.parsed, j + content.parsed + 2) === "/>") {
              isClosed = true;
            } else if (s.substring(j + content.parsed, j + content.parsed + 1) !== ">") {
              this.onError(XMLParserErrorCode.UnterminatedElement);
              return;
            }

            this.onBeginElement(content.name, content.attributes, isClosed);
            j += content.parsed + (isClosed ? 2 : 1);
            break;
          }
        }
      } else {
        // Character data: everything up to the next "<" (or end of input).
        while (j < s.length && s[j] !== "<") {
          j++;
        }

        const text = s.substring(i, j);
        this.onText(this._resolveEntities(text));
      }

      i = j;
    }
  }

  /**
   * Hook for entity names not decoded by `_resolveEntities`.
   * The default puts the reference back unchanged.
   *
   * @param {string} name - Entity name without `&` and `;`.
   * @returns {string} Replacement text.
   */
  onResolveEntity(name) {
    return `&${name};`;
  }

  /** Hook: processing instruction `<?name value?>`. */
  onPi(name, value) {}

  /** Hook: comment body, without the `<!--` / `-->` delimiters. */
  onComment(text) {}

  /** Hook: CDATA section body, without the delimiters. */
  onCdata(text) {}

  /** Hook: text following `<!DOCTYPE` up to the closing delimiter. */
  onDoctype(doctypeContent) {}

  /** Hook: character data, entity references already expanded. */
  onText(text) {}

  /** Hook: start tag; `isEmpty` is true for self-closing `<x/>` tags. */
  onBeginElement(name, attributes, isEmpty) {}

  /** Hook: end tag; `name` is the raw text between `</` and `>`. */
  onEndElement(name) {}

  /** Hook: scanning stopped; `code` is an `XMLParserErrorCode` value. */
  onError(code) {}
}
330
// Publish the base parser class on the CommonJS exports object.
exports.XMLParserBase = XMLParserBase;
332
/**
 * Lightweight tree node.
 *
 * `nodeName` and `nodeValue` are set by the constructor. `childNodes`
 * (array) and `attributes` (array of `{name, value}`) are optional and are
 * assigned by whoever builds the tree; several accessors therefore treat a
 * missing `childNodes` as "no children".
 */
class SimpleDOMNode {
  /**
   * @param {string} nodeName - Element name, or "#text" for text nodes.
   * @param {string} [nodeValue] - Text payload, if any.
   */
  constructor(nodeName, nodeValue) {
    this.nodeName = nodeName;
    this.nodeValue = nodeValue;
    // Defined through defineProperty, so `parentNode` is writable but
    // non-enumerable and non-configurable.
    Object.defineProperty(this, "parentNode", {
      value: null,
      writable: true
    });
  }

  /** First child, or `undefined` when there is none. */
  get firstChild() {
    return this.childNodes && this.childNodes[0];
  }

  /**
   * The node following this one in its parent's `childNodes`, or
   * `undefined` when there is no parent, the parent has no child list,
   * this node is not in that list, or it is the last entry.
   */
  get nextSibling() {
    // `parentNode` starts out as null (see constructor), so a detached or
    // root node must not dereference it.
    const parent = this.parentNode;

    if (!parent) {
      return undefined;
    }

    const childNodes = parent.childNodes;

    if (!childNodes) {
      return undefined;
    }

    const index = childNodes.indexOf(this);

    if (index === -1) {
      return undefined;
    }

    return childNodes[index + 1];
  }

  /**
   * Concatenated text of the subtree. A node without `childNodes` yields
   * its own `nodeValue` (or "" when that is falsy).
   */
  get textContent() {
    if (!this.childNodes) {
      return this.nodeValue || "";
    }

    return this.childNodes.map(child => child.textContent).join("");
  }

  /** `childNodes`, or a fresh empty array when it is not set. */
  get children() {
    return this.childNodes || [];
  }

  /**
   * @returns {boolean} `true` only when `childNodes` exists and is
   *   non-empty; always a real boolean.
   */
  hasChildNodes() {
    return Boolean(this.childNodes && this.childNodes.length > 0);
  }

  /**
   * Resolve a path of `{name, pos}` components against this subtree.
   *
   * For `paths[pos]`, the subtree (this node included) is walked
   * depth-first, in document order, looking for a node whose `nodeName`
   * equals `component.name`; the remaining components are then resolved
   * recursively from that node.
   *
   * @param {Array<{name: string, pos: number}>} paths - Path components.
   * @param {number} pos - Index of the component to resolve next.
   * @returns {SimpleDOMNode | null} The matching node, or `null`.
   */
  searchNode(paths, pos) {
    if (pos >= paths.length) {
      return this;
    }

    const component = paths[pos];
    // Explicit traversal stack of [parent, indexOfCurrentChild] pairs.
    const stack = [];
    let node = this;

    while (true) {
      if (component.name === node.nodeName) {
        if (component.pos === 0) {
          const res = node.searchNode(paths, pos + 1);

          if (res !== null) {
            return res;
          }
        } else if (stack.length === 0) {
          // A non-zero position needs siblings, and the start node has no
          // parent on the stack to take them from.
          return null;
        } else {
          const [parent] = stack.pop();
          let siblingPos = 0;

          // Pick the `component.pos`-th same-named child of the parent.
          for (const child of parent.childNodes) {
            if (component.name === child.nodeName) {
              if (siblingPos === component.pos) {
                return child.searchNode(paths, pos + 1);
              }

              siblingPos++;
            }
          }

          // No sibling at that position: fall back to the node found first.
          return node.searchNode(paths, pos + 1);
        }
      }

      if (node.childNodes && node.childNodes.length !== 0) {
        // Descend into the first child.
        stack.push([node, 0]);
        node = node.childNodes[0];
      } else if (stack.length === 0) {
        return null;
      } else {
        // Leaf: climb until an ancestor has an unvisited next child.
        while (stack.length !== 0) {
          const [parent, currentPos] = stack.pop();
          const newPos = currentPos + 1;

          if (newPos < parent.childNodes.length) {
            stack.push([parent, newPos]);
            node = parent.childNodes[newPos];
            break;
          }
        }

        if (stack.length === 0) {
          return null;
        }
      }
    }
  }

  /**
   * Serialise this subtree as XML text by appending string chunks to
   * `buffer`. Text and attribute values go through
   * `_core_utils.encodeToXmlString`.
   *
   * @param {Array<string>} buffer - Output accumulator (mutated).
   */
  dump(buffer) {
    if (this.nodeName === "#text") {
      buffer.push((0, _core_utils.encodeToXmlString)(this.nodeValue));
      return;
    }

    buffer.push(`<${this.nodeName}`);

    if (this.attributes) {
      for (const attribute of this.attributes) {
        buffer.push(` ${attribute.name}="${(0, _core_utils.encodeToXmlString)(attribute.value)}"`);
      }
    }

    if (this.hasChildNodes()) {
      buffer.push(">");

      for (const child of this.childNodes) {
        child.dump(buffer);
      }

      buffer.push(`</${this.nodeName}>`);
    } else if (this.nodeValue) {
      buffer.push(`>${(0, _core_utils.encodeToXmlString)(this.nodeValue)}</${this.nodeName}>`);
    } else {
      buffer.push("/>");
    }
  }
}
472
// Publish the node class on the CommonJS exports object.
exports.SimpleDOMNode = SimpleDOMNode;
474
/**
 * `XMLParserBase` subclass that builds a tree of `SimpleDOMNode` objects
 * from the parser events.
 */
class SimpleXMLParser extends XMLParserBase {
  /**
   * @param {Object} [options] - May be omitted entirely.
   * @param {boolean} [options.hasAttributes=false] - Store each element's
   *   parsed attributes on the node as `attributes`.
   * @param {boolean} [options.lowerCaseName=false] - Lower-case element
   *   names before creating nodes.
   */
  constructor({
    hasAttributes = false,
    lowerCaseName = false
  } = {}) {
    super();
    this._currentFragment = null;
    this._stack = null;
    this._errorCode = XMLParserErrorCode.NoError;
    this._hasAttributes = hasAttributes;
    this._lowerCaseName = lowerCaseName;
  }

  /**
   * Parse `data` and return the resulting document.
   *
   * @param {string} data - XML text.
   * @returns {{documentElement: SimpleDOMNode} | undefined} `undefined`
   *   when the scanner reported an error or no top-level node was created.
   */
  parseFromString(data) {
    // Reset per-parse state so the instance can be reused.
    this._currentFragment = [];
    this._stack = [];
    this._errorCode = XMLParserErrorCode.NoError;
    this.parseXml(data);

    if (this._errorCode !== XMLParserErrorCode.NoError) {
      return undefined;
    }

    // The first top-level node collected is taken as the document element.
    const [documentElement] = this._currentFragment;

    if (!documentElement) {
      return undefined;
    }

    return {
      documentElement
    };
  }

  /** Append a "#text" node, skipping whitespace-only runs. */
  onText(text) {
    if (isWhitespaceString(text)) {
      return;
    }

    const node = new SimpleDOMNode("#text", text);

    this._currentFragment.push(node);
  }

  /** Append CDATA content as a "#text" node (whitespace is kept). */
  onCdata(text) {
    const node = new SimpleDOMNode("#text", text);

    this._currentFragment.push(node);
  }

  /**
   * Create an element node, append it to the current fragment and, unless
   * the tag is self-closing, make its child list the current fragment.
   */
  onBeginElement(name, attributes, isEmpty) {
    if (this._lowerCaseName) {
      name = name.toLowerCase();
    }

    const node = new SimpleDOMNode(name);
    node.childNodes = [];

    if (this._hasAttributes) {
      node.attributes = attributes;
    }

    this._currentFragment.push(node);

    if (isEmpty) {
      return;
    }

    this._stack.push(this._currentFragment);

    this._currentFragment = node.childNodes;
  }

  /**
   * Close the innermost open element: restore the enclosing fragment and
   * set `parentNode` on each of the closed element's children. The tag
   * `name` is not compared with the element being closed.
   *
   * @returns {SimpleDOMNode | null} The closed element, or `null` when the
   *   restored fragment is empty (e.g. an end tag with nothing open).
   */
  onEndElement(name) {
    this._currentFragment = this._stack.pop() || [];

    const lastElement = this._currentFragment.at(-1);

    if (!lastElement) {
      return null;
    }

    for (const child of lastElement.childNodes) {
      child.parentNode = lastElement;
    }

    return lastElement;
  }

  /** Remember the error; `parseFromString` checks it after scanning. */
  onError(code) {
    this._errorCode = code;
  }
}
569
// Publish the tree-building parser on the CommonJS exports object.
exports.SimpleXMLParser = SimpleXMLParser;
\No newline at end of file