1 | /// <reference types="node" />
|
2 |
|
3 | import * as stream from "stream";
|
4 | import * as events from "events";
|
5 |
|
6 |
|
7 | // Markup data
|
8 | //-----------------------------------------------------------------------------------
|
declare namespace MarkupData {
    /** Position of a piece of markup within the source text. */
    interface Location {
        /** Line number, counted from one. */
        line: number;
        /** Column number, counted from one. */
        col: number;
        /** Index of the first character, counted from zero. */
        startOffset: number;
        /** Index of the last character, counted from zero. */
        endOffset: number;
    }

    /** Lookup table from an attribute name to the location of that attribute. */
    interface AttributesLocation {
        [name: string]: Location;
    }

    /** Location of a start tag, extended with the locations of its attributes. */
    interface StartTagLocation extends Location {
        /** Location info for the attributes of the start tag. */
        attrs: AttributesLocation;
    }

    /** Location of a whole element, including its start and end tags. */
    interface ElementLocation extends StartTagLocation {
        /** Location info for the element's start tag. */
        startTag: StartTagLocation;
        /** Location info for the element's end tag. */
        endTag: Location;
    }
}
|
51 |
|
52 | // Options
|
53 | //-----------------------------------------------------------------------------------
|
declare namespace Options {
    export interface ParserOptions {
        /**
         * Enables source code location information for the nodes. When enabled, each node (except the root node) has the `__location` property.
         * In case the node is not an empty element, `__location` will be a {@link MarkupData.ElementLocation} object, otherwise it's a {@link MarkupData.Location}.
         * If the element was implicitly created by the parser, its `__location` property will be null.
         *
         * **Default:** `false`
         */
        locationInfo?: boolean;
        /**
         * Specifies the resulting tree format.
         *
         * **Default:** `treeAdapters.default`
         */
        treeAdapter?: AST.TreeAdapter;
    }

    export interface SAXParserOptions {
        /**
         * Enables source code location information for the tokens.
         * When enabled, each token event handler will receive a {@link MarkupData.Location} (or {@link MarkupData.StartTagLocation})
         * object as its last argument.
         */
        locationInfo?: boolean;
    }

    export interface SerializerOptions {
        /**
         * Specifies the input tree format.
         *
         * **Default:** `treeAdapters.default`
         */
        treeAdapter?: AST.TreeAdapter;
    }
}
|
90 |
|
91 |
|
92 | // AST
|
93 | //-----------------------------------------------------------------------------------
|
declare namespace AST {
    /**
     * [Document mode](https://dom.spec.whatwg.org/#concept-document-limited-quirks).
     */
    type DocumentMode = 'no-quirks' | 'quirks' | 'limited-quirks';

    // Default tree adapter
    namespace Default {
        /**
         * Element attribute.
         */
        interface Attribute {
            /**
             * The name of the attribute.
             */
            name: string;
            /**
             * The value of the attribute.
             */
            value: string;
            /**
             * The namespace of the attribute.
             */
            namespace?: string;
            /**
             * The namespace-related prefix of the attribute.
             */
            prefix?: string;
        }

        /**
         * [Default tree adapter]{@link parse5.treeAdapters} Node interface.
         */
        interface Node {
            /**
             * The name of the node. E.g. {@link Document} will have `nodeName` equal to `'#document'`.
             */
            nodeName: string;
        }

        /**
         * [Default tree adapter]{@link parse5.treeAdapters} ParentNode interface.
         */
        interface ParentNode {
            /**
             * Child nodes.
             */
            childNodes: Node[];
        }

        /**
         * [Default tree adapter]{@link parse5.treeAdapters} DocumentType interface.
         */
        export interface DocumentType extends Node {
            /**
             * The name of the node.
             */
            nodeName: '#documentType';
            /**
             * Document type name.
             */
            name: string;
            /**
             * Document type public identifier.
             */
            publicId: string;
            /**
             * Document type system identifier.
             */
            systemId: string;
        }

        /**
         * [Default tree adapter]{@link parse5.treeAdapters} Document interface.
         */
        export interface Document extends ParentNode {
            /**
             * The name of the node.
             */
            nodeName: '#document';
            /**
             * [Document mode](https://dom.spec.whatwg.org/#concept-document-limited-quirks).
             */
            mode: DocumentMode;
        }

        /**
         * [Default tree adapter]{@link parse5.treeAdapters} DocumentFragment interface.
         */
        export interface DocumentFragment extends ParentNode {
            /**
             * The name of the node.
             */
            nodeName: '#document-fragment';
        }

        /**
         * [Default tree adapter]{@link parse5.treeAdapters} Element interface.
         */
        export interface Element extends ParentNode {
            /**
             * The name of the node. Equal to the element's {@link tagName}.
             */
            nodeName: string;
            /**
             * Element tag name.
             */
            tagName: string;
            /**
             * Element namespace.
             */
            namespaceURI: string;
            /**
             * List of element attributes.
             */
            attrs: Attribute[];
            /**
             * Parent node.
             */
            parentNode: ParentNode;
            /**
             * Element source code location info. Available if location info is enabled via {@link Options.ParserOptions}.
             */
            __location?: MarkupData.ElementLocation;
        }

        /**
         * [Default tree adapter]{@link parse5.treeAdapters} CommentNode interface.
         */
        export interface CommentNode extends Node {
            /**
             * The name of the node.
             */
            nodeName: '#comment';
            /**
             * Comment text.
             */
            data: string;
            /**
             * Parent node.
             */
            parentNode: ParentNode;
            /**
             * Comment source code location info. Available if location info is enabled via {@link Options.ParserOptions}.
             */
            __location?: MarkupData.Location;
        }

        /**
         * [Default tree adapter]{@link parse5.treeAdapters} TextNode interface.
         */
        export interface TextNode extends Node {
            /**
             * The name of the node.
             */
            nodeName: '#text';
            /**
             * Text content.
             */
            value: string;
            /**
             * Parent node.
             */
            parentNode: ParentNode;
            /**
             * Text node source code location info. Available if location info is enabled via {@link Options.ParserOptions}.
             */
            __location?: MarkupData.Location;
        }
    }


    // htmlparser2 tree adapter
    namespace HtmlParser2 {
        /**
         * [htmlparser2 tree adapter]{@link parse5.treeAdapters} Node interface.
         */
        interface Node {
            /**
             * The type of the node. E.g. {@link Document} will have `type` equal to `'root'`.
             */
            type: string;
            /**
             * [DOM spec](https://dom.spec.whatwg.org/#dom-node-nodetype)-compatible node {@link type}.
             */
            nodeType: number;
            /**
             * Parent node.
             */
            parent: ParentNode;
            /**
             * Same as {@link parent}. [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
             */
            parentNode: ParentNode;
            /**
             * Previous sibling.
             */
            prev: Node;
            /**
             * Same as {@link prev}. [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
             */
            previousSibling: Node;
            /**
             * Next sibling.
             */
            next: Node;
            /**
             * Same as {@link next}. [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
             */
            nextSibling: Node;
        }

        /**
         * [htmlparser2 tree adapter]{@link parse5.treeAdapters} ParentNode interface.
         */
        interface ParentNode extends Node {
            /**
             * Child nodes.
             */
            children: Node[];
            /**
             * Same as {@link children}. [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
             */
            childNodes: Node[];
            /**
             * First child of the node.
             */
            firstChild: Node;
            /**
             * Last child of the node.
             */
            lastChild: Node;
        }

        /**
         * [htmlparser2 tree adapter]{@link parse5.treeAdapters} DocumentType interface.
         */
        export interface DocumentType extends Node {
            /**
             * The type of the node.
             */
            type: 'directive';
            /**
             * Node name.
             */
            name: '!doctype';
            /**
             * Serialized doctype {@link name}, {@link publicId} and {@link systemId}.
             */
            data: string;
            /**
             * Document type name.
             */
            'x-name': string;
            /**
             * Document type public identifier.
             */
            'x-publicId': string;
            /**
             * Document type system identifier.
             */
            'x-systemId': string;
        }

        /**
         * [htmlparser2 tree adapter]{@link parse5.treeAdapters} Document interface.
         */
        export interface Document extends ParentNode {
            /**
             * The type of the node.
             */
            type: 'root';
            /**
             * The name of the node.
             */
            name: 'root';
            /**
             * [Document mode](https://dom.spec.whatwg.org/#concept-document-limited-quirks).
             */
            'x-mode': DocumentMode;
        }

        /**
         * [htmlparser2 tree adapter]{@link parse5.treeAdapters} DocumentFragment interface.
         */
        export interface DocumentFragment extends ParentNode {
            /**
             * The type of the node.
             */
            type: 'root';
            /**
             * The name of the node.
             */
            name: 'root';
        }

        /**
         * [htmlparser2 tree adapter]{@link parse5.treeAdapters} Element interface.
         */
        export interface Element extends ParentNode {
            /**
             * The name of the node. Equal to the element's {@link tagName}.
             */
            name: string;
            /**
             * Element tag name.
             */
            tagName: string;
            /**
             * Element namespace.
             */
            namespace: string;
            /**
             * Element attributes.
             */
            attribs: { [name: string]: string };
            /**
             * Element attribute namespaces.
             */
            'x-attribsNamespace': { [name: string]: string };
            /**
             * Element attribute namespace-related prefixes.
             */
            'x-attribsPrefix': { [name: string]: string };
            /**
             * Element source code location info. Available if location info is enabled via {@link Options.ParserOptions}.
             */
            __location?: MarkupData.ElementLocation;
        }

        /**
         * [htmlparser2 tree adapter]{@link parse5.treeAdapters} CommentNode interface.
         */
        export interface CommentNode extends Node {
            /**
             * The name of the node.
             */
            name: 'comment';
            /**
             * Comment text.
             */
            data: string;
            /**
             * Same as {@link data}. [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
             */
            nodeValue: string;
            /**
             * Comment source code location info. Available if location info is enabled via {@link Options.ParserOptions}.
             */
            __location?: MarkupData.Location;
        }

        /**
         * [htmlparser2 tree adapter]{@link parse5.treeAdapters} TextNode interface.
         */
        export interface TextNode extends Node {
            /**
             * The name of the node.
             */
            name: 'text';
            /**
             * Text content.
             */
            data: string;
            /**
             * Same as {@link data}. [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
             */
            nodeValue: string;
            /**
             * Text node source code location info. Available if location info is enabled via {@link Options.ParserOptions}.
             */
            __location?: MarkupData.Location;
        }
    }


    // Unions
    // NOTE: we use `Object` in unions to support custom tree adapter implementations.
    // TypeScript Handbook suggests to always use `any` instead of `Object`, but in that
    // case language service hints `any` as type, instead of actual union name.
    /**
     * Generic Node interface.
     * Cast to the actual AST interface (e.g. {@link parse5.AST.Default.Node}) to get access to the properties.
     */
    type Node = Default.Node | HtmlParser2.Node | Object;
    /**
     * Generic ParentNode interface.
     * Cast to the actual AST interface (e.g. {@link parse5.AST.Default.ParentNode}) to get access to the properties.
     */
    type ParentNode = Default.ParentNode | HtmlParser2.ParentNode | Object;
    /**
     * Generic DocumentType interface.
     * Cast to the actual AST interface (e.g. {@link parse5.AST.Default.DocumentType}) to get access to the properties.
     */
    type DocumentType = Default.DocumentType | HtmlParser2.DocumentType | Object;
    /**
     * Generic Document interface.
     * Cast to the actual AST interface (e.g. {@link parse5.AST.Default.Document}) to get access to the properties.
     */
    type Document = Default.Document | HtmlParser2.Document | Object;
    /**
     * Generic DocumentFragment interface.
     * Cast to the actual AST interface (e.g. {@link parse5.AST.Default.DocumentFragment}) to get access to the properties.
     */
    type DocumentFragment = Default.DocumentFragment | HtmlParser2.DocumentFragment | Object;
    /**
     * Generic Element interface.
     * Cast to the actual AST interface (e.g. {@link parse5.AST.Default.Element}) to get access to the properties.
     */
    type Element = Default.Element | HtmlParser2.Element | Object;
    /**
     * Generic TextNode interface.
     * Cast to the actual AST interface (e.g. {@link parse5.AST.Default.TextNode}) to get access to the properties.
     */
    type TextNode = Default.TextNode | HtmlParser2.TextNode | Object;
    /**
     * Generic CommentNode interface.
     * Cast to the actual AST interface (e.g. {@link parse5.AST.Default.CommentNode}) to get access to the properties.
     */
    type CommentNode = Default.CommentNode | HtmlParser2.CommentNode | Object;


    // Tree adapter interface
    //-----------------------------------------------------------------------------------

    /**
     * Tree adapter is a set of utility functions that provides the minimal required abstraction layer between the parser and a specific AST format.
     * Note that `TreeAdapter` is not designed to be a general purpose AST manipulation library. You can build such a library
     * on top of an existing `TreeAdapter` or use one of the existing libraries from npm.
     *
     * @see [default implementation](https://github.com/inikulin/parse5/blob/master/lib/tree_adapters/default.js)
     */
    export interface TreeAdapter {
        /**
         * Creates a document node.
         */
        createDocument(): AST.Document;
        /**
         * Creates a document fragment node.
         */
        createDocumentFragment(): AST.DocumentFragment;
        /**
         * Creates an element node.
         *
         * @param tagName - Tag name of the element.
         * @param namespaceURI - Namespace of the element.
         * @param attrs - Attribute name-value pair array. Foreign attributes may contain `namespace` and `prefix` fields as well.
         */
        createElement(tagName: string, namespaceURI: string, attrs: AST.Default.Attribute[]): AST.Element;
        /**
         * Creates a comment node.
         *
         * @param data - Comment text.
         */
        createCommentNode(data: string): AST.CommentNode;
        /**
         * Appends a child node to the given parent node.
         *
         * @param parentNode - Parent node.
         * @param newNode - Child node.
         */
        appendChild(parentNode: AST.ParentNode, newNode: AST.Node): void;
        /**
         * Inserts a child node to the given parent node before the given reference node.
         *
         * @param parentNode - Parent node.
         * @param newNode - Child node.
         * @param referenceNode - Reference node.
         */
        insertBefore(parentNode: AST.ParentNode, newNode: AST.Node, referenceNode: AST.Node): void;
        /**
         * Sets the `<template>` element content element.
         *
         * @param templateElement - `<template>` element.
         * @param contentElement - Content element.
         */
        setTemplateContent(templateElement: AST.Element, contentElement: AST.DocumentFragment): void;
        /**
         * Returns the `<template>` element content element.
         *
         * @param templateElement - `<template>` element.
         */
        getTemplateContent(templateElement: AST.Element): AST.DocumentFragment;
        /**
         * Sets the document type. If the `document` already contains a document type node, the `name`, `publicId` and `systemId`
         * properties of this node will be updated with the provided values. Otherwise, creates a new document type node
         * with the given properties and inserts it into the `document`.
         *
         * @param document - Document node.
         * @param name - Document type name.
         * @param publicId - Document type public identifier.
         * @param systemId - Document type system identifier.
         */
        setDocumentType(document: AST.Document, name: string, publicId: string, systemId: string): void;
        /**
         * Sets the [document mode](https://dom.spec.whatwg.org/#concept-document-limited-quirks).
         *
         * @param document - Document node.
         * @param mode - Document mode.
         */
        setDocumentMode(document: AST.Document, mode: AST.DocumentMode): void;
        /**
         * Returns [document mode](https://dom.spec.whatwg.org/#concept-document-limited-quirks).
         *
         * @param document - Document node.
         */
        getDocumentMode(document: AST.Document): AST.DocumentMode;
        /**
         * Removes a node from its parent.
         *
         * @param node - Node to remove.
         */
        detachNode(node: AST.Node): void;
        /**
         * Inserts text into a node. If the last child of the node is a text node, the provided text will be appended to the
         * text node content. Otherwise, inserts a new text node with the given text.
         *
         * @param parentNode - Node to insert text into.
         * @param text - Text to insert.
         */
        insertText(parentNode: AST.ParentNode, text: string): void;
        /**
         * Inserts text into a sibling node that goes before the reference node. If this sibling node is the text node,
         * the provided text will be appended to the text node content. Otherwise, inserts a new sibling text node with
         * the given text before the reference node.
         *
         * @param parentNode - Node to insert text into.
         * @param text - Text to insert.
         * @param referenceNode - Node to insert text before.
         */
        insertTextBefore(parentNode: AST.ParentNode, text: string, referenceNode: AST.Node): void;
        /**
         * Copies attributes to the given element. Only attributes that are not yet present in the element are copied.
         *
         * @param recipient - Element to copy attributes into.
         * @param attrs - Attributes to copy.
         */
        adoptAttributes(recipient: AST.Element, attrs: AST.Default.Attribute[]): void;
        /**
         * Returns the first child of the given node.
         *
         * @param node - Node.
         */
        getFirstChild(node: AST.ParentNode): AST.Node;
        /**
         * Returns the given node's children in an array.
         *
         * @param node - Node.
         */
        getChildNodes(node: AST.ParentNode): AST.Node[];
        /**
         * Returns the given node's parent.
         *
         * @param node - Node.
         */
        getParentNode(node: AST.Node): AST.ParentNode;
        /**
         * Returns the given element's attributes in an array, in the form of name-value pairs.
         * Foreign attributes may contain `namespace` and `prefix` fields as well.
         *
         * @param element - Element.
         */
        getAttrList(element: AST.Element): AST.Default.Attribute[];
        /**
         * Returns the given element's tag name.
         *
         * @param element - Element.
         */
        getTagName(element: AST.Element): string;
        /**
         * Returns the given element's namespace.
         *
         * @param element - Element.
         */
        getNamespaceURI(element: AST.Element): string;
        /**
         * Returns the given text node's content.
         *
         * @param textNode - Text node.
         */
        getTextNodeContent(textNode: AST.TextNode): string;
        /**
         * Returns the given comment node's content.
         *
         * @param commentNode - Comment node.
         */
        getCommentNodeContent(commentNode: AST.CommentNode): string;
        /**
         * Returns the given document type node's name.
         *
         * @param doctypeNode - Document type node.
         */
        getDocumentTypeNodeName(doctypeNode: AST.DocumentType): string;
        /**
         * Returns the given document type node's public identifier.
         *
         * @param doctypeNode - Document type node.
         */
        getDocumentTypeNodePublicId(doctypeNode: AST.DocumentType): string;
        /**
         * Returns the given document type node's system identifier.
         *
         * @param doctypeNode - Document type node.
         */
        getDocumentTypeNodeSystemId(doctypeNode: AST.DocumentType): string;
        /**
         * Determines if the given node is a text node.
         *
         * @param node - Node.
         */
        isTextNode(node: AST.Node): boolean;
        /**
         * Determines if the given node is a comment node.
         *
         * @param node - Node.
         */
        isCommentNode(node: AST.Node): boolean;
        /**
         * Determines if the given node is a document type node.
         *
         * @param node - Node.
         */
        isDocumentTypeNode(node: AST.Node): boolean;
        /**
         * Determines if the given node is an element.
         *
         * @param node - Node.
         */
        isElementNode(node: AST.Node): boolean;
    }
}
|
725 |
|
726 |
|
727 | // Included tree adapters
|
728 | //-----------------------------------------------------------------------------------
|
729 |
|
/**
 * Built-in tree adapters, usable for both parsing and serialization.
 *
 * @example
 * ```js
 *
 * const parse5 = require('parse5');
 *
 * // Uses the default tree adapter for parsing.
 * const document = parse5.parse('<div></div>', {
 *     treeAdapter: parse5.treeAdapters.default
 * });
 *
 * // Uses the htmlparser2 tree adapter with the SerializerStream.
 * const serializer = new parse5.SerializerStream(node, {
 *     treeAdapter: parse5.treeAdapters.htmlparser2
 * });
 * ```
 */
export var treeAdapters: {
    /** The tree format parse5 produces by default. */
    default: AST.TreeAdapter;
    /**
     * The widely used [htmlparser2](https://github.com/fb55/htmlparser2) tree format
     * (used, for example, by [cheerio](https://github.com/MatthewMueller/cheerio) and [jsdom](https://github.com/tmpvar/jsdom)).
     */
    htmlparser2: AST.TreeAdapter;
};
|
760 |
|
761 |
|
762 | // Shorthand methods
|
763 | //-----------------------------------------------------------------------------------
|
764 |
|
/**
 * Parses a complete HTML document supplied as a string.
 *
 * @param html - HTML source text to parse.
 * @param options - Optional parser settings.
 * @returns The document node produced from `html`.
 *
 * @example
 * ```js
 *
 * const parse5 = require('parse5');
 *
 * const document = parse5.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
 *
 * console.log(document.childNodes[1].tagName); //> 'html'
 * ```
 */
export function parse(html: string, options?: Options.ParserOptions): AST.Document;
|
782 |
|
/**
 * Parses an HTML fragment in the context of the given element.
 *
 * @param fragmentContext - Parsing context element. The fragment is parsed as if it was set to the context element's `innerHTML` property.
 * @param html - Input HTML fragment string.
 * @param options - Parsing options.
 *
 * @example
 * ```js
 *
 * const parse5 = require('parse5');
 *
 * const documentFragment = parse5.parseFragment('<table></table>');
 *
 * console.log(documentFragment.childNodes[0].tagName); //> 'table'
 *
 * // Parses the html fragment in the context of the parsed <table> element.
 * const trFragment = parse5.parseFragment(documentFragment.childNodes[0], '<tr><td>Shake it, baby</td></tr>');
 *
 * console.log(trFragment.childNodes[0].childNodes[0].tagName); //> 'td'
 * ```
 */
export function parseFragment(fragmentContext: AST.Element, html: string, options?: Options.ParserOptions): AST.DocumentFragment;
/**
 * Parses an HTML fragment without a parsing context element.
 *
 * @param html - Input HTML fragment string.
 * @param options - Parsing options.
 *
 * @example
 * ```js
 *
 * const parse5 = require('parse5');
 *
 * const documentFragment = parse5.parseFragment('<table></table>');
 *
 * console.log(documentFragment.childNodes[0].tagName); //> 'table'
 * ```
 */
export function parseFragment(html: string, options?: Options.ParserOptions): AST.DocumentFragment;
|
807 |
|
/**
 * Serializes an AST node to an HTML string.
 *
 * @param node - Node to serialize.
 * @param options - Serialization options.
 *
 * @example
 * ```js
 *
 * const parse5 = require('parse5');
 *
 * const document = parse5.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
 *
 * // Serializes a document.
 * const html = parse5.serialize(document);
 *
 * // Serializes the <html> element content.
 * const str = parse5.serialize(document.childNodes[1]);
 *
 * console.log(str); //> '<head></head><body>Hi there!</body>'
 * ```
 */
export function serialize(node: AST.Node, options?: Options.SerializerOptions): string;
|
831 |
|
832 |
|
833 | // Parser stream
|
834 | //-----------------------------------------------------------------------------------
|
835 |
|
/**
 * Streaming HTML parser with scripting support.
 * A [writable stream](https://nodejs.org/api/stream.html#stream_class_stream_writable).
 *
 * @example
 * ```js
 *
 * const parse5 = require('parse5');
 * const http = require('http');
 *
 * // Fetch the google.com content and obtain its <body> node
 * http.get('http://google.com', res => {
 *     const parser = new parse5.ParserStream();
 *
 *     parser.once('finish', () => {
 *         console.log(parser.document.childNodes[1].childNodes[1].tagName); //> 'body'
 *     });
 *
 *     res.pipe(parser);
 * });
 * ```
 */
export class ParserStream extends stream.Writable {
    /**
     * @param options - Parsing options.
     */
    constructor(options?: Options.ParserOptions);

    /**
     * The resulting document node.
     */
    document: AST.Document;

    /**
     * Raised when the parser encounters a `<script>` element.
     * If this event has listeners, parsing will be suspended once it is emitted.
     * So, if `<script>` has the `src` attribute, you can fetch it, execute and then resume parsing just like browsers do.
     *
     * @param listener.scriptElement - The script element that caused the event.
     * @param listener.documentWrite - Write additional `html` at the current parsing position. Suitable for implementing the DOM `document.write` and `document.writeln` methods.
     * @param listener.documentWrite.html - HTML to write.
     * @param listener.resume - Resumes parsing.
     *
     * @example
     * ```js
     *
     * const parse5 = require('parse5');
     * const http = require('http');
     *
     * const parser = new parse5.ParserStream();
     *
     * parser.on('script', (scriptElement, documentWrite, resume) => {
     *     const src = parse5.treeAdapters.default.getAttrList(scriptElement)[0].value;
     *
     *     http.get(src, res => {
     *         // Fetch the script content, execute it with DOM built around `parser.document` and
     *         // `document.write` implemented using `documentWrite`.
     *         ...
     *         // Then resume parsing.
     *         resume();
     *     });
     * });
     *
     * parser.end('<script src="example.com/script.js"></script>');
     * ```
     */
    on(event: 'script', listener: (scriptElement: AST.Element, documentWrite: (html: string) => void, resume: () => void) => void): this;
    /**
     * WritableStream events
     */
    on(event: string, listener: Function): this;
}
|
908 |
|
909 |
|
// Plain text conversion stream
|
911 | //-----------------------------------------------------------------------------------
|
912 |
|
/**
 * Converts plain text files into an HTML document as required by the [HTML specification](https://html.spec.whatwg.org/#read-text).
 * A [writable stream](https://nodejs.org/api/stream.html#stream_class_stream_writable).
 *
 * @example
 * ```js
 *
 * const parse5 = require('parse5');
 * const fs = require('fs');
 *
 * const file = fs.createReadStream('war_and_peace.txt');
 * const converter = new parse5.PlainTextConversionStream();
 *
 * converter.once('finish', () => {
 *     console.log(converter.document.childNodes[1].childNodes[1].tagName); //> 'body'
 * });
 *
 * file.pipe(converter);
 * ```
 */
export class PlainTextConversionStream extends ParserStream { }
|
934 |
|
935 |
|
936 | // SAX parser
|
937 | //-----------------------------------------------------------------------------------
|
938 | /**
|
939 | * Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML parser.
|
940 | * A [transform stream](https://nodejs.org/api/stream.html#stream_class_stream_transform)
|
941 | * (which means you can pipe *through* it, see example).
|
942 | *
|
943 | * @example
|
944 | * ```js
|
945 | *
|
946 | * const parse5 = require('parse5');
|
947 | * const http = require('http');
|
948 | * const fs = require('fs');
|
949 | *
|
950 | * const file = fs.createWriteStream('/home/google.com.html');
|
951 | * const parser = new parse5.SAXParser();
|
952 | *
|
953 | * parser.on('text', text => {
|
954 | * // Handle page text content
|
955 | * ...
|
956 | * });
|
957 | *
|
958 | * http.get('http://google.com', res => {
|
959 | * // SAXParser is a Transform stream, which means you can pipe
|
960 | * // through it. So, you can analyze page content and, e.g., save it
|
961 | * // to the file at the same time:
|
962 | * res.pipe(parser).pipe(file);
|
963 | * });
|
964 | * ```
|
965 | */
|
966 | export class SAXParser extends stream.Transform {
|
967 | /**
|
968 | * @param options - Parsing options.
|
969 | */
|
970 | constructor(options?: Options.SAXParserOptions);
|
971 |
|
972 | /**
|
973 | * Raised when the parser encounters a start tag.
|
974 | *
|
975 | * @param listener.name - Tag name.
|
976 | * @param listener.attrs - List of attributes.
|
977 | * @param listener.selfClosing - Indicates if the tag is self-closing.
|
978 | * @param listener.location - Start tag source code location info. Available if location info is enabled via {@link Options.SAXParserOptions}.
|
979 | */
|
980 | on(event: 'startTag', listener: (name: string, attrs: AST.Default.Attribute[], selfClosing: boolean, location?: MarkupData.StartTagLocation) => void): this;
|
981 | /**
|
982 | * Raised when the parser encounters an end tag.
|
983 | *
|
984 | * @param listener.name - Tag name.
|
985 | * @param listener.location - End tag source code location info. Available if location info is enabled via {@link Options.SAXParserOptions}.
|
986 | */
|
987 | on(event: 'endTag', listener: (name: string, location?: MarkupData.Location) => void): this;
|
988 | /**
|
989 | * Raised when the parser encounters a comment.
|
990 | *
|
991 | * @param listener.text - Comment text.
|
992 | * @param listener.location - Comment source code location info. Available if location info is enabled via {@link Options.SAXParserOptions}.
|
993 | */
|
994 | on(event: 'comment', listener: (text: string, location?: MarkupData.Location) => void): this;
|
995 | /**
|
996 | * Raised when the parser encounters text content.
|
997 | *
|
998 | * @param listener.text - Text content.
|
999 | * @param listener.location - Text content source code location info. Available if location info is enabled via {@link Options.SAXParserOptions}.
|
1000 | */
|
1001 | on(event: 'text', listener: (text: string, location?: MarkupData.Location) => void): this;
|
1002 | /**
|
1003 | * Raised when the parser encounters a [document type declaration](https://en.wikipedia.org/wiki/Document_type_declaration).
|
1004 | *
|
1005 | * @param listener.name - Document type name.
|
1006 | * @param listener.publicId - Document type public identifier.
|
1007 | * @param listener.systemId - Document type system identifier.
|
1008 | * @param listener.location - Document type declaration source code location info. Available if location info is enabled via {@link Options.SAXParserOptions}.
|
1009 | */
|
1010 | on(event: 'doctype', listener: (name: string, publicId: string, systemId: string, location?: MarkupData.Location) => void): this;
|
1011 | /**
|
1012 | * Any other event inherited from the underlying Transform stream.
|
1013 | */
|
1014 | on(event: string, listener: Function): this;
|
1015 |
|
1016 | /**
|
1017 | * Stops parsing. Useful if you want the parser to stop consuming CPU time once you've obtained the desired info
|
1018 | * from the input stream. Doesn't prevent piping, so that data will flow through the parser as usual.
|
1019 | *
|
1020 | * @example
|
1021 | * ```js
|
1022 | *
|
1023 | * const parse5 = require('parse5');
|
1024 | * const http = require('http');
|
1025 | * const fs = require('fs');
|
1026 | *
|
1027 | * const file = fs.createWriteStream('google.com.html');
|
1028 | * const parser = new parse5.SAXParser();
|
1029 | *
|
1030 | * parser.on('doctype', (name, publicId, systemId) => {
|
1031 | * // Process doctype info and stop parsing
|
1032 | * ...
|
1033 | * parser.stop();
|
1034 | * });
|
1035 | *
|
1036 | * http.get('http://google.com', res => {
|
1037 | * // Despite the fact that parser.stop() was called, the whole
|
1038 | * // content of the page will be written to the file
|
1039 | * res.pipe(parser).pipe(file);
|
1040 | * });
|
1041 | * ```
|
1042 | */
|
1043 | stop(): void;
|
1044 | }
|
1045 |
|
1046 |
|
1047 | // Serializer stream
|
1048 | //-----------------------------------------------------------------------------------
|
1049 |
|
1050 | /**
|
1051 | * Streaming AST-node-to-HTML serializer.
|
1052 | * A [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable).
|
1053 | *
|
1054 | * @example
|
1055 | * ```js
|
1056 | *
|
1057 | * const parse5 = require('parse5');
|
1058 | * const fs = require('fs');
|
1059 | *
|
1060 | * const file = fs.createWriteStream('/home/index.html');
|
1061 | *
|
1062 | * // Serializes the parsed document to HTML and writes it to the file.
|
1063 | * const document = parse5.parse('<body>Who is John Galt?</body>');
|
1064 | * const serializer = new parse5.SerializerStream(document);
|
1065 | *
|
1066 | * serializer.pipe(file);
|
1067 | * ```
|
1068 | */
|
1069 | export class SerializerStream extends stream.Readable {
|
1070 | /**
|
1071 | * Creates a readable stream that serializes the given AST node to HTML.
|
1072 | *
|
1073 | * @param node - Node to serialize.
|
1074 | * @param options - Serialization options.
|
1075 | */
|
1076 | constructor(node: AST.Node, options?: Options.SerializerOptions);
|
1077 | }
|
1078 |
|
\ | No newline at end of file |