1 | const {Parser} = require('htmlparser2');
|
2 |
|
3 |
|
4 |
|
5 |
|
/**
 * Parser options used when the caller supplies none: tag names and
 * attribute names keep the casing found in the input.
 */
const defaultOptions = {
  lowerCaseTags: false,
  lowerCaseAttributeNames: false
};

/**
 * Directives recognised without any configuration. Each entry gives the
 * directive `name` to match and the `start` / `end` strings wrapped around
 * the directive's raw data when it is written into the tree.
 */
const defaultDirectives = [
  {
    name: '!doctype',
    start: '<',
    end: '>'
  }
];
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
/**
 * Parse an HTML string into a tree of plain nodes.
 *
 * Element nodes have the shape `{tag, attrs?, content?}`; text, comments and
 * recognised directives are stored as strings. Nodes that are not inside any
 * open element are collected in the returned top-level array.
 *
 * @param {string} html - Markup handed to the htmlparser2 `Parser`.
 * @param {Object} [options] - Options forwarded to the `Parser`. An optional
 *   `directives` array (entries shaped `{name, start, end}`, where `name` is a
 *   string or RegExp) extends `defaultDirectives`. When omitted,
 *   `defaultOptions` is used.
 * @returns {Array<Object|string>} The top-level nodes.
 */
function postHTMLParser(html, options) {
  // Resolve the fallback once so that directive handling and the Parser see
  // the same object; reading `options.directives` directly would throw when
  // no options were passed.
  const settings = options || defaultOptions;
  const directives = [].concat(defaultDirectives, settings.directives || []);

  // Stack of elements that have been opened but not yet closed.
  const openElements = [];
  const results = [];

  /** Innermost open element, or undefined at the top level. */
  function currentElement() {
    return openElements[openElements.length - 1];
  }

  /** Attach a node to the innermost open element, or to the root list. */
  function append(node) {
    const parent = currentElement();

    if (!parent) {
      results.push(node);
      return;
    }

    if (parent.content === undefined) {
      parent.content = [];
    }

    parent.content.push(node);
  }

  /** Test a (lower-cased) instruction name against one directive entry. */
  function isDirective({name}, tag) {
    if (name instanceof RegExp) {
      // Rebuild the pattern so matching is always case-insensitive and no
      // `lastIndex` state leaks from a caller-supplied /g or /y regex.
      return new RegExp(name.source, 'i').test(tag);
    }

    return tag === name;
  }

  /** Handle a processing instruction / declaration reported by the parser. */
  function parserDirective(name, data) {
    const tag = name.toLowerCase();

    for (const directive of directives) {
      if (isDirective(directive, tag)) {
        append(directive.start + data + directive.end);
        // Stop at the first match so overlapping directive definitions emit
        // the instruction once, both at the top level and inside an element.
        return;
      }
    }
  }

  /** Copy attributes into a fresh object, unescaping `&quot;` in values. */
  function normalizeAttributes(attrs) {
    const normalized = {};

    for (const key of Object.keys(attrs)) {
      // NOTE(review): assumes the parser can return values with `&quot;`
      // still encoded (entity decoding off) - confirm for the htmlparser2
      // version in use.
      normalized[key] = attrs[key].replace(/&quot;/g, '"');
    }

    return normalized;
  }

  const parser = new Parser({
    onprocessinginstruction: parserDirective,
    oncomment(data) {
      append(`<!--${data}-->`);
    },
    onopentag(tag, attrs) {
      const element = {tag};

      if (Object.keys(attrs).length > 0) {
        element.attrs = normalizeAttributes(attrs);
      }

      openElements.push(element);
    },
    onclosetag() {
      // Pop first: the closed element becomes a child of whatever is now the
      // innermost open element (or a root node if nothing is left open).
      append(openElements.pop());
    },
    ontext(text) {
      const parent = currentElement();

      if (parent && parent.content && parent.content.length > 0) {
        const lastIndex = parent.content.length - 1;

        // Merge with a directly preceding string child instead of creating a
        // new entry for every text chunk the parser emits.
        if (typeof parent.content[lastIndex] === 'string') {
          parent.content[lastIndex] = `${parent.content[lastIndex]}${text}`;
          return;
        }
      }

      append(text);
    }
  }, settings);

  parser.write(html);
  parser.end();

  return results;
}
|
139 |
|
/**
 * Public entry point with two calling conventions.
 *
 * - `parserWrapper(html, options)` parses immediately and returns the tree.
 * - `parserWrapper(options)` (a single plain-object argument) returns a
 *   function `html => tree` bound to those options.
 *
 * In both cases the supplied options are merged over `defaultOptions`.
 *
 * @param {...*} args - Either `(html, [options])` or `(options)`.
 * @returns {Array|Function} The parsed tree, or a parser bound to `options`.
 */
function parserWrapper(...args) {
  /**
   * True for plain option bags. Objects created with `Object.create(null)`
   * have no `constructor`, so they are checked before reading
   * `constructor.name` instead of throwing a TypeError.
   */
  function isOptionsObject(value) {
    if (typeof value !== 'object' || value === null) {
      return false;
    }

    if (Object.getPrototypeOf(value) === null) {
      return true;
    }

    return Boolean(value.constructor) && value.constructor.name === 'Object';
  }

  const [first, second] = args;

  if (args.length === 1 && isOptionsObject(first)) {
    // Options only: defer parsing, merging the options on every call.
    return html => postHTMLParser(html, {...defaultOptions, ...first});
  }

  return postHTMLParser(first, {...defaultOptions, ...second});
}
|
160 |
|
161 | module.exports = parserWrapper;
|
162 | module.exports.defaultOptions = defaultOptions;
|
163 | module.exports.defaultDirectives = defaultDirectives;
|