UNPKG

7.47 kBJavaScriptView Raw
1'use strict';
2
3// TODO need to test
4
5const END_TAG = /^<\/([-A-Za-z0-9_]+)[^>]*>/;
6const ATTRIBUTE = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g;
7const START_TAG = /^<([-A-Za-z0-9_]+)((?:\s+[a-zA-Z_:][-a-zA-Z0-9_:.]*(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/;
8
9module.exports = {
10
11 types: {
12 SPECIAL: [
13 'script','style'
14 ],
15 CLOSE_SELF: [
16 'colgroup','dd','dt','li','options',
17 'p','td','tfoot','th','thead','tr'
18 ],
19 EMPTY: [
20 'area','base','basefont','br','col','frame','hr','img','input',
21 'link','meta','param','embed','command','keygen','source','track','wbr'
22 ],
23 FILL_ATTRIBUTES: [
24 'checked','compact','declare','defer','disabled','ismap',
25 'multiple','nohref','noresize','noshade','nowrap','readonly','selected'
26 ],
27 INLINE: [
28 'abbr','acronym','applet','b','basefont','bdo','big','br','button',
29 'cite','code','del','dfn','em','font','i','iframe','img','input','ins',
30 'kbd','label','map','object','q','s','samp','script','select','small',
31 'span','strike','strong','sub','sup','textarea','tt','u','var'
32 ],
33 BLOCK: [
34 'a','address','article','applet','aside','audio','blockquote','button','canvas',
35 'center','dd','del','dir','div','dl','dt','fieldset','figcaption','figure','footer',
36 'form','frameset','h1','h2','h3','h4','h5','h6','header','hgroup','hr','iframe','ins',
37 'isindex','li','map','menu','noframes','noscript','object','ol','output','p','pre','section',
38 'script','table','tbody','td','tfoot','th','thead','tr','ul','video','svg'
39 ]
40 },
41
42 is: function (type, name) {
43 return this.types[type].indexOf(name) !== -1;
44 },
45
46 createTagStart: function (tag, attributes) {
47 let result = `<${tag}`;
48
49 for (let attribute of attributes) {
50 if (attribute.value) {
51 result += ` ${attribute.name}="${attribute.value}"`;
52 } else {
53 result += ` ${attribute.name}`;
54 }
55 }
56
57 result += '>';
58
59 return result;
60 },
61
62 parseStartTag: function (data, tag, name, rest, unary) {
63 name = name.toLowerCase();
64
65 if (this.is('BLOCK', name)) {
66
67 while ( data.stack.last() && this.is('INLINE', data.stack.last()) ) {
68 this.parseEndTag(data, '', data.stack.last());
69 }
70
71 }
72
73 if ( this.is('CLOSE_SELF', name) && data.stack.last() === name ) {
74 this.parseEndTag(data, '', name);
75 }
76
77 unary = this.is('EMPTY', name) || !!unary;
78
79 if (!unary) {
80 data.stack.push(name);
81 }
82
83 if (data.start) {
84 var attributes = [];
85
86 rest.replace(ATTRIBUTE, function (match, name) {
87
88 var value = arguments[2] ? arguments[2] :
89 arguments[3] ? arguments[3] :
90 arguments[4] ? arguments[4] :
91 this.is('FILL_ATTRIBUTES', name) ? name : '';
92
93 attributes.push({
94 name: name,
95 value: value
96 // escaped: value.replace(/(^|[^\\])"/g, '$1\\\"')
97 });
98
99 });
100
101 if (data.start) {
102 data.start(name, attributes, unary);
103 }
104
105 }
106
107 },
108
109 parseEndTag: function (data, tag, name) {
110 let position;
111
112 // If no tag name is provided, clean shop
113 if (!name) {
114
115 position = 0;
116
117 } else {
118
119 // Find the closest opened tag of the same type
120 for (position = data.stack.length - 1; position >= 0; position--) {
121
122 if (data.stack[position] === name) {
123 break;
124 }
125
126 }
127
128 }
129
130 if (position >= 0) {
131
132 // Close all the open elements, up the stack
133 for (var i = data.stack.length - 1; i >= position; i--) {
134
135 if (data.end) {
136 data.end(data.stack[i]);
137 }
138
139 }
140
141 // Remove the open elements from the stack
142 data.stack.length = position;
143 }
144
145 },
146
147 html: function (data) {
148
149 data.stack = [];
150 data.last = data.html;
151
152 if (data.html.indexOf('<!DOCTYPE html>') === 0) {
153 data.html = data.html.slice(15);
154 }
155
156 data.stack.last = function () {
157 return this[this.length-1];
158 };
159
160 while (data.html) {
161 let isChars = true;
162 let index, match;
163
164 // Make sure we are not in a script or style element
165 if ( !data.stack.last() || !this.is('SPECIAL', data.stack.last()) ) {
166
167 // Comment
168 if (data.html.indexOf('<!--') === 0) {
169 index = data.html.indexOf('-->');
170
171 if (index >= 0) {
172
173 if (data.comment) {
174 data.comment(data.html.substring(4, index));
175 }
176
177 data.html = data.html.substring(index + 3);
178 isChars = false;
179 }
180
181 // end tag
182 } else if (data.html.indexOf('</') === 0) {
183 match = data.html.match(END_TAG);
184
185 if (match) {
186 data.html = data.html.substring(match[0].length);
187 match[0].replace(END_TAG, this.parseEndTag.bind(this, data));
188 isChars = false;
189 }
190
191 // start tag
192 } else if (data.html.indexOf('<') === 0) {
193 match = data.html.match(START_TAG);
194
195 if (match) {
196 data.html = data.html.substring(match[0].length);
197 match[0].replace(START_TAG, this.parseStartTag.bind(this, data));
198 isChars = false;
199 }
200
201 }
202
203 if (isChars) {
204 index = data.html.indexOf('<');
205
206 var text = index < 0 ? data.html : data.html.substring(0, index);
207 data.html = index < 0 ? '' : data.html.substring(index);
208
209 if (data.chars) {
210 data.chars(text);
211 }
212
213 }
214
215 } else {
216 var pattern = new RegExp(`([\\s\\S]*?)<\/${data.stack.last()}[^>]*>`);
217
218 data.html = data.html.replace(pattern, function (all, text) {
219 text = text.replace(/<!--([\s\S]*?)-->|<!\[CDATA\[([\s\S]*?)]]>/g, '$1$2');
220
221 if (data.chars) {
222 data.chars(text);
223 }
224
225 return '';
226 });
227
228 this.parseEndTag(data, '', data.stack.last());
229 }
230
231 if (data.html === data.last) {
232 throw `Parse Error: ${html}`;
233 }
234
235 data.last = data.html;
236 }
237
238 // Clean up any remaining tags
239 this.parseEndTag(data);
240
241 }
242
243};
244
/*
  Usage: the markup and the (optional) callbacks live on the same object.

  Parser.html({
    html: string,
    start: function (tag, attributes, unary) {},
    end: function (tag) {},
    chars: function (text) {},
    comment: function (text) {}
  });
*/