1 | #!/usr/bin/env node
|
2 | 'use strict';
|
3 | var fs = require('fs');
|
4 | var path = require('path');
|
5 | var process = require('process');
|
6 | var domino = require('../../');
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
/**
 * Namespace URIs keyed by the short prefix used in the test files
 * (for example the "svg" in a tree line such as `<svg circle>`).
 */
var NAMESPACE = {
  html:  'http://www.w3.org/1999/xhtml',
  xml:   'http://www.w3.org/XML/1998/namespace',
  xmlns: 'http://www.w3.org/2000/xmlns/',
  math:  'http://www.w3.org/1998/Math/MathML',
  svg:   'http://www.w3.org/2000/svg',
  xlink: 'http://www.w3.org/1999/xlink'
};
|
20 |
|
21 |
|
/**
 * Lookup table of element names that are serialized without an end tag.
 * serialize_doc consults it for elements in the HTML namespace and rejects
 * any such element that has children.
 */
var EMPTY = [
  'area', 'base', 'basefont', 'bgsound', 'br', 'col', 'embed', 'frame',
  'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source',
  'track', 'wbr'
].reduce(function(table, tag) {
  table[tag] = true;
  return table;
}, {});
|
/**
 * Lookup table of HTML element names for which serialize_doc emits an
 * extra "\n" right after the start tag when the element's first child is
 * a text node beginning with a newline.
 *
 * The table is intentionally empty: no element currently receives that
 * treatment, but the hook in serialize_doc is kept so names can be added.
 */
var EXTRA_NL = {};
|
/**
 * Lookup table of HTML element names whose text children are written to
 * the serialized output verbatim (no entity escaping) by serialize_doc.
 */
var NO_ESCAPE = {
  style: true,
  script: true,
  xmp: true,
  iframe: true,
  noembed: true,
  noframes: true,
  plaintext: true,
  noscript: true
};
|
54 |
|
/**
 * Strip a leading namespace prefix ("svg ", "math ", "xlink ", "xml " or
 * "xmlns ") from a name as written in the test files.
 *
 * @param {string} namestring  Possibly prefixed name, e.g. "svg circle".
 * @returns {string} The name without the prefix; unchanged if none.
 */
var localname = function(namestring) {
  var prefixPattern = /^(svg|math|xlink|xml|xmlns) /;
  return namestring.replace(prefixPattern, '');
};
|
/**
 * Look up the namespace URI for a name as written in the test files.
 *
 * @param {string} namestring  Possibly prefixed name, e.g. "svg circle".
 * @returns {string|undefined} The NAMESPACE entry for the leading prefix
 *   ("svg", "math", "xlink", "xml" or "xmlns"), or undefined when the
 *   name carries none of those prefixes.
 */
var namespace = function(namestring) {
  var prefixed = /^(svg|math|xlink|xml|xmlns) /.exec(namestring);
  if (!prefixed) {
    // No recognised prefix: callers treat undefined as "no namespace given".
    return undefined;
  }
  return NAMESPACE[prefixed[1]];
};
|
63 |
|
/**
 * Error raised when a test file (or a piece of one) cannot be parsed.
 *
 * The message has the form `<desc> [<filename>]: <JSON of input>`.
 *
 * @constructor
 * @param {string} desc      Short description of what went wrong.
 * @param {string} filename  Name of the test file being processed.
 * @param {*} input          The offending input; embedded via JSON.stringify.
 */
var ParseError = function ParseError(desc, filename, input) {
  Error.call(this);
  this.name = this.constructor.name;
  this.message = desc + ' ['+filename+']: ' + JSON.stringify(input);
  // Error.call(this) does not initialise `this`, so without this step the
  // instance would carry no stack trace. Captured after name/message are
  // set so the trace header reflects them. Guarded because
  // captureStackTrace is a V8 extension.
  if (typeof Error.captureStackTrace === 'function') {
    Error.captureStackTrace(this, ParseError);
  }
};
ParseError.prototype = Object.create(Error.prototype);
ParseError.prototype.constructor = ParseError;
|
71 |
|
72 |
|
/**
 * List the tree-construction test files.
 *
 * @returns {string[]} Sorted, normalized paths of every `*.dat` file in
 *   `../html5lib-tests/tree-construction` relative to this script.
 */
var list_tests = function() {
  var base = path.join(__dirname, '..', 'html5lib-tests', 'tree-construction');
  var testfiles = [];
  fs.readdirSync(base).forEach(function(entry) {
    if (/\.dat$/.test(entry)) {
      testfiles.push(path.normalize(path.join(base, entry)));
    }
  });
  return testfiles.sort();
};
|
81 |
|
/**
 * Read one `.dat` file and parse every test case in it.
 *
 * @param {string} filename  Path of the `.dat` file.
 * @returns {Object[]} One entry per test case, as produced by
 *   parse_one_test and then adjusted by twiddle_test.
 */
var parse_test_file = function(filename) {
  var basename = path.basename(filename, '.dat');
  var contents = fs.readFileSync(filename, 'utf8');
  // Drop the final newline, then cut at each blank line that is
  // immediately followed by a "#data" header.
  var chunks = contents.replace(/\n$/,'').split(/\n\n(?=#data\n)/g);
  var parsed = [];
  for (var i = 0; i < chunks.length; i++) {
    parsed.push(twiddle_test(basename, parse_one_test(basename, chunks[i])));
  }
  return parsed;
};
|
90 |
|
/**
 * Parse the text of a single test case into its sections.
 *
 * The case must consist of `#data`, an optional `#script-on`/`#script-off`,
 * `#errors`, an optional `#document-fragment` line, an optional second
 * `#script-on`/`#script-off`, and `#document`.
 *
 * @param {string} filename  Test file name; used for error messages and
 *   passed through to serialize_doc.
 * @param {string} testcase  Raw text of one test case.
 * @returns {{data: string, errors: string[], fragment: (Object|undefined),
 *   script: (string|undefined), document: Object}}
 * @throws {ParseError} If the text does not have the expected layout.
 */
var parse_one_test = function(filename, testcase) {
  var sections = /^#data\n(?:([^]*?)\n)?(?:#script-(on|off)\n)?#errors\n((?:[^\n]*\n)*?)(?:#document-fragment\n([^\n]*)\n)?(?:#script-(on|off)\n)?#document\n([^]*?)$/.exec(testcase+'\n');
  if (sections === null) {
    throw new ParseError("Can't parse test case", filename, testcase);
  }

  var data = sections[1];
  var scriptBefore = sections[2];   // script flag placed before #errors
  var errorLines = sections[3];
  var context = sections[4];        // the #document-fragment context element
  var scriptAfter = sections[5];    // script flag placed before #document
  var tree = sections[6];

  var fragment;
  if (context) {
    fragment = { name: localname(context), ns: namespace(context) };
  }

  // Properties are assigned in the same order as they appear in the output.
  var parsed = {};
  parsed.data = data || '';
  // Every error line ends in "\n", so the final split element is empty.
  parsed.errors = errorLines.split(/\n/g).slice(0, -1);
  parsed.fragment = fragment;
  parsed.script = scriptBefore || scriptAfter;
  parsed.document = serialize_doc(filename, fragment, tree);
  return parsed;
};
|
110 |
|
111 |
|
112 |
|
113 | var serialize_doc = function(filename, fragment, doc) {
|
114 | var result = "", stack = [], can_add_attr = false, props = {tags:{}};
|
115 | var root = { children: [] }, parent, obj;
|
116 | if (fragment) { root.tag = fragment.name; root.ns = fragment.ns; }
|
117 | var clear_add_attr = function() {
|
118 | if (can_add_attr) {
|
119 | result += '>';
|
120 | can_add_attr = false;
|
121 | }
|
122 | };
|
123 | var pop_stack = function() {
|
124 | clear_add_attr();
|
125 | var old = stack.pop();
|
126 | if (old.content !== true) {
|
127 | if (old.ns===namespace('html') && EMPTY[old.tag]) {
|
128 | if (old.children.length > 0) {
|
129 | throw new ParseError("Empty elements ("+old.tag+") can't have children",
|
130 | filename, doc);
|
131 | }
|
132 | } else {
|
133 | result += '</' + old.tag + '>';
|
134 | }
|
135 | }
|
136 |
|
137 | if (old.children.length===0) { old.children = undefined; }
|
138 | if (old.attrs && old.attrs.length===0) { old.attrs = undefined; }
|
139 | return old;
|
140 | };
|
141 | var stack_top = function() {
|
142 | if (stack.length === 0) { return root; }
|
143 | return stack[stack.length-1];
|
144 | };
|
145 | var escape = function(s) {
|
146 | return s.replace(/[&<>\u00A0]/g, function(c) {
|
147 | switch(c) {
|
148 | case '&': return '&';
|
149 | case '<': return '<';
|
150 | case '>': return '>';
|
151 | case '\u00A0': return ' ';
|
152 | }
|
153 | });
|
154 | };
|
155 | var escapeAttr = function(s) {
|
156 | return s.replace(/[&"\u00A0]/g, function(c) {
|
157 | switch(c) {
|
158 | case '&': return '&';
|
159 | case '"': return '"';
|
160 | case '\u00A0': return ' ';
|
161 | }
|
162 | });
|
163 | };
|
164 |
|
165 | while (doc.length > 0) {
|
166 | var m = /^\| ((?: )*)(?:<([^!?>][^>]*)>|([^="\n][^=\n]*)="([^"]*)"|"((?:[^"]|"(?!\n))*)"|<!-- ((?:[^](?!-->))*) -->|<!DOCTYPE ([^>]*)>|<\?([^>]+)>|(content))\n/.exec(doc);
|
167 | if (!m) {
|
168 | throw new ParseError('Bad document line', filename, doc);
|
169 | }
|
170 | doc = doc.slice(m[0].length);
|
171 | var indent = m[1].length / 2;
|
172 | while (indent < stack.length) {
|
173 | pop_stack();
|
174 | }
|
175 | if (indent !== stack.length) {
|
176 | throw new ParseError('Indentation error', filename, doc);
|
177 | }
|
178 | var tagname = m[2], attrname = m[3], attrvalue = m[4];
|
179 | var text = m[5], comment = m[6], doctype = m[7], processing = m[8];
|
180 | var template_content = m[9];
|
181 | if (attrname !== undefined) {
|
182 | if (!can_add_attr)
|
183 | throw new ParseError('Late attribute', filename, m);
|
184 | obj = {
|
185 | name:localname(attrname),
|
186 | ns:namespace(attrname),
|
187 | value:attrvalue
|
188 | };
|
189 | if (attrvalue !== escapeAttr(attrvalue)) {
|
190 | obj.escaped = props.escaped = true;
|
191 | }
|
192 | var serializedName;
|
193 | if (obj.ns === namespace('html')) {
|
194 | serializedName = obj.name;
|
195 | } else if (obj.ns === NAMESPACE.xml) {
|
196 | serializedName = 'xml:' + obj.name;
|
197 | } else if (obj.ns == NAMESPACE.xmlns) {
|
198 | if (obj.name === 'xmlns') {
|
199 | serializedName = 'xmlns';
|
200 | } else {
|
201 | serializedName = 'xmlns:' + obj.name;
|
202 | }
|
203 | } else if (obj.ns === NAMESPACE.xlink) {
|
204 | serializedName = 'xlink:' + obj.name;
|
205 | } else {
|
206 | throw new Error("don't know what qualified name to use");
|
207 | }
|
208 | result += ' ' + serializedName + '="' + escapeAttr(obj.value) + '"';
|
209 | stack_top().attrs.push(obj);
|
210 | if (/[<"]/.test(serializedName)) {
|
211 | props.attrWithFunnyChar = true;
|
212 | }
|
213 | continue;
|
214 | }
|
215 | clear_add_attr();
|
216 | if (tagname !== undefined) {
|
217 | result += '<' + localname(tagname);
|
218 | can_add_attr = true;
|
219 | props.tags[tagname] = true;
|
220 | if (/</.test(tagname)) {
|
221 | props.tagWithLt = true;
|
222 | }
|
223 | parent = stack_top();
|
224 | stack.push({
|
225 | tag: localname(tagname),
|
226 | ns: namespace(tagname),
|
227 | attrs: [],
|
228 | children: []
|
229 | });
|
230 | parent.children.push(stack_top());
|
231 | continue;
|
232 | }
|
233 | if (text !== undefined) {
|
234 | obj = { text: text };
|
235 | if (stack_top().ns === namespace('html') &&
|
236 | NO_ESCAPE[stack_top().tag]) {
|
237 | obj.no_escape = props.no_escape = true;
|
238 | }
|
239 | if (stack_top().ns === namespace('html') &&
|
240 | EXTRA_NL[stack_top().tag] &&
|
241 | stack_top().children.length === 0 &&
|
242 | /^\n/.test(text)) {
|
243 | result += '\n';
|
244 | obj.extraNL = props.extraNL = true;
|
245 | }
|
246 | if (text !== escape(text) && !obj.no_escape) {
|
247 | obj.escaped = props.escaped = true;
|
248 | }
|
249 | result += obj.no_escape ? text : escape(text);
|
250 | stack_top().children.push(obj);
|
251 | continue;
|
252 | }
|
253 | if (comment !== undefined) {
|
254 | result += '<!--' + comment + '-->';
|
255 | props.comment = true;
|
256 | stack_top().children.push({ comment: comment });
|
257 | continue;
|
258 | }
|
259 | if (doctype !== undefined) {
|
260 |
|
261 |
|
262 | result += '<!DOCTYPE ' + doctype.replace(/ .*$/, '') + '>';
|
263 | props.doctype = true;
|
264 | stack_top().children.push({ doctype: doctype });
|
265 | continue;
|
266 | }
|
267 | if (processing !== undefined) {
|
268 | result += '<?' + processing + '>';
|
269 | props.processing = true;
|
270 | stack_top().children.push({ processing: processing });
|
271 | continue;
|
272 | }
|
273 | if (template_content !== undefined) {
|
274 | parent = stack_top();
|
275 | stack.push({content:true, children:[]});
|
276 | parent.children.push(stack_top());
|
277 | can_add_attr = false;
|
278 | props.template = true;
|
279 | continue;
|
280 | }
|
281 | throw new ParseError("Unknown line type", filename, m);
|
282 | }
|
283 | while (stack.length > 0) {
|
284 | pop_stack();
|
285 | }
|
286 | return {
|
287 | props: props,
|
288 | tree: root.children,
|
289 | html: result
|
290 | };
|
291 | };
|
292 |
|
/**
 * Adjust a parsed test case in place: reorder attributes in the expected
 * HTML for a handful of known tests, and record how the test input
 * serializes after being parsed as the contents of a document body.
 *
 * @param {string} filename  Base name of the test file (no extension).
 * @param {Object} tc  Parsed test case; `tc.data` is the input and
 *   `tc.document.html` the expected serialization.
 * @returns {Object} The same `tc`, with `document.html` possibly rewritten
 *   and `document.noQuirksBodyHtml` added.
 */
var twiddle_test = function(filename, tc) {
  var html = tc.document.html;

  // Apply one rewrite of the expected HTML when its guard holds. Guards
  // are evaluated at each call site, so later rules see earlier rewrites.
  var reorder = function(applies, pattern, replacement) {
    if (applies) {
      html = html.replace(pattern, replacement);
    }
  };

  reorder(/^isindex$/.test(filename) &&
          /<isindex name="A" action="B" prompt="C" foo="D"/.test(tc.data) &&
          /<isindex action="B" foo="D" name="A" prompt="C"/.test(html),
          /<(isindex) (action="B") (foo="D") (name="A") (prompt="C")/,
          '<$1 $4 $2 $5 $3');

  reorder(/^tests(9|10)$/.test(filename) &&
          /<(g|mi) xml:lang=en xlink:href=foo/.test(tc.data) &&
          /<(g|mi) xlink:href="foo" xml:lang="en"/.test(html),
          /<(g|mi) (xlink[^> ]+) (xml[^> ]+)/g,
          '<$1 $3 $2');

  reorder(filename==='tests19' &&
          /<html c=d>.*<html a=b>/.test(tc.data) &&
          /<html a="b" c="d">/.test(html),
          /a="b" c="d"/,
          'c="d" a="b"');

  reorder(filename==='tests19' &&
          /http-equiv="content-type" content="[^\"]+"/.test(tc.data) &&
          /content="[^\"]+" http-equiv="content-type"/.test(html),
          /(content=[^> ]+) (http-equiv=[^> ]+)/g,
          '$2 $1');

  reorder(filename==='tests23' &&
          /size=4 id=a/.test(tc.data) &&
          /id="a" size="4"/.test(html),
          /(id=[^> ]+) (size=[^> ]+)/g,
          '$2 $1');

  reorder(filename==='tests26' &&
          /<code code="" x<="">/.test(html),
          /(code=[^> ]+) (x<=[^> ]+)/g,
          '$2 $1');

  reorder(filename==='webkit01' &&
          /<rdar: 6869687="" problem="">/.test(html),
          /(6869687=[^> ]+) (problem=[^> ]+)/g,
          '$2 $1');

  tc.document.html = html;

  // Record what the input looks like after a round trip through the
  // innerHTML of a fresh document's <body>; third-party consumers of the
  // generated tests use this to tell whether a test would pass when
  // parsed as a <body> fragment in no-quirks mode.
  var scratch = domino.createDocument();
  scratch.body.innerHTML = tc.data;
  tc.document.noQuirksBodyHtml = scratch.body.innerHTML;

  return tc;
};
|
344 |
|
// Parse every test file into a map keyed by the file's base name, then
// emit it as pretty-printed JSON: to the path given as the first
// command-line argument if there is one, otherwise to standard output.
var result = {};
list_tests().forEach(function(filename) {
  result[path.basename(filename)] = parse_test_file(filename);
});

if (process.argv[2]) {
  var destination = process.argv[2];
  fs.writeFileSync(destination, JSON.stringify(result, null, 2), 'utf8');
  console.warn('Wrote', destination);
} else {
  console.log(JSON.stringify(result, null, 2));
}
|