UNPKG

1.24 kBJavaScriptView Raw
1var _ = require('lodash');
2var htmlParser = require('htmlparser2');
3
4module.exports = function (html) {
5 var DOM;
6 var handler = new htmlParser.DomHandler(function (err, dom) {
7 DOM = dom;
8 });
9 var parser = new htmlParser.Parser(handler, {
10 xmlMode: true,
11 lowerCaseTags: false,
12 lowerCaseAttributeNames: false
13 });
14
15 parser.write(html);
16 parser.end();
17
18 var tree = [];
19
20 _.forEach(DOM, forEachChild(tree));
21
22 return tree;
23};
24
/**
 * Build a callback that converts one parsed DOM node into a simplified plain
 * object and appends it to `tree`.
 *
 * Output shapes:
 *   - text node:    { name: '#text', value: <trimmed data> }
 *                   (skipped entirely when the trimmed data is empty)
 *   - comment node: { name: '#comment', value: <data, untouched> }
 *   - tag node:     { name, attrs?, children? }
 *                   `attrs` is present only when the node has attributes;
 *                   `children` is present only when at least one child
 *                   survives conversion.
 *
 * Nodes of any other type are skipped.
 *
 * @param {Array<Object>} tree - Array that receives the converted nodes.
 * @returns {function(Object): void} Callback taking a single DOM node.
 */
function forEachChild(tree) {
  return function (child) {
    var type = child.type;

    if (type === 'text') {
      // Whitespace-only text (e.g. indentation between tags) is dropped.
      var value = child.data == null ? '' : String(child.data).trim();

      if (value) {
        tree.push({ name: '#text', value: value });
      }

      return;
    }

    if (type === 'comment') {
      // Comment content is kept verbatim, including surrounding whitespace.
      tree.push({ name: '#comment', value: child.data });
      return;
    }

    if (type !== 'tag') {
      return;
    }

    var newChild = { name: child.name };

    if (child.attribs && Object.keys(child.attribs).length > 0) {
      // The attribute object is shared with the source node, not copied.
      newChild.attrs = child.attribs;
    }

    // Convert into a scratch array first and attach it only when something
    // survived. Otherwise an element holding nothing but whitespace text
    // would get `children: []` while a truly empty element gets no
    // `children` key at all.
    var convertedChildren = [];

    (child.children || []).forEach(forEachChild(convertedChildren));

    if (convertedChildren.length) {
      newChild.children = convertedChildren;
    }

    tree.push(newChild);
  };
}