1 | var _ = require('lodash');
|
2 | var htmlParser = require('htmlparser2');
|
3 |
|
4 | module.exports = function (html) {
|
5 | var DOM;
|
6 | var handler = new htmlParser.DomHandler(function (err, dom) {
|
7 | DOM = dom;
|
8 | });
|
9 | var parser = new htmlParser.Parser(handler, {
|
10 | xmlMode: true,
|
11 | lowerCaseTags: false,
|
12 | lowerCaseAttributeNames: false
|
13 | });
|
14 |
|
15 | parser.write(html);
|
16 | parser.end();
|
17 |
|
18 | var tree = [];
|
19 |
|
20 | _.forEach(DOM, forEachChild(tree));
|
21 |
|
22 | return tree;
|
23 | };
|
24 |
|
/**
 * Creates a visitor that converts parsed DOM nodes into simplified plain
 * objects and appends them to `tree`.
 *
 * Conversion rules:
 * - `text` nodes become `{ name: '#text', value }` with surrounding whitespace
 *   trimmed; nodes that are empty after trimming are dropped.
 * - `comment` nodes become `{ name: '#comment', value }` with the data kept
 *   verbatim.
 * - `tag` nodes become `{ name, attrs?, children? }`. `attrs` is present only
 *   when the element has at least one attribute, and `children` only when at
 *   least one child node survives conversion.
 * - Nodes of any other type are skipped.
 *
 * @param {Array<Object>} tree - Array that receives the converted nodes.
 * @returns {function(Object): void} Visitor to call once per DOM node.
 */
function forEachChild(tree) {
  return function (child) {
    var type = child.type;

    if (type === 'text') {
      // Tolerate missing data the same way a null-safe trim helper would.
      var raw = child.data == null ? '' : String(child.data);
      var value = raw.trim();

      if (value) {
        tree.push({ name: '#text', value: value });
      }

      return;
    }

    if (type === 'comment') {
      tree.push({ name: '#comment', value: child.data });
      return;
    }

    if (type !== 'tag') {
      return;
    }

    var newChild = { name: child.name };

    if (child.attribs && Object.keys(child.attribs).length) {
      newChild.attrs = child.attribs;
    }

    if (child.children && child.children.length) {
      var kids = [];

      child.children.forEach(forEachChild(kids));

      // Children can all be filtered out (e.g. whitespace-only text). Omit the
      // key in that case so such elements look the same as childless ones.
      if (kids.length) {
        newChild.children = kids;
      }
    }

    tree.push(newChild);
  };
}
|