UNPKG

921 B · JavaScript · View Raw
1const cheerio = require('cheerio');
2const _ = require('underscore');
3
/**
 * Load an HTML string with cheerio (entity decoding disabled via
 * `decodeEntities: false`) and return its outermost document element.
 *
 * @param {string} html - Markup handed to `cheerio.load`.
 * @returns {*} The first element matching `html, body`; when nothing
 *   matches, the loaded cheerio `$` function itself.
 */
function parse(html) {
  const $ = cheerio.load(html, {
    decodeEntities: false,
  });
  const $top = $('html, body').first();

  // NOTE(review): the fallback is the `$` API function, not a selection
  // (`root` falls back to `$.root()` instead) — confirm callers rely on this.
  return $top.length > 0 ? $top : $;
}
11
/**
 * Get the root element of a loaded document.
 *
 * Queries `html, body, > div` and takes the first match; if the query
 * matches nothing, falls back to `$.root()`.
 * (`> div` is presumably a top-level div — confirm how cheerio resolves
 * a selector that starts with a combinator.)
 *
 * @param {*} $ - Loaded cheerio API: callable with a selector and
 *   exposing `root()`.
 * @returns {*} The first matching element, or the result of `$.root()`.
 */
function root($) {
  const $first = $('html, body, > div').first();
  return $first.length > 0 ? $first : $.root();
}
20
/**
 * Concatenate the text of an element's direct text children.
 *
 * Only entries of `$el.children` whose `type` is exactly `'text'`
 * contribute (their `data` is appended in order); text nested inside
 * child elements is not included.
 *
 * @param {*} $el - Object whose `children` property is a list of nodes
 *   carrying `type` / `data`. NOTE(review): despite the `$` prefix this
 *   reads `children` as a property, so it presumably expects a raw DOM
 *   node rather than a cheerio wrapper — confirm against callers.
 * @returns {string} The joined text, or `''` when `$el` is null/undefined
 *   or has no `children`.
 */
function textNode($el) {
  const children = $el == null ? null : $el.children;
  if (children == null) {
    return '';
  }

  return Array.from(children).reduce(
    (text, child) => (child.type === 'text' ? text + child.data : text),
    ''
  );
}
31
/**
 * Remove every `div` wrapper below an element, keeping each div's inner
 * markup in its place.
 *
 * @param {*} $el - Selection to clean; it is modified in place.
 * @param {*} $ - Cheerio API function used to wrap each matched node.
 * @returns {*} The same `$el` that was passed in.
 */
function cleanup($el, $) {
  $el.find('div').each(function () {
    const $div = $(this);

    // Unwrap nested divs first so the markup read by `$div.html()` below
    // no longer contains them.
    cleanup($div, $);

    // NOTE(review): `find('div')` also yields the nested divs already
    // handled by the recursive call, so they are visited again here —
    // presumably a no-op once detached; confirm against `replaceWith`.
    $div.replaceWith($div.html());
  });

  return $el;
}
47
48module.exports = {
49 parse: parse,
50 textNode: textNode,
51 root: root,
52 cleanup: cleanup
53};