UNPKG

2.49 kBJavaScriptView Raw
1const dom = require('./dom');
2
// Selector for the list elements that hold summary entries.
const SELECTOR_LIST = 'ol, ul';
// Selector for an entry's link, evaluated relative to a list item via .find()
// in parseList: an anchor that is a direct child, or an anchor directly inside a paragraph.
const SELECTOR_LINK = '> a, p > a';
// Selector for the heading levels that are treated as part titles.
const SELECTOR_PART = 'h2, h3, h4';
6
7const utils = require('../utils');
8
9
/**
 * Return the list element(s) found directly under a node.
 *
 * When the node has at least one direct child with class `olist`, the lookup
 * is performed inside the first such child instead of the node itself.
 *
 * @param {Object} $parent - Wrapped node (cheerio-style selection) to search in.
 * @returns {Object} Selection of direct children matching SELECTOR_LIST.
 */
function findList($parent) {
    const $wrappers = $parent.children('.olist');
    const $scope = $wrappers.length > 0 ? $wrappers.first() : $parent;

    return $scope.children(SELECTOR_LIST);
}
16
/**
 * Convert a list element into an array of article descriptors.
 *
 * Each direct `li` child yields `{ title, ref?, articles }`:
 *  - `title` is the text of the item's direct `p` children, falling back to
 *    `dom.textNode` of the item (presumably the item's own text; verify in
 *    ./dom), trimmed. When a link matching SELECTOR_LINK exists, the text of
 *    the first such link replaces it.
 *  - `ref` is the first link's `href` with backslashes turned into forward
 *    slashes and leading slashes removed. It is only set when the link
 *    actually carries an `href`.
 *  - `articles` is the recursive result for the nested list found by
 *    `findList`.
 * Items that end up with an empty title are skipped.
 *
 * @param {Object} $ul - Wrapped list element (cheerio-style selection).
 * @param {Function} $ - Wrapper function used to wrap raw elements.
 * @returns {Array<Object>} Parsed articles in document order.
 */
function parseList($ul, $) {
    const articles = [];

    $ul.children('li').each(function() {
        const article = {};
        const $li = $(this);

        const $p = $li.children('p');
        article.title = ($p.text() || dom.textNode($li.get(0))).trim();

        const $a = $li.find(SELECTOR_LINK);
        if ($a.length > 0) {
            const $link = $a.first();
            article.title = $link.text();

            // An anchor without an href (e.g. a bare named anchor) yields
            // undefined here; skip the ref instead of throwing on .replace().
            const href = $link.attr('href');
            if (typeof href === 'string') {
                article.ref = href.replace(/\\/g, '/').replace(/^\/+/, '');
            }
        }

        // Untitled entries are dropped, so do not bother parsing their children.
        if (!article.title) return;

        article.articles = parseList(findList($li), $);
        articles.push(article);
    });

    return articles;
}
41
/**
 * Group the headings and lists found directly under a node into parts.
 *
 * The direct children matching SELECTOR_LIST or SELECTOR_PART are walked in
 * order. A heading opens a part; the next list closes it and becomes its
 * `list`. A heading that is followed by another heading (or by nothing) is
 * emitted with `list: null`, and a list with no heading before it is emitted
 * with an empty title.
 *
 * @param {Object} $parent - Wrapped node whose direct children are scanned.
 * @param {Function} $ - Wrapper function, forwarded to getPartTitle.
 * @returns {Array<{title: string, list: ?Object}>} Parts in document order;
 *   `list` is the raw (unwrapped) list element or null.
 */
function findParts($parent, $) {
    const collected = [];
    let pending = null;

    const $nodes = $parent.children(`${SELECTOR_LIST}, ${SELECTOR_PART}`);

    $nodes.each((index, node) => {
        if (!isPartNode(node)) {
            // A list closes the open part, or forms an untitled part of its own.
            const part = pending === null ? { title: '', list: null } : pending;
            part.list = node;
            collected.push(part);
            pending = null;
            return;
        }

        // A new heading: flush a previous heading that never received a list.
        if (pending !== null) {
            collected.push(pending);
        }
        pending = { title: getPartTitle(node, $), list: null };
    });

    // A trailing heading without a list still counts as a part.
    if (pending !== null) {
        collected.push(pending);
    }

    return collected;
}
78
/**
 * Tell whether an element is a part heading.
 *
 * The element's tag name must equal one of the tags listed in SELECTOR_PART.
 * The comparison is an exact match against each listed tag, not a substring
 * search in the selector string, so names such as `h` or an empty name are
 * rejected.
 *
 * @param {Object} el - Raw element node exposing its tag name as `name`.
 * @returns {boolean} True when `el.name` is one of the part heading tags.
 */
function isPartNode(el) {
    const partTags = SELECTOR_PART.split(',').map(function (tag) {
        return tag.trim();
    });

    return partTags.indexOf(el.name) !== -1;
}
82
83
/**
 * Read the title of a part heading.
 *
 * @param {Object} el - Raw heading element.
 * @param {Function} $ - Wrapper function used to wrap `el`.
 * @returns {string} The element's text with surrounding whitespace removed.
 */
function getPartTitle(el, $) {
    const rawText = $(el).text();

    return rawText.trim();
}
87
/**
 * Parse summary HTML into a list of parts with their articles.
 *
 * The markup is loaded with `dom.parse`, its root is passed through
 * `dom.cleanup`, the root is split into parts with `findParts`, and each
 * part's list is expanded with `parseList`.
 *
 * @param {string} html - Summary markup to parse.
 * @returns {Array<{title: string, articles: Array<Object>}>} One entry per
 *   part, in document order.
 */
function parseSummary(html) {
    const $ = dom.parse(html);
    const $root = dom.cleanup(dom.root($), $);

    return findParts($root, $).map((section) => ({
        title: section.title,
        articles: parseList($(section.list), $),
    }));
}
106
// The module's export is the parseSummary function itself.
module.exports = parseSummary;