/**
 * Converts `interim/normalized.html` into a flat "prebook" object and saves it
 * as pretty-printed JSON to `interim/tmp/.prebook`.
 *
 * The prebook holds `START_PAGE` (copied from `./.abelonerc`) plus one entry
 * per direct child of `<body>`, keyed by that child's index:
 *   - `p`, `h1`-`h6`: `{ <tag>: <text content>, len: <word count> }`
 *   - `img`:          `{ img: <src attribute>, len: '' }`
 *   - anything else:  `{ '': '', len: '' }` (a warning is logged)
 *
 * Every failure is logged to the console and swallowed; nothing is rethrown.
 *
 * @returns {Promise<void>} Settles once the prebook has been written or a
 *   failure has been logged, so callers can sequence follow-up work.
 */
function objectify() {
  const fsp = require('fs-promise');
  const path = require('path');
  const chalk = require('chalk');
  const cheerio = require('cheerio');
  const wordcount = require('wordcount');

  // Tags whose text content (and word count) is recorded.
  const textTags = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'];

  const book = {};

  // Builds the prebook entry for a single direct child of <body>.
  const toEntry = ($, elem) => {
    const tag = elem.name;

    if (textTags.indexOf(tag) > -1) {
      const text = $(elem).text();
      return { [tag]: text, len: wordcount(text) };
    }

    if (tag === 'img') {
      return { [tag]: $(elem).attr('src'), len: '' };
    }

    // Unrecognised element: warn, but still emit a placeholder entry so the
    // shape of the output (one entry per child index) is unchanged.
    console.log('We have a situation Houston.');
    return { '': '', len: '' };
  };

  // Reads the normalized HTML and fills `book` with one entry per body child.
  const loadContents = () =>
    fsp.readFile(path.join('interim', 'normalized.html'), { encoding: 'utf8' })
      .then((contents) => {
        const $ = cheerio.load(contents);

        $('body').children().each((i, elem) => {
          book[i] = toEntry($, elem);
        });
      });

  // Ensures interim/tmp exists, then writes `book` there as JSON.
  // Handles its own failures so they are not reported as read errors.
  const savePrebook = () =>
    fsp.mkdirs(path.join('interim', 'tmp'))
      .then(() => fsp.writeFile(
        path.join('.', 'interim', 'tmp', '.prebook'),
        JSON.stringify(book, null, 2)
      ))
      .then(() => {
        console.log(chalk.blue('Prebook object saved.'));
      })
      .catch((err) => {
        console.log(chalk.bold.red('Failed to write prebook', err));
      });

  // Each stage is returned so the chain is never left floating.
  return fsp.readJson(path.join('.', '.abelonerc'))
    .then((abelonerc) => {
      book.START_PAGE = abelonerc.START_PAGE;

      return loadContents()
        .then(savePrebook)
        .catch((err) => {
          console.log(chalk.bold.red('Failed to pick up contents', err));
        });
    })
    .catch((err) => {
      console.log(chalk.red('Couldn\'t read abelonerc', err));
    });
}

module.exports.objectify = objectify;