1 | import fs from 'node:fs';
|
2 | import path from 'node:path';
|
3 | import process from 'node:process';
|
4 | import glob from 'glob';
|
5 | import matter from 'gray-matter';
|
6 | import lunr from 'lunr';
|
7 | import removeMd from 'remove-markdown';
|
8 | import {stripHtml} from 'string-strip-html';
|
9 | import {createFolders, getSystemLang} from './utils.js';
|
10 |
|
/**
 * Language bucket for posts whose file name carries no language suffix.
 * Also the key that is pre-created in the indexer's `indexData`.
 */
const DEFAULT_LANGUAGE = 'ru';

/** Default glob pattern of the content files to index. */
const CONTENT_PATH = 'content/**';

/** Default destination of the plain JSON index (posts grouped by language). */
const OUTPUT_INDEX_FILE = 'static/search/index.json';

/** Default destination of the serialized lunr index. */
const OUTPUT_LUNR_INDEX_FILE = 'static/search/lunr-index.json';
|
15 |
|
/**
 * Builds two search artifacts from front-matter content files
 * (`.md` / `.html`): a plain JSON index of posts grouped by language and a
 * serialized lunr index.
 *
 * The language of a post is read from a file-name suffix
 * (`post.en.md` -> `en`); posts without a suffix go to `DEFAULT_LANGUAGE`.
 */
class HugoIndexer {
  /**
   * Initializes defaults and then applies command-line overrides
   * (see `_parseArgs`).
   */
  constructor() {
    this.defaultLanguage = getSystemLang();
    this.input = CONTENT_PATH;
    this.output = OUTPUT_INDEX_FILE;
    this.outputLunr = OUTPUT_LUNR_INDEX_FILE;
    // Directory prefix removed from file paths when building post URIs.
    this.baseDir = path.dirname(this.input);
    this.extensions = ['.md', '.html'];

    // Map of language code -> array of indexed post items.
    this.indexData = {};
    this.indexData[DEFAULT_LANGUAGE] = [];

    this._parseArgs();
  }

  /**
   * Applies command-line overrides from `process.argv`:
   *
   *   -l  <lang>  default language
   *   -i  <glob>  input glob
   *   -o  <file>  output path of the JSON index
   *   -ol <file>  output path of the lunr index
   *
   * A flag that is the last argument (no value after it) is reported and
   * ignored, so the current setting is kept.
   */
  _parseArgs() {
    const {argv} = process;

    // Returns the argument following `flag`, or undefined when the flag is
    // absent or has no value.
    const readOption = flag => {
      const position = argv.indexOf(flag);
      if (position === -1) {
        return undefined;
      }

      const value = argv[position + 1];
      if (value === undefined) {
        console.warn(`Option ${flag} requires a value; keeping the current setting`);
      }

      return value;
    };

    const language = readOption('-l');
    if (language !== undefined) {
      this._setDefaultLanguage(language);
    }

    const input = readOption('-i');
    if (input !== undefined) {
      // Goes through the setter so that baseDir follows the new input.
      this._setInput(input);
    }

    const output = readOption('-o');
    if (output !== undefined) {
      this._setOutput(output);
    }

    const outputLunr = readOption('-ol');
    if (outputLunr !== undefined) {
      this.outputLunr = outputLunr;
    }
  }

  /**
   * Expands the glob pattern and indexes every regular file it matches.
   *
   * @param {string} dirPath Glob pattern passed to `glob.sync`.
   */
  parseContent(dirPath) {
    for (const file of glob.sync(dirPath)) {
      // lstat: symbolic links are not followed, so they are not indexed.
      if (fs.lstatSync(file).isFile()) {
        this.parseFile(file);
      }
    }
  }

  /**
   * Reads one content file, converts its body to plain text and appends the
   * resulting item to `this.indexData` under the post's language.
   * Files whose extension is not listed in `this.extensions` are ignored.
   *
   * @param {string} filePath Path of the file to index.
   */
  parseFile(filePath) {
    const ext = path.extname(filePath);

    if (!this.extensions.includes(ext)) {
      return;
    }

    const {data: postMeta, content: postContent} = matter.read(filePath);

    let plainText = '';
    if (ext === '.md') {
      plainText = removeMd(postContent);
    } else if (ext === '.html') {
      // Per the string-strip-html docs the call returns an object whose
      // `result` property holds the stripped text; a plain string return
      // is accepted as well in case an older release is installed.
      const stripped = stripHtml(postContent);
      plainText = typeof stripped === 'string' ? stripped : stripped.result;
    } else {
      // Only reachable if `this.extensions` is extended beyond .md/.html.
      console.log('Skipped file: ' + filePath);
    }

    const tags = postMeta.tags || [];
    const [fileLang, uri] = this._getPostUrl(filePath, postMeta);

    const item = {
      uri,
      title: postMeta.title,
      content: plainText,
      tags,
    };

    // `lang` is only stored on items that carry an explicit language suffix.
    if (fileLang) {
      item.lang = fileLang;
    }

    const lang = fileLang || DEFAULT_LANGUAGE;
    const indexPosts = this.indexData[lang] || [];
    indexPosts.push(item);
    this.indexData[lang] = indexPosts;
  }

  /**
   * Derives the language and the site-relative URI of a post.
   *
   * Steps: drop the file extension, drop the `baseDir` prefix, treat a
   * remaining dotted suffix as the language (`/posts/foo.en` -> `en`),
   * collapse a trailing `/index` to `/`, then apply the front-matter
   * overrides `slug` (replaces the last path segment) and `url`
   * (replaces the whole URI; wins over `slug`).
   *
   * @param {string} filePath Path of the content file.
   * @param {object} postMeta Parsed front matter of the file.
   * @returns {[string, string]} `[lang, uri]`; `lang` is `''` when the file
   *   name has no language suffix.
   */
  _getPostUrl(filePath, postMeta) {
    const isSet = value => value !== undefined && value !== null;

    const withoutExt = filePath.slice(0, Math.max(0, filePath.lastIndexOf('.')));
    let uri = `/${withoutExt}`.replace(this.baseDir + '/', '');

    let lang = path.extname(uri);
    if (lang) {
      lang = lang.replace('.', '');
      uri = uri.slice(0, Math.max(0, uri.lastIndexOf('.')));
    }

    if (uri.endsWith('/index')) {
      // Keep the trailing slash: '/posts/foo/index' -> '/posts/foo/'.
      uri = uri.slice(0, -5);
    }

    if (isSet(postMeta.slug)) {
      // Join with a separator; plain concatenation produced '/postsbar'.
      // posix.join also tolerates a slug that already starts with '/'.
      uri = path.posix.join(path.dirname(uri), String(postMeta.slug));
    }

    if (isSet(postMeta.url)) {
      uri = postMeta.url;
    }

    return [lang, uri];
  }

  /**
   * @param {string} lang New default language.
   */
  _setDefaultLanguage(lang) {
    this.defaultLanguage = lang;
  }

  /**
   * Sets the input glob and re-derives `baseDir` from it so that post URIs
   * are made relative to the new content root.
   *
   * @param {string} dirPath Glob pattern of the content files.
   */
  _setInput(dirPath) {
    this.input = dirPath;
    this.baseDir = path.dirname(dirPath);
  }

  /**
   * @param {string} filePath Destination of the JSON index.
   */
  _setOutput(filePath) {
    this.output = filePath;
  }

  /**
   * Indexes all files matched by `this.input`, writes the JSON index to
   * `this.output` and then writes the lunr index (see `saveLunrIndex`).
   *
   * @throws If the content cannot be read or the JSON index cannot be
   *   written.
   */
  createIndex() {
    console.log(`Arguments: input: ${this.input}, output: ${this.output}, defaultLanguage: ${this.defaultLanguage}`);

    createFolders(this.output);

    // Parse first and write synchronously afterwards: an existing index is
    // not truncated when parsing fails, and the "Saved" message is only
    // printed once the data is actually on disk.
    this.parseContent(this.input);
    fs.writeFileSync(this.output, JSON.stringify(this.indexData, null, 4));

    console.info(`Saved json data: ${this.output}`);

    this.saveLunrIndex();
  }

  /**
   * Builds one lunr index per language from `this.indexData` and writes
   * them, together with a `contentMap` (language -> uri -> title), as a
   * single JSON document to `this.outputLunr`.
   * Write failures are logged, not thrown.
   */
  saveLunrIndex() {
    const contentMap = {};

    const buildIndex = (lang, documents) => {
      contentMap[lang] = contentMap[lang] || {};
      // lunr binds the builder to `this`, hence a regular function.
      return lunr(function () {
        this.ref('uri');

        this.field('title');
        this.field('content');
        // NOTE(review): parseFile() never sets `description` on its items,
        // so this field stays empty for documents produced by this class.
        this.field('description');

        for (const doc of documents) {
          this.add(doc);
          contentMap[lang][doc.uri] = doc.title;
        }
      });
    };

    const lunrIndex = {};
    for (const [lang, documents] of Object.entries(this.indexData)) {
      lunrIndex[lang] = buildIndex(lang, documents);
    }

    lunrIndex.contentMap = contentMap;
    const serializedIdx = JSON.stringify(lunrIndex);

    try {
      // Mirrors createIndex(), which calls createFolders() on its output
      // path before writing; needed when -ol points somewhere else.
      createFolders(this.outputLunr);
      fs.writeFileSync(this.outputLunr, serializedIdx, {flag: 'w+'});
      console.info(`Saved lunr index data: ${this.outputLunr}`);
    } catch (error) {
      console.error(error);
    }
  }
}
|
201 |
|
202 | export {HugoIndexer, DEFAULT_LANGUAGE, CONTENT_PATH, OUTPUT_INDEX_FILE};
|