1 | import fs from 'node:fs';
|
2 | import path from 'node:path';
|
3 | import process from 'node:process';
|
4 | import glob from 'glob';
|
5 | import matter from 'gray-matter';
|
6 | import lunr from 'lunr';
|
7 | import removeMd from 'remove-markdown';
|
8 | import {stripHtml} from 'string-strip-html';
|
9 | import lunrStemmerSupport from 'lunr-languages/lunr.stemmer.support.js';
|
10 | import lunrMulti from 'lunr-languages/lunr.multi.js';
|
11 | import tinyseg from 'lunr-languages/tinyseg.js';
|
12 | import lunrJa from 'lunr-languages/lunr.ja.js';
|
13 | import lunrEs from 'lunr-languages/lunr.es.js';
|
14 | import lunrPt from 'lunr-languages/lunr.pt.js';
|
15 | import lunrDe from 'lunr-languages/lunr.de.js';
|
16 | import lunrRu from 'lunr-languages/lunr.ru.js';
|
17 |
|
18 | import {createFolders, getSystemLang} from './utils.js';
|
19 |
|
// Register every lunr-languages add-on on the shared `lunr` instance.
// The order of this list is the order the plugins are applied in: stemmer
// support and the TinySegmenter wrapper first, then multi-language support,
// then the individual languages (de, es, ja, pt, ru).
const lunrPlugins = [
	lunrStemmerSupport,
	tinyseg,
	lunrMulti,
	lunrDe,
	lunrEs,
	lunrJa,
	lunrPt,
	lunrRu,
];

for (const registerPlugin of lunrPlugins) {
	registerPlugin(lunr);
}
|
28 |
|
/** Language key under which posts without a language suffix in their file name are grouped. */
const DEFAULT_LANGUAGE = 'ru';

/** Default glob pattern of the content files to index. */
const CONTENT_PATH = 'content/**';

// Both generated files live in the same directory by default.
const SEARCH_OUTPUT_DIR = 'static/search';

/** Default destination of the raw JSON document list. */
const OUTPUT_INDEX_FILE = `${SEARCH_OUTPUT_DIR}/index.json`;

/** Default destination of the serialized lunr index. */
const OUTPUT_LUNR_INDEX_FILE = `${SEARCH_OUTPUT_DIR}/lunr-index.json`;
|
33 |
|
/**
 * Builds search data for a content tree.
 *
 * Files matched by the input glob are read with gray-matter, reduced to plain
 * text, grouped by language and written out twice: once as a raw JSON document
 * list (`output`) and once as serialized lunr indexes (`outputLunr`).
 *
 * Command-line flags read from `process.argv`:
 *   -l  <lang>  default language
 *   -i  <glob>  input glob
 *   -o  <file>  raw JSON output file
 *   -ol <file>  lunr index output file
 */
class HugoIndexer {
	constructor() {
		this.defaultLanguage = getSystemLang();
		this.output = OUTPUT_INDEX_FILE;
		this.outputLunr = OUTPUT_LUNR_INDEX_FILE;
		this.extensions = ['.md', '.html'];
		// Sets both `input` and the `baseDir` derived from it.
		this._setInput(CONTENT_PATH);

		// Documents grouped by language code; the default bucket always exists.
		this.indexData = {};
		this.indexData[DEFAULT_LANGUAGE] = [];

		this._parseArgs();
	}

	/**
	 * Returns the command-line argument that follows `flag`.
	 *
	 * @param {string} flag - Flag to look up, e.g. `-i`.
	 * @returns {string|undefined} The value after the flag, or `undefined`
	 *   when the flag is absent or is the last argument.
	 */
	_getArgValue(flag) {
		const position = process.argv.indexOf(flag);
		if (position === -1) {
			return undefined;
		}

		return process.argv[position + 1];
	}

	/**
	 * Applies the `-l`, `-i`, `-o` and `-ol` command-line overrides.
	 * A flag without a following value is ignored so the default is kept.
	 */
	_parseArgs() {
		const language = this._getArgValue('-l');
		if (language !== undefined) {
			this._setDefaultLanguage(language);
		}

		const input = this._getArgValue('-i');
		if (input !== undefined) {
			this._setInput(input);
		}

		const output = this._getArgValue('-o');
		if (output !== undefined) {
			this._setOutput(output);
		}

		const outputLunr = this._getArgValue('-ol');
		if (outputLunr !== undefined) {
			this.outputLunr = outputLunr;
		}
	}

	/**
	 * Parses every regular file matched by the glob.
	 *
	 * @param {string} dirPath - Glob pattern passed to `glob.sync`.
	 */
	parseContent(dirPath) {
		for (const file of glob.sync(dirPath)) {
			// lstat does not follow symlinks, so only regular files are parsed.
			if (fs.lstatSync(file).isFile()) {
				this.parseFile(file);
			}
		}
	}

	/**
	 * Reads one content file and appends its document to `indexData`.
	 * Files whose extension is not listed in `this.extensions` are ignored.
	 *
	 * @param {string} filePath - Path of the file to index.
	 */
	parseFile(filePath) {
		const ext = path.extname(filePath);

		if (!this.extensions.includes(ext)) {
			return;
		}

		const {data: postMeta, content: postContent} = matter.read(filePath);

		let plainText = '';
		if (ext === '.md') {
			plainText = removeMd(postContent);
		} else if (ext === '.html') {
			// Current string-strip-html releases return an object whose text is
			// under `result`; a plain string is still accepted for older ones.
			const stripped = stripHtml(postContent);
			plainText = typeof stripped === 'string' ? stripped : stripped.result;
		} else {
			// Only reachable when `this.extensions` lists something else.
			console.log('Sikpped file: ' + filePath);
		}

		const tags = postMeta.tags || [];
		const [fileLang, uri] = this._getPostUrl(filePath, postMeta);

		const item = {
			uri,
			title: postMeta.title,
			content: plainText,
			tags,
		};

		// `lang` is only stored on documents whose file name carries a language.
		if (fileLang) {
			item.lang = fileLang;
		}

		// NOTE(review): untagged posts are filed under the DEFAULT_LANGUAGE
		// constant, not under `this.defaultLanguage` - confirm this is intended.
		const lang = fileLang || DEFAULT_LANGUAGE;
		const indexPosts = this.indexData[lang] || [];
		indexPosts.push(item);
		this.indexData[lang] = indexPosts;
	}

	/**
	 * Derives the language code and the site URI for a content file.
	 *
	 * The URI is the file path without its extension and without the first
	 * occurrence of `<baseDir>/`. A remaining `.xx` suffix is taken as the
	 * language code, and a trailing `index` segment is dropped. Front matter
	 * `slug` replaces the last path segment; front matter `url` replaces the
	 * whole URI.
	 *
	 * @param {string} filePath - Path of the content file.
	 * @param {object} postMeta - Front matter data of the file.
	 * @returns {[string, string]} `[lang, uri]`; `lang` is `''` when the file
	 *   name has no language suffix.
	 */
	_getPostUrl(filePath, postMeta) {
		let uri = '/' + filePath.slice(0, Math.max(0, filePath.lastIndexOf('.')));
		uri = uri.replace(this.baseDir + '/', '');

		let lang = path.extname(uri);

		if (lang) {
			lang = lang.replace('.', '');
			uri = uri.slice(0, Math.max(0, uri.lastIndexOf('.')));
		}

		if (uri.endsWith('/index')) {
			// Keep the trailing slash: '/posts/foo/index' -> '/posts/foo/'.
			uri = uri.slice(0, -5);
		}

		if (postMeta.slug !== undefined) {
			// Join with exactly one slash, whether or not the slug starts with one.
			const parent = path.dirname(uri).replace(/\/+$/, '');
			const slug = String(postMeta.slug).replace(/^\/+/, '');
			uri = `${parent}/${slug}`;
		}

		if (postMeta.url !== undefined) {
			uri = postMeta.url;
		}

		return [lang, uri];
	}

	/**
	 * @param {string} lang - Language code to store as `defaultLanguage`.
	 */
	_setDefaultLanguage(lang) {
		this.defaultLanguage = lang;
	}

	/**
	 * Sets the input glob and refreshes `baseDir`, which `_getPostUrl` strips
	 * from every URI.
	 *
	 * @param {string} dirPath - Glob pattern of the content files.
	 */
	_setInput(dirPath) {
		this.input = dirPath;
		this.baseDir = path.dirname(dirPath);
	}

	/**
	 * @param {string} filePath - Destination of the raw JSON document list.
	 */
	_setOutput(filePath) {
		this.output = filePath;
	}

	/**
	 * @returns {string[]} Language codes that currently have a bucket in `indexData`.
	 */
	_getLanguages() {
		return Object.keys(this.indexData);
	}

	/**
	 * Parses the input files, writes the raw JSON document list to
	 * `this.output` and then writes the lunr index via `saveLunrIndex`.
	 */
	createIndex() {
		console.log(`Arguments: input: ${this.input}, output: ${this.output}, defaultLanguage: ${this.defaultLanguage}`);

		// Parse before opening the output so a parse failure does not leave an
		// empty file behind.
		this.parseContent(this.input);

		createFolders(this.output);

		this.stream = fs.createWriteStream(this.output);
		this.stream.write(JSON.stringify(this.indexData, null, 4));
		this.stream.end();

		console.info(`Saved json data: ${this.output}`);

		this.saveLunrIndex();
	}

	/**
	 * Builds one lunr index per language and writes them, together with a
	 * `contentMap` (language -> uri -> title), to `this.outputLunr`.
	 * Failures while writing are logged, not thrown.
	 */
	saveLunrIndex() {
		const contentMap = {};
		const languages = this._getLanguages();

		const createLunrIndex = (lang, documents) => {
			contentMap[lang] = contentMap[lang] || {};

			// A regular function is required: lunr binds `this` to its builder.
			return lunr(function () {
				if (languages.length > 1) {
					this.use(lunr.multiLanguage(...languages));
				}

				this.ref('uri');

				this.field('title');
				this.field('content');
				// NOTE(review): documents built by parseFile have no
				// `description` key, so this field is never populated.
				this.field('description');

				for (const doc of documents) {
					this.add(doc);
					contentMap[lang][doc.uri] = doc.title;
				}
			});
		};

		const lunrIndex = {};
		console.log('Languages in Index:', languages);

		for (const lang of languages) {
			lunrIndex[lang] = createLunrIndex(lang, this.indexData[lang]);
		}

		lunrIndex.contentMap = contentMap;
		const serializedIdx = JSON.stringify(lunrIndex);

		try {
			// `-ol` may point outside the folder prepared for `this.output`.
			createFolders(this.outputLunr);
			fs.writeFileSync(this.outputLunr, serializedIdx, {flag: 'w+'});
			console.info(`Saved lunr index data: ${this.outputLunr}`);
		} catch (error) {
			console.error(error);
		}
	}
}
|
232 |
|
233 | export {HugoIndexer, DEFAULT_LANGUAGE, CONTENT_PATH, OUTPUT_INDEX_FILE, OUTPUT_LUNR_INDEX_FILE};
|