UNPKG

5.61 kBJavaScriptView Raw
1import fs from 'node:fs';
2import path from 'node:path';
3import process from 'node:process';
4import {glob} from 'glob';
5import matter from 'gray-matter';
6import removeMd from 'remove-markdown';
7import {stripHtml} from 'string-strip-html';
8import lunr from 'lunr';
9import lunrStemmerSupport from 'lunr-languages/lunr.stemmer.support.js';
10import lunrMulti from 'lunr-languages/lunr.multi.js';
11import tinyseg from 'lunr-languages/tinyseg.js';
12import lunrJa from 'lunr-languages/lunr.ja.js';
13import lunrEs from 'lunr-languages/lunr.es.js';
14import lunrPt from 'lunr-languages/lunr.pt.js';
15import lunrDe from 'lunr-languages/lunr.de.js';
16import lunrRu from 'lunr-languages/lunr.ru.js';
17import {createFolders, getSystemLang} from './utils.js';
18
// Attach every imported lunr-languages module to the shared `lunr` instance.
// The call order is kept exactly as before: stemmer support, tinyseg,
// multi-language support, then the individual language modules
// (presumably later modules depend on the earlier ones — confirm against the
// lunr-languages documentation before reordering).
const lunrPlugins = [
  lunrStemmerSupport,
  tinyseg,
  lunrMulti,
  lunrDe,
  lunrEs,
  lunrJa,
  lunrPt,
  lunrRu,
];

for (const registerPlugin of lunrPlugins) {
  registerPlugin(lunr);
}
27
/** Language key used for documents whose file name carries no language suffix. */
const DEFAULT_LANGUAGE = 'ru';

/** Default glob pattern of the content files to index. */
const CONTENT_PATH = 'content/**';

/** Default path of the JSON file holding the parsed documents. */
const OUTPUT_INDEX_FILE = 'static/search/index.json';

/** Default path of the JSON file holding the serialized lunr index. */
const OUTPUT_LUNR_INDEX_FILE = 'static/search/lunr-index.json';
32
/**
 * Builds search data for a content tree: a plain JSON document list grouped by
 * language and a serialized lunr index per language.
 *
 * Command-line options (read from `process.argv` in the constructor):
 *   -l  <lang>  default language
 *   -i  <glob>  input glob
 *   -o  <file>  output file for the JSON document list
 *   -ol <file>  output file for the serialized lunr index
 */
class HugoIndexer {
  /**
   * Sets the defaults and then applies any command-line overrides.
   */
  constructor() {
    this.defaultLanguage = getSystemLang();
    this.input = CONTENT_PATH;
    this.output = OUTPUT_INDEX_FILE;
    this.outputLunr = OUTPUT_LUNR_INDEX_FILE;
    this.baseDir = path.dirname(this.input);
    this.extensions = ['.md', '.html'];

    // Result index: language code -> list of parsed documents.
    // NOTE(review): documents without a language suffix are stored under
    // DEFAULT_LANGUAGE, not under `this.defaultLanguage` — confirm this is intended.
    this.indexData = {[DEFAULT_LANGUAGE]: []};

    this._parseArgs();
  }

  /**
   * Returns the value that follows `flag` in `process.argv`.
   *
   * @param {string} flag - Option name, e.g. `-i`.
   * @returns {string|undefined} The value, or `undefined` when the flag is
   *   absent or is the last argument (a warning is printed in the latter case).
   */
  _readArgValue(flag) {
    const flagIndex = process.argv.indexOf(flag);
    if (flagIndex === -1) {
      return undefined;
    }

    const value = process.argv[flagIndex + 1];
    if (value === undefined) {
      console.warn(`Missing value for option: ${flag}`);
    }

    return value;
  }

  /**
   * Applies the command-line options to this instance. Options that are absent
   * or have no value leave the current setting untouched.
   */
  _parseArgs() {
    const language = this._readArgValue('-l');
    if (language !== undefined) {
      // Previously this overwrote `this.output`.
      this._setDefaultLanguage(language);
    }

    const input = this._readArgValue('-i');
    if (input !== undefined) {
      this._setInput(input);
    }

    const output = this._readArgValue('-o');
    if (output !== undefined) {
      this._setOutput(output);
    }

    const outputLunr = this._readArgValue('-ol');
    if (outputLunr !== undefined) {
      this.outputLunr = outputLunr;
    }
  }

  /**
   * Parses every regular file matched by the glob pattern.
   *
   * @param {string} dirPath - Glob pattern passed to `glob.sync`.
   */
  parseContent(dirPath) {
    const files = glob.sync(dirPath);
    for (const file of files) {
      // lstat does not follow symlinks, so only regular files are parsed.
      if (fs.lstatSync(file).isFile()) {
        this.parseFile(file);
      }
    }
  }

  /**
   * Reads one content file, strips its markup and appends the resulting
   * document to `this.indexData` under the file's language.
   * Files whose extension is not listed in `this.extensions` are ignored.
   *
   * @param {string} filePath - Path of the file to parse.
   */
  parseFile(filePath) {
    const ext = path.extname(filePath);

    if (!this.extensions.includes(ext)) {
      return; // Not an indexable extension
    }

    const {data: postMeta, content: postContent} = matter.read(filePath);

    let plainText = '';
    if (ext === '.md') {
      plainText = removeMd(postContent);
    } else if (ext === '.html') {
      // Current string-strip-html versions return an object whose `result`
      // property holds the text; older versions returned the string directly.
      const stripped = stripHtml(postContent);
      plainText = typeof stripped === 'string' ? stripped : stripped.result;
    } else {
      // Only reachable when `this.extensions` was extended with a type that
      // has no markup stripper here.
      console.log('Skipped file: ' + filePath);
      return;
    }

    const tags = postMeta.tags || [];
    const [lang, uri] = this._getPostUrl(filePath, postMeta);

    const item = {
      uri,
      title: postMeta.title,
      description: postMeta.description,
      content: plainText,
      tags,
    };

    if (lang) {
      item.lang = lang;
    }

    const bucket = lang || DEFAULT_LANGUAGE;
    if (!this.indexData[bucket]) {
      this.indexData[bucket] = [];
    }

    this.indexData[bucket].push(item);
  }

  /**
   * Derives the language code and the URI of a document from its file path and
   * front matter.
   *
   * @param {string} filePath - Path of the content file, e.g. `content/posts/foo.en.md`.
   * @param {object} postMeta - Front matter; `slug` replaces the last URI
   *   segment, `url` replaces the whole URI.
   * @returns {[string, string]} `[lang, uri]`; `lang` is `''` when the file
   *   name has no language suffix.
   */
  _getPostUrl(filePath, postMeta) {
    // Remove extension .md || .html
    let uri = '/' + filePath.slice(0, Math.max(0, filePath.lastIndexOf('.')));
    uri = uri.replace(this.baseDir + '/', '');

    // Whatever still looks like an extension is taken as the language, [.en] etc.
    let lang = path.extname(uri);
    if (lang) {
      lang = lang.replace('.', '');
      uri = uri.slice(0, Math.max(0, uri.lastIndexOf('.')));
    }

    if (uri.endsWith('/index')) {
      uri = uri.slice(0, -5); // Drop "index" but keep the trailing slash
    }

    if (postMeta.slug !== undefined && postMeta.slug !== null) {
      // Join with a separator: plain concatenation produced "/postsmy-slug".
      // posix.join also accepts slugs that already start with "/".
      uri = path.posix.join(path.posix.dirname(uri), String(postMeta.slug));
    }

    if (postMeta.url !== undefined && postMeta.url !== null) {
      uri = postMeta.url;
    }

    return [lang, uri];
  }

  /**
   * @param {string} lang - New default language.
   */
  _setDefaultLanguage(lang) {
    this.defaultLanguage = lang;
  }

  /**
   * Sets the input glob and refreshes `baseDir`, which `_getPostUrl` strips
   * from every URI.
   *
   * @param {string} dirPath - New input glob pattern.
   */
  _setInput(dirPath) {
    this.input = dirPath;
    this.baseDir = path.dirname(dirPath);
  }

  /**
   * @param {string} filePath - New output file for the JSON document list.
   */
  _setOutput(filePath) {
    this.output = filePath;
  }

  /**
   * @returns {string[]} Language codes present in the created index.
   */
  _getLanguages() {
    return Object.keys(this.indexData);
  }

  /**
   * Parses the input files, writes the JSON document list to `this.output`
   * and then writes the lunr index via `saveLunrIndex()`.
   */
  createIndex() {
    console.log(`Arguments: input: ${this.input}, output: ${this.output}, defaultLanguage: ${this.defaultLanguage}`);

    createFolders(this.output);

    this.parseContent(this.input);

    // Written synchronously after parsing, so an existing file is not touched
    // when parsing throws and the log below is only printed once the data is
    // on disk.
    fs.writeFileSync(this.output, JSON.stringify(this.indexData, null, 4));

    console.info(`Saved json data: ${this.output}`);

    this.saveLunrIndex();
  }

  /**
   * Builds one lunr index per language from `this.indexData` and writes them,
   * together with a `contentMap` (language -> uri -> title), to
   * `this.outputLunr`. A language whose index cannot be built is stored as an
   * empty object; a write failure is logged and not rethrown.
   */
  saveLunrIndex() {
    const languages = this._getLanguages();
    const contentMap = {};
    const lunrIndex = {};

    const buildIndex = (lang, documents) => {
      contentMap[lang] = contentMap[lang] || {};
      // The configuration callback uses `this` as the lunr builder, so it
      // has to stay a regular function rather than an arrow function.
      return lunr(function () {
        if (languages.length > 1) {
          this.use(lunr.multiLanguage(...languages));
        }

        this.ref('uri');

        this.field('title');
        this.field('content');
        this.field('description');

        for (const doc of documents) {
          this.add(doc);
          contentMap[lang][doc.uri] = doc.title;
        }
      });
    };

    console.log('Languages in Index:', languages);

    for (const lang of languages) {
      try {
        lunrIndex[lang] = buildIndex(lang, this.indexData[lang]);
      } catch (error) {
        console.error(`Error creating lunr index for language: ${lang}`, error);
        lunrIndex[lang] = {};
      }
    }

    lunrIndex.contentMap = contentMap;
    const serializedIdx = JSON.stringify(lunrIndex);

    try {
      // The lunr output may live in a different directory than `this.output`.
      fs.mkdirSync(path.dirname(this.outputLunr), {recursive: true});
      fs.writeFileSync(this.outputLunr, serializedIdx, {flag: 'w+'});
      console.info(`Saved lunr index data: ${this.outputLunr}`);
    } catch (error) {
      console.error(error);
    }
  }
}
238
239export {HugoIndexer, DEFAULT_LANGUAGE, CONTENT_PATH, OUTPUT_INDEX_FILE, OUTPUT_LUNR_INDEX_FILE};