UNPKG

5.46 kBJavaScriptView Raw
1import fs from 'node:fs';
2import path from 'node:path';
3import process from 'node:process';
4import glob from 'glob';
5import matter from 'gray-matter';
6import lunr from 'lunr';
7import removeMd from 'remove-markdown';
8import {stripHtml} from 'string-strip-html';
9import lunrStemmerSupport from 'lunr-languages/lunr.stemmer.support.js';
10import lunrMulti from 'lunr-languages/lunr.multi.js';
11import tinyseg from 'lunr-languages/tinyseg.js';
12import lunrJa from 'lunr-languages/lunr.ja.js';
13import lunrEs from 'lunr-languages/lunr.es.js';
14import lunrPt from 'lunr-languages/lunr.pt.js';
15import lunrDe from 'lunr-languages/lunr.de.js';
16import lunrRu from 'lunr-languages/lunr.ru.js';
17
18import {createFolders, getSystemLang} from './utils.js';
19
// Install every lunr-languages plugin on the shared `lunr` object.
// The array order is the registration order: stemmer support, TinySegmenter
// and the multi-language helper first, then the de, es, ja, pt and ru modules.
const lunrPlugins = [
  lunrStemmerSupport,
  tinyseg,
  lunrMulti,
  lunrDe,
  lunrEs,
  lunrJa,
  lunrPt,
  lunrRu,
];

for (const registerPlugin of lunrPlugins) {
  registerPlugin(lunr);
}
28
/** Language key under which content without a language suffix is indexed. */
const DEFAULT_LANGUAGE = 'ru';

/** Glob pattern selecting the content files to index. */
const CONTENT_PATH = 'content/**';

// Both generated files are written to the same directory by default.
const SEARCH_OUTPUT_DIR = 'static/search';

/** Default destination of the plain JSON index. */
const OUTPUT_INDEX_FILE = `${SEARCH_OUTPUT_DIR}/index.json`;

/** Default destination of the serialized lunr index. */
const OUTPUT_LUNR_INDEX_FILE = `${SEARCH_OUTPUT_DIR}/lunr-index.json`;
33
/**
 * Builds search data from content files: a plain JSON index grouped by
 * language and a serialized lunr index (one per language plus a uri -> title
 * map).
 *
 * Command-line options read from `process.argv`:
 *   -l  <lang>  default language
 *   -i  <glob>  input glob (default: CONTENT_PATH)
 *   -o  <file>  output file for the JSON index (default: OUTPUT_INDEX_FILE)
 *   -ol <file>  output file for the lunr index (default: OUTPUT_LUNR_INDEX_FILE)
 */
class HugoIndexer {
  constructor() {
    this.defaultLanguage = getSystemLang();
    this.input = CONTENT_PATH;
    this.output = OUTPUT_INDEX_FILE;
    this.outputLunr = OUTPUT_LUNR_INDEX_FILE;
    this.baseDir = path.dirname(this.input);
    this.extensions = ['.md', '.html'];

    this.indexData = {}; // Result index: language code -> array of items
    this.indexData[DEFAULT_LANGUAGE] = [];

    this._parseArgs();
  }

  /**
   * Returns the value following `flag` in `process.argv`.
   *
   * @param {string} flag - Option name, e.g. `-i`.
   * @returns {string|undefined} The value, or `undefined` when the flag is
   *   absent or is the last argument (a warning is printed in that case).
   */
  _getArgValue(flag) {
    const position = process.argv.indexOf(flag);
    if (position === -1) {
      return undefined;
    }

    const value = process.argv[position + 1];
    if (value === undefined) {
      console.warn(`Option ${flag} expects a value; keeping the default`);
    }

    return value;
  }

  /**
   * Applies the command-line overrides. An option without a value leaves the
   * corresponding default untouched.
   */
  _parseArgs() {
    const language = this._getArgValue('-l');
    if (language !== undefined) {
      // Previously this value was written to `this.output` by mistake.
      this._setDefaultLanguage(language);
    }

    const input = this._getArgValue('-i');
    if (input !== undefined) {
      this._setInput(input);
    }

    const output = this._getArgValue('-o');
    if (output !== undefined) {
      this._setOutput(output);
    }

    const outputLunr = this._getArgValue('-ol');
    if (outputLunr !== undefined) {
      this.outputLunr = outputLunr;
    }
  }

  /**
   * Parses every regular file matched by the glob.
   *
   * @param {string} dirPath - Glob pattern passed to `glob.sync`.
   */
  parseContent(dirPath) {
    const files = glob.sync(dirPath);
    for (const file of files) {
      // `lstatSync` does not follow symlinks, so only regular files are parsed.
      if (fs.lstatSync(file).isFile()) {
        this.parseFile(file);
      }
    }
  }

  /**
   * Reads one content file and appends its index item to `this.indexData`
   * under the file's language (DEFAULT_LANGUAGE when the name has none).
   * Files whose extension is not listed in `this.extensions` are ignored.
   *
   * @param {string} filePath - Path of the file to index.
   */
  parseFile(filePath) {
    const ext = path.extname(filePath);

    if (!this.extensions.includes(ext)) {
      return; // Not .md or .html
    }

    const {data: postMeta, content: postContent} = matter.read(filePath);

    let plainText = '';
    if (ext === '.md') {
      plainText = removeMd(postContent);
    } else if (ext === '.html') {
      // `stripHtml` returns an object; the stripped text is in `result`.
      plainText = stripHtml(postContent).result;
    } else {
      // Only reachable when `this.extensions` was extended by the caller.
      console.log('Skipped file: ' + filePath);
    }

    const tags = postMeta.tags || [];
    const [fileLang, uri] = this._getPostUrl(filePath, postMeta);

    const item = {
      uri,
      title: postMeta.title,
      content: plainText,
      tags,
    };

    if (fileLang) {
      item.lang = fileLang;
    }

    // NOTE(review): the fallback bucket is the DEFAULT_LANGUAGE constant, not
    // `this.defaultLanguage`; kept as-is because changing it alters the shape
    // of the generated index. Confirm which one is intended.
    const lang = fileLang || DEFAULT_LANGUAGE;

    const indexPosts = this.indexData[lang] || [];
    indexPosts.push(item);
    this.indexData[lang] = indexPosts;
  }

  /**
   * Derives the language code and the URI of a content file.
   *
   * Steps: drop the file extension, drop the leading `baseDir`, split off a
   * trailing `.xx` language suffix, turn `.../index` into `.../`, then apply
   * the `slug` and `url` front-matter overrides (`url` wins).
   *
   * @param {string} filePath - Path as returned by the glob.
   * @param {object} postMeta - Front matter of the file.
   * @returns {[string, string]} `[lang, uri]`; `lang` is `''` when the file
   *   name has no language suffix.
   */
  _getPostUrl(filePath, postMeta) {
    // Remove extension .md || .html
    let relativePath = filePath.slice(0, Math.max(0, filePath.lastIndexOf('.')));

    // Strip the base directory only when it really is the leading segment.
    const prefix = this.baseDir + '/';
    if (relativePath.startsWith(prefix)) {
      relativePath = relativePath.slice(prefix.length);
    }

    let uri = '/' + relativePath;
    let lang = path.extname(uri);

    if (lang) {
      // Remove lang extension [.en] etc
      lang = lang.replace('.', '');
      uri = uri.slice(0, Math.max(0, uri.lastIndexOf('.')));
    }

    if (uri.endsWith('/index')) {
      uri = uri.slice(0, -5); // Keeps the trailing slash
    }

    if (postMeta.slug !== undefined && postMeta.slug !== null) {
      // Replace the last path segment with the slug. `join` inserts the
      // separator that plain concatenation was missing and collapses a
      // duplicate one when the slug already starts with '/'.
      uri = path.posix.join(path.posix.dirname(uri), String(postMeta.slug));
    }

    if (postMeta.url !== undefined && postMeta.url !== null) {
      uri = postMeta.url;
    }

    return [lang, uri];
  }

  /**
   * @param {string} lang - Default language code.
   */
  _setDefaultLanguage(lang) {
    this.defaultLanguage = lang;
  }

  /**
   * Sets the input glob and refreshes `baseDir`, which is stripped from file
   * paths when URIs are built.
   *
   * @param {string} dirPath - Glob pattern of the content files.
   */
  _setInput(dirPath) {
    this.input = dirPath;
    this.baseDir = path.dirname(dirPath);
  }

  /**
   * @param {string} filePath - Destination of the JSON index.
   */
  _setOutput(filePath) {
    this.output = filePath;
  }

  /**
   * @returns {string[]} Language codes present in the created index.
   */
  _getLanguages() {
    return Object.keys(this.indexData);
  }

  /**
   * Parses the input files, writes the JSON index to `this.output`, then
   * writes the lunr index via `saveLunrIndex`.
   */
  createIndex() {
    console.log(`Arguments: input: ${this.input}, output: ${this.output}, defaultLanguage: ${this.defaultLanguage}`);

    createFolders(this.output);

    this.parseContent(this.input);

    // Written synchronously so the message below is only printed once the
    // data is actually on disk.
    fs.writeFileSync(this.output, JSON.stringify(this.indexData, null, 4));

    console.info(`Saved json data: ${this.output}`);

    this.saveLunrIndex();
  }

  /**
   * Builds one lunr index per language from `this.indexData` and writes them,
   * together with a `contentMap` (language -> uri -> title), to
   * `this.outputLunr`. Write errors are logged, not thrown.
   */
  saveLunrIndex() {
    const contentMap = {};
    const languages = this._getLanguages();

    const createLunrIndex = (lang, documents) => {
      contentMap[lang] = contentMap[lang] || {};

      // Must stay a `function`: lunr calls it with the index builder as `this`.
      return lunr(function () {
        if (languages.length > 1) {
          this.use(lunr.multiLanguage(...languages));
        }

        this.ref('uri');

        this.field('title');
        this.field('content');
        this.field('description');

        for (const doc of documents) {
          this.add(doc);
          contentMap[lang][doc.uri] = doc.title;
        }
      });
    };

    const lunrIndex = {};
    console.log('Languages in Index:', languages);

    for (const lang of languages) {
      lunrIndex[lang] = createLunrIndex(lang, this.indexData[lang]);
    }

    lunrIndex.contentMap = contentMap;
    const serializedIdx = JSON.stringify(lunrIndex);

    try {
      // Same helper call as for the JSON index, since `-ol` may point at a
      // different directory than `-o`.
      createFolders(this.outputLunr);
      fs.writeFileSync(this.outputLunr, serializedIdx, {flag: 'w+'});
      console.info(`Saved lunr index data: ${this.outputLunr}`);
    } catch (error) {
      console.error(error);
    }
  }
}
232
233export {HugoIndexer, DEFAULT_LANGUAGE, CONTENT_PATH, OUTPUT_INDEX_FILE, OUTPUT_LUNR_INDEX_FILE};