UNPKG

4.61 kBJavaScriptView Raw
1import fs from 'node:fs';
2import path from 'node:path';
3import process from 'node:process';
4import glob from 'glob';
5import matter from 'gray-matter';
6import lunr from 'lunr';
7import removeMd from 'remove-markdown';
8import {stripHtml} from 'string-strip-html';
9import {createFolders, getSystemLang} from './utils.js';
10
// Language key under which content without a language suffix in its file name is indexed.
const DEFAULT_LANGUAGE = 'ru';
// Default glob pattern used to discover content files.
const CONTENT_PATH = 'content/**';
// Default path of the generated JSON search index.
const OUTPUT_INDEX_FILE = 'static/search/index.json';
// Default path of the serialized lunr index.
const OUTPUT_LUNR_INDEX_FILE = 'static/search/lunr-index.json';
15
/**
 * Builds a JSON search index and a serialized lunr index from content files.
 *
 * Files are discovered with a glob pattern, their front matter is read with
 * gray-matter, and the body is reduced to plain text. Entries are grouped per
 * language, where the language is taken from an optional file-name suffix
 * such as `post.en.md`.
 *
 * Command-line flags (read from `process.argv`):
 *   -l  <lang>  default language
 *   -i  <glob>  input glob pattern
 *   -o  <file>  output path of the JSON index
 *   -ol <file>  output path of the lunr index
 */
class HugoIndexer {
  constructor() {
    this.defaultLanguage = getSystemLang();
    this.input = CONTENT_PATH;
    this.output = OUTPUT_INDEX_FILE;
    this.outputLunr = OUTPUT_LUNR_INDEX_FILE;
    // NOTE(review): derived from the default input before the CLI arguments
    // are parsed, so `-i` does not change it - confirm this is intended.
    this.baseDir = path.dirname(this.input);
    this.extensions = ['.md', '.html'];

    this.indexData = {}; // Result index, keyed by language
    this.indexData[DEFAULT_LANGUAGE] = [];

    this._parseArgs();
  }

  /**
   * Returns the value that follows `flag` in `process.argv`.
   *
   * @param {string} flag - Flag to look up, e.g. `-o`.
   * @returns {string|undefined} The following argument, or `undefined` when
   *   the flag is absent or is the last argument.
   */
  _getArgValue(flag) {
    const position = process.argv.indexOf(flag);
    return position === -1 ? undefined : process.argv[position + 1];
  }

  /**
   * Applies command-line overrides to the defaults set in the constructor.
   * A flag without a value is ignored so the default stays in place.
   */
  _parseArgs() {
    const language = this._getArgValue('-l');
    if (language !== undefined) {
      // `-l` selects the default language; it must not touch the output path.
      this.defaultLanguage = language;
    }

    const input = this._getArgValue('-i');
    if (input !== undefined) {
      this.input = input;
    }

    const output = this._getArgValue('-o');
    if (output !== undefined) {
      this.output = output;
    }

    const outputLunr = this._getArgValue('-ol');
    if (outputLunr !== undefined) {
      this.outputLunr = outputLunr;
    }
  }

  /**
   * Indexes every regular file matched by the glob pattern.
   *
   * @param {string} dirPath - Glob pattern of the content to scan.
   */
  parseContent(dirPath) {
    const files = glob.sync(dirPath);
    for (const file of files) {
      // The glob also matches directories; only regular files are parsed.
      if (fs.lstatSync(file).isFile()) {
        this.parseFile(file);
      }
    }
  }

  /**
   * Reads one content file and appends its entry to `this.indexData`.
   * Files whose extension is not listed in `this.extensions` are ignored.
   *
   * @param {string} filePath - Path of the content file.
   */
  parseFile(filePath) {
    const ext = path.extname(filePath);

    if (!this.extensions.includes(ext)) {
      return; // Not an indexed extension
    }

    const {data: postMeta, content: postContent} = matter.read(filePath);

    let plainText = '';
    if (ext === '.md') {
      plainText = removeMd(postContent);
    } else if (ext === '.html') {
      // The named-export versions of string-strip-html return a report
      // object with the text under `result`; a plain string is accepted too.
      const stripped = stripHtml(postContent);
      plainText = typeof stripped === 'string' ? stripped : stripped.result;
    } else {
      // Only reachable when `this.extensions` was extended with an extension
      // that has no text extractor; the entry is kept with empty content.
      console.log(`Skipped file: ${filePath}`);
    }

    const tags = postMeta.tags || [];
    const [lang, uri] = this._getPostUrl(filePath, postMeta);

    const item = {
      uri,
      title: postMeta.title,
      content: plainText,
      tags,
    };

    if (lang) {
      item.lang = lang;
    }

    // NOTE(review): untagged content is filed under DEFAULT_LANGUAGE, while
    // `this.defaultLanguage` is only reported in the log - confirm which one
    // is meant to be used here.
    const bucket = lang || DEFAULT_LANGUAGE;
    const indexPosts = this.indexData[bucket] || [];
    indexPosts.push(item);
    this.indexData[bucket] = indexPosts;
  }

  /**
   * Derives the language and the site URI of a content file.
   *
   * @param {string} filePath - Path of the content file.
   * @param {object} postMeta - Front matter; `slug` and `url` are honoured.
   * @returns {[string, string]} `[lang, uri]`; `lang` is an empty string when
   *   the file name carries no language suffix.
   */
  _getPostUrl(filePath, postMeta) {
    // Drop the file extension (.md / .html) and the content base directory.
    let uri = '/' + filePath.slice(0, Math.max(0, filePath.lastIndexOf('.')));
    uri = uri.replace(this.baseDir + '/', '');

    // Whatever extension is left is treated as the language suffix (".en").
    let lang = path.extname(uri);
    if (lang) {
      lang = lang.replace('.', '');
      uri = uri.slice(0, Math.max(0, uri.lastIndexOf('.')));
    }

    if (uri.endsWith('/index')) {
      uri = uri.slice(0, -5); // Keep the trailing slash, drop "index"
    }

    if (postMeta.slug !== undefined && postMeta.slug !== null) {
      // The slug replaces the last path segment; join so that a separator is
      // always present between the parent path and the slug.
      uri = path.posix.join(path.posix.dirname(uri), String(postMeta.slug));
    }

    if (postMeta.url !== undefined && postMeta.url !== null) {
      uri = postMeta.url; // An explicit url overrides everything else
    }

    return [lang, uri];
  }

  _setDefaultLanguage(lang) {
    this.defaultLanguage = lang;
  }

  _setInput(dirPath) {
    this.input = dirPath;
  }

  _setOutput(filePath) {
    this.output = filePath;
  }

  /**
   * Parses the input content, writes the JSON index to `this.output` and then
   * writes the lunr index.
   */
  createIndex() {
    console.log(`Arguments: input: ${this.input}, output: ${this.output}, defaultLanguage: ${this.defaultLanguage}`);

    createFolders(this.output);

    this.parseContent(this.input);

    // Written synchronously after parsing so that a parse failure does not
    // leave a truncated file and the message below is only logged on success.
    fs.writeFileSync(this.output, JSON.stringify(this.indexData, null, 4));

    console.info(`Saved json data: ${this.output}`);

    this.saveLunrIndex();
  }

  /**
   * Builds one lunr index per language from `this.indexData` and writes them,
   * together with a per-language uri -> title map stored under `contentMap`,
   * to `this.outputLunr`. Write errors are logged, not thrown.
   */
  saveLunrIndex() {
    const contentMap = {};
    const lunrIndex = {};

    for (const [lang, documents] of Object.entries(this.indexData)) {
      const titlesByUri = {};

      // A regular function is required: the builder is accessed via `this`.
      lunrIndex[lang] = lunr(function () {
        this.ref('uri');

        this.field('title');
        this.field('content');
        // NOTE(review): indexed entries carry no `description` property.
        this.field('description');

        for (const doc of documents) {
          this.add(doc);
          titlesByUri[doc.uri] = doc.title;
        }
      });

      contentMap[lang] = titlesByUri;
    }

    lunrIndex.contentMap = contentMap;
    const serializedIdx = JSON.stringify(lunrIndex);

    try {
      // Same call as for the JSON index; presumably creates the parent
      // folders of the given file path - TODO confirm against utils.js.
      createFolders(this.outputLunr);
      fs.writeFileSync(this.outputLunr, serializedIdx, {flag: 'w+'});
      console.info(`Saved lunr index data: ${this.outputLunr}`);
    } catch (error) {
      console.error(error);
    }
  }
}
201
202export {HugoIndexer, DEFAULT_LANGUAGE, CONTENT_PATH, OUTPUT_INDEX_FILE};