1 | import split from 'graphemesplit';
|
2 |
|
3 | const whitespaceRegex = /^\s+$/;
|
4 | const nonWordRegex = /^[`~!@#$%^&*()\-=_+{}[\]\|\\;':",./<>?]+$/;
|
5 | const sortKind = {
|
6 | insertOrder: "insertOrder",
|
7 | bestMatch: "bestMatch"
|
8 | };
|
9 |
|
10 | const defaultOptions = {
|
11 | keySelector: s => s,
|
12 | threshold: .6,
|
13 | ignoreCase: true,
|
14 | ignoreSymbols: true,
|
15 | normalizeWhitespace: true,
|
16 | returnMatchData: false,
|
17 | useDamerau: true,
|
18 | useSellers: true,
|
19 | sortBy: sortKind.bestMatch
|
20 | };
|
21 |
|
22 | const noop = () => {};
|
23 |
|
24 | const arrayWrap = item => item instanceof Array ? item : [item];
|
25 |
|
26 |
|
27 | function normalize(string, options) {
|
28 | const lower = options.ignoreCase ? string.toLocaleLowerCase() : string;
|
29 |
|
30 | const normal = [];
|
31 | const map = [];
|
32 | let lastWasWhitespace = true;
|
33 | let length = 0;
|
34 |
|
35 | for (const grapheme of split(lower)) {
|
36 | whitespaceRegex.lastIndex = 0;
|
37 | nonWordRegex.lastIndex = 0;
|
38 |
|
39 | if (options.normalizeWhitespace && whitespaceRegex.test(grapheme)) {
|
40 | if (!lastWasWhitespace) {
|
41 | normal.push(" ");
|
42 | map.push(length);
|
43 | lastWasWhitespace = true;
|
44 | }
|
45 | } else if (!(options.ignoreSymbols && nonWordRegex.test(grapheme))) {
|
46 | normal.push(grapheme.normalize());
|
47 | map.push(length);
|
48 | lastWasWhitespace = false;
|
49 | }
|
50 |
|
51 | length += grapheme.length;
|
52 | }
|
53 |
|
54 |
|
55 | map.push(string.length);
|
56 |
|
57 | while (normal[normal.length - 1] === " ") {
|
58 | normal.pop();
|
59 | map.pop();
|
60 | }
|
61 |
|
62 | return {
|
63 | original: string,
|
64 | normal,
|
65 | map
|
66 | };
|
67 | }
|
68 |
|
69 |
|
70 | function denormalizeMatchPosition(match, map) {
|
71 | return {
|
72 | index: map[match.start],
|
73 | length: map[match.end + 1] - map[match.start]
|
74 | };
|
75 | }
|
76 |
|
77 |
|
78 | function walkBack(rows, scoreIndex) {
|
79 | if (scoreIndex === 0) {
|
80 | return {
|
81 | index: 0,
|
82 | length: 0
|
83 | };
|
84 | }
|
85 |
|
86 | let start = scoreIndex;
|
87 |
|
88 | for (let i = rows.length - 2; i > 0 && start > 1; i--) {
|
89 | const row = rows[i];
|
90 | start = row[start] < row[start - 1] ? start : start - 1;
|
91 | }
|
92 |
|
93 | return {
|
94 | start: start - 1,
|
95 | end: scoreIndex - 1
|
96 | };
|
97 | }
|
98 |
|
99 |
|
100 | function noopWalkback() {
|
101 | return {
|
102 | start: 0,
|
103 | end: 0
|
104 | };
|
105 | }
|
106 |
|
107 | const levUpdateScore = () => true;
|
108 |
|
109 | const sellersUpdateScore = (cur, min) => cur < min;
|
110 |
|
111 | function getLevScore(rows, length) {
|
112 | const lastRow = rows[rows.length - 1];
|
113 | const lastCell = lastRow[length - 1];
|
114 | const scoreLength = Math.max(rows.length, length);
|
115 | return {
|
116 | score: 1 - lastCell / (scoreLength - 1),
|
117 | scoreIndex: length - 1
|
118 | };
|
119 | }
|
120 |
|
121 | function getSellersScore(rows, length) {
|
122 |
|
123 | if (rows.length === 1) {
|
124 | return {
|
125 | score: 1,
|
126 | scoreIndex: 0
|
127 | };
|
128 | }
|
129 |
|
130 | const lastRow = rows[rows.length - 1];
|
131 | let minValue = lastRow[0];
|
132 | let minIndex = 0;
|
133 |
|
134 | for (let i = 1; i < length; i++) {
|
135 | const val = lastRow[i];
|
136 |
|
137 | if (val < minValue) {
|
138 | minValue = val;
|
139 | minIndex = i;
|
140 | }
|
141 | }
|
142 |
|
143 | return {
|
144 | score: 1 - minValue / (rows.length - 1),
|
145 | scoreIndex: minIndex
|
146 | };
|
147 | }
|
148 |
|
149 | function initLevRows(rowCount, columnCount) {
|
150 | const rows = new Array(rowCount);
|
151 |
|
152 | for (let i = 0; i < rowCount; i++) {
|
153 | rows[i] = new Array(columnCount);
|
154 | rows[i][0] = i;
|
155 | }
|
156 |
|
157 | for (let i = 0; i < columnCount; i++) {
|
158 | rows[0][i] = i;
|
159 | }
|
160 |
|
161 | return rows;
|
162 | }
|
163 |
|
164 | function initSellersRows(rowCount, columnCount) {
|
165 | const rows = new Array(rowCount);
|
166 | rows[0] = new Array(columnCount).fill(0);
|
167 |
|
168 | for (let i = 1; i < rowCount; i++) {
|
169 | rows[i] = new Array(columnCount);
|
170 | rows[i][0] = i;
|
171 | }
|
172 |
|
173 | return rows;
|
174 | }
|
175 |
|
176 |
|
177 | function levCore(term, candidate, rows, i, j) {
|
178 | const rowA = rows[i];
|
179 | const rowB = rows[i + 1];
|
180 | const cost = term[i] === candidate[j] ? 0 : 1;
|
181 | let m;
|
182 | let min = rowB[j] + 1;
|
183 |
|
184 | if ((m = rowA[j + 1] + 1) < min) min = m;
|
185 |
|
186 | if ((m = rowA[j] + cost) < min) min = m;
|
187 |
|
188 | rowB[j + 1] = min;
|
189 | }
|
190 |
|
191 |
|
192 |
|
193 | function levenshtein(term, candidate, rows, j) {
|
194 | for (let i = 0; i < term.length; i++) {
|
195 | levCore(term, candidate, rows, i, j);
|
196 | }
|
197 | }
|
198 |
|
199 |
|
200 |
|
201 |
|
202 | function damerauLevenshtein(term, candidate, rows, j) {
|
203 |
|
204 |
|
205 | if (j === 0) {
|
206 | levenshtein(term, candidate, rows, j);
|
207 | return;
|
208 | }
|
209 |
|
210 |
|
211 |
|
212 | if (term.length > 0) {
|
213 | levCore(term, candidate, rows, 0, j);
|
214 | }
|
215 |
|
216 | for (let i = 1; i < term.length; i++) {
|
217 | const rowA = rows[i - 1];
|
218 | const rowB = rows[i];
|
219 | const rowC = rows[i + 1];
|
220 | const cost = term[i] === candidate[j] ? 0 : 1;
|
221 | let m;
|
222 |
|
223 | let min = rowC[j] + 1;
|
224 |
|
225 | if ((m = rowB[j + 1] + 1) < min) min = m;
|
226 |
|
227 | if ((m = rowB[j] + cost) < min) min = m;
|
228 |
|
229 | if (term[i] === candidate[j - 1] && term[i - 1] === candidate[j] && (m = rowA[j - 1] + cost) < min) min = m;
|
230 | rowC[j + 1] = min;
|
231 | }
|
232 | }
|
233 |
|
234 |
|
235 |
|
236 | function trieInsert(trie, string, item) {
|
237 | let walker = trie;
|
238 |
|
239 | for (let i = 0; i < string.length; i++) {
|
240 | const char = string[i];
|
241 |
|
242 | if (walker.children[char] == null) {
|
243 | walker.children[char] = {
|
244 | children: {},
|
245 | candidates: [],
|
246 | depth: 0
|
247 | };
|
248 | }
|
249 |
|
250 |
|
251 | walker.depth = Math.max(walker.depth, string.length - i);
|
252 |
|
253 | walker = walker.children[char];
|
254 | }
|
255 |
|
256 | walker.candidates.push(item);
|
257 | }
|
258 |
|
259 |
|
260 |
|
261 |
|
262 | function createSearchTrie(trie, index, items, options) {
|
263 | for (const item of items) {
|
264 | const candidates = arrayWrap(options.keySelector(item)).map((key, keyIndex) => ({
|
265 | index,
|
266 | keyIndex,
|
267 | item,
|
268 | normalized: normalize(key, options)
|
269 | }));
|
270 | index++;
|
271 |
|
272 | for (const candidate of candidates) {
|
273 | trieInsert(trie, candidate.normalized.normal, candidate);
|
274 | }
|
275 | }
|
276 | }
|
277 |
|
278 |
|
279 | function compareItemsBestScore(a, b) {
|
280 |
|
281 | const scoreDiff = b.score - a.score;
|
282 |
|
283 | if (scoreDiff !== 0) {
|
284 | return scoreDiff;
|
285 | }
|
286 |
|
287 |
|
288 | const matchPosDiff = a.match.start - b.match.start;
|
289 |
|
290 | if (matchPosDiff !== 0) {
|
291 | return matchPosDiff;
|
292 | }
|
293 |
|
294 |
|
295 | const keyIndexDiff = a.keyIndex - b.keyIndex;
|
296 |
|
297 | if (keyIndexDiff !== 0) {
|
298 | return keyIndexDiff;
|
299 | }
|
300 |
|
301 |
|
302 | const lengthDiff = a.lengthDiff - b.lengthDiff;
|
303 |
|
304 | if (lengthDiff !== 0) {
|
305 | return lengthDiff;
|
306 | }
|
307 |
|
308 |
|
309 | return compareItemsInsertOrder(a, b);
|
310 | }
|
311 |
|
312 | function compareItemsInsertOrder(a, b) {
|
313 | return a.index - b.index;
|
314 | }
|
315 |
|
316 | function getCompareFunc(sortBy) {
|
317 | switch (sortBy) {
|
318 | case sortKind.bestMatch:
|
319 | return compareItemsBestScore;
|
320 |
|
321 | case sortKind.insertOrder:
|
322 | return compareItemsInsertOrder;
|
323 |
|
324 | default:
|
325 | throw new Error(`unknown sortBy method ${sortBy}`);
|
326 | }
|
327 | }
|
328 |
|
329 |
|
330 | function addResult(results, resultMap, candidate, score, match, lengthDiff, compareItems) {
|
331 | const scoredItem = {
|
332 | item: candidate.item,
|
333 | normalized: candidate.normalized,
|
334 | score,
|
335 | match,
|
336 | index: candidate.index,
|
337 | keyIndex: candidate.keyIndex,
|
338 | lengthDiff
|
339 | };
|
340 |
|
341 | if (resultMap[candidate.index] == null) {
|
342 | resultMap[candidate.index] = results.length;
|
343 | results.push(scoredItem);
|
344 | } else if (compareItems(scoredItem, results[resultMap[candidate.index]]) < 0) {
|
345 | results[resultMap[candidate.index]] = scoredItem;
|
346 | }
|
347 | }
|
348 |
|
349 | const getLevLength = Math.max;
|
350 |
|
351 | const getSellersLength = termLength => termLength;
|
352 |
|
353 |
|
354 | function levShouldContinue(node, pos, term, threshold, sValue) {
|
355 |
|
356 | const p1 = pos + sValue;
|
357 |
|
358 | const p2 = Math.min(term.length, pos + node.depth + 1);
|
359 |
|
360 |
|
361 | const length = Math.ceil((p1 + p2) / 2);
|
362 | const bestPossibleValue = length - p2;
|
363 | return 1 - bestPossibleValue / length >= threshold;
|
364 | }
|
365 |
|
366 | function sellersShouldContinue(node, _, term, threshold, sValue, lastValue) {
|
367 | const bestPossibleValue = Math.min(sValue, lastValue - (node.depth + 1));
|
368 | return 1 - bestPossibleValue / term.length >= threshold;
|
369 | }
|
370 |
|
371 |
|
372 | function searchRecurse(trie, term, scoreMethods, rows, results, resultMap, options) {
|
373 | const stack = [];
|
374 |
|
375 | for (const key in trie.children) {
|
376 | const node = trie.children[key];
|
377 | stack.push([node, 1, key, 0, term.length]);
|
378 | }
|
379 |
|
380 | const acc = new Array(trie.depth);
|
381 |
|
382 | while (stack.length !== 0) {
|
383 | const [node, len, char, si, sv] = stack.pop();
|
384 | acc[len - 1] = char;
|
385 |
|
386 | scoreMethods.score(term, acc, rows, len - 1);
|
387 |
|
388 | const lastIndex = len;
|
389 | const lastValue = rows[rows.length - 1][lastIndex];
|
390 | let sIndex = si,
|
391 | sValue = sv;
|
392 |
|
393 | if (scoreMethods.shouldUpdateScore(lastValue, sv)) {
|
394 | sIndex = lastIndex;
|
395 | sValue = lastValue;
|
396 | }
|
397 |
|
398 |
|
399 | if (node.candidates.length > 0) {
|
400 | const length = scoreMethods.getLength(term.length, len);
|
401 | const score = 1 - sValue / length;
|
402 |
|
403 | if (score >= options.threshold) {
|
404 | const match = walkBack(rows, sIndex);
|
405 | const lengthDiff = Math.abs(len - term.length);
|
406 |
|
407 | for (const candidate of node.candidates) {
|
408 | addResult(results, resultMap, candidate, score, match, lengthDiff, scoreMethods.compareItems);
|
409 | }
|
410 | }
|
411 | }
|
412 |
|
413 |
|
414 | for (const key in node.children) {
|
415 | const child = node.children[key];
|
416 |
|
417 | if (scoreMethods.shouldContinue(child, len, term, options.threshold, sValue, lastValue)) {
|
418 | stack.push([child, len + 1, key, sIndex, sValue]);
|
419 | }
|
420 | }
|
421 | }
|
422 | }
|
423 |
|
424 |
|
425 |
|
426 | function searchCore(term, trie, options) {
|
427 | const initMethod = options.useSellers ? initSellersRows : initLevRows;
|
428 | const scoreMethods = {
|
429 | score: options.useDamerau ? damerauLevenshtein : levenshtein,
|
430 | getLength: options.useSellers ? getSellersLength : getLevLength,
|
431 | shouldUpdateScore: options.useSellers ? sellersUpdateScore : levUpdateScore,
|
432 | shouldContinue: options.useSellers ? sellersShouldContinue : levShouldContinue,
|
433 | walkBack: options.useSellers ? walkBack : noopWalkback,
|
434 | compareItems: getCompareFunc(options.sortBy)
|
435 | };
|
436 |
|
437 | const resultMap = {};
|
438 | const results = [];
|
439 | const rows = initMethod(term.length + 1, trie.depth + 1);
|
440 |
|
441 | if (options.threshold <= 0 || term.length === 0) {
|
442 | for (const candidate of trie.candidates) {
|
443 | addResult(results, resultMap, candidate, 0, {
|
444 | index: 0,
|
445 | length: 0
|
446 | }, term.length, scoreMethods.compareItems);
|
447 | }
|
448 | }
|
449 |
|
450 | searchRecurse(trie, term, scoreMethods, rows, results, resultMap, options);
|
451 | const sorted = results.sort(scoreMethods.compareItems);
|
452 |
|
453 | if (options.returnMatchData) {
|
454 | const denormalize = options.useSellers ? denormalizeMatchPosition : noop;
|
455 | return sorted.map(candidate => ({
|
456 | item: candidate.item,
|
457 | original: candidate.normalized.original,
|
458 | key: candidate.normalized.normal.join(""),
|
459 | score: candidate.score,
|
460 | match: denormalize(candidate.match, candidate.normalized.map)
|
461 | }));
|
462 | }
|
463 |
|
464 | return sorted.map(candidate => candidate.item);
|
465 | }
|
466 |
|
467 |
|
468 | function fuzzy(term, candidate, options) {
|
469 | options = { ...defaultOptions,
|
470 | ...options
|
471 | };
|
472 | const initMethod = options.useSellers ? initSellersRows : initLevRows;
|
473 | const scoreMethod = options.useDamerau ? damerauLevenshtein : levenshtein;
|
474 | const getScore = options.useSellers ? getSellersScore : getLevScore;
|
475 | term = normalize(term, options).normal;
|
476 | const normalized = normalize(candidate, options);
|
477 | const rows = initMethod(term.length + 1, normalized.normal.length + 1);
|
478 |
|
479 | for (let j = 0; j < normalized.normal.length; j++) {
|
480 | scoreMethod(term, normalized.normal, rows, j);
|
481 | }
|
482 |
|
483 | const scoreResult = getScore(rows, normalized.normal.length + 1);
|
484 | return options.returnMatchData ? {
|
485 | item: candidate,
|
486 | original: normalized.original,
|
487 | key: normalized.normal.join(""),
|
488 | score: scoreResult.score,
|
489 | match: options.useSellers ? denormalizeMatchPosition(walkBack(rows, scoreResult.scoreIndex), normalized.map) : noop()
|
490 | } : scoreResult.score;
|
491 | }
|
492 |
|
493 | function search(term, candidates, options) {
|
494 | options = { ...defaultOptions,
|
495 | ...options
|
496 | };
|
497 | const trie = {
|
498 | children: {},
|
499 | candidates: [],
|
500 | depth: 0
|
501 | };
|
502 | createSearchTrie(trie, 0, candidates, options);
|
503 | return searchCore(normalize(term, options).normal, trie, options);
|
504 | }
|
505 |
|
506 |
|
507 | class Searcher {
|
508 | constructor(candidates, options) {
|
509 | this.options = Object.assign({}, defaultOptions, options);
|
510 | this.trie = {
|
511 | children: {},
|
512 | candidates: [],
|
513 | depth: 0
|
514 | };
|
515 | createSearchTrie(this.trie, 0, candidates, this.options);
|
516 | this.count = candidates.length;
|
517 | }
|
518 |
|
519 | add(...candidates) {
|
520 | createSearchTrie(this.trie, this.count, candidates, this.options);
|
521 | this.count += candidates.length;
|
522 | }
|
523 |
|
524 | search(term, options) {
|
525 | options = Object.assign({}, this.options, options);
|
526 | return searchCore(normalize(term, this.options).normal, this.trie, options);
|
527 | }
|
528 |
|
529 | }
|
530 |
|
531 | export { Searcher, fuzzy, search, sortKind };
|