1 | 'use strict';
|
2 |
|
3 | Object.defineProperty(exports, '__esModule', { value: true });
|
4 |
|
5 | function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }
|
6 |
|
7 | var split = _interopDefault(require('graphemesplit'));
|
8 |
|
9 | const whitespaceRegex = /^\s+$/;
|
10 | const nonWordRegex = /^[`~!@#$%^&*()\-=_+{}[\]\|\\;':",./<>?]+$/;
|
11 | const sortKind = {
|
12 | insertOrder: "insertOrder",
|
13 | bestMatch: "bestMatch"
|
14 | };
|
15 |
|
16 | const defaultOptions = {
|
17 | keySelector: s => s,
|
18 | threshold: .6,
|
19 | ignoreCase: true,
|
20 | ignoreSymbols: true,
|
21 | normalizeWhitespace: true,
|
22 | returnMatchData: false,
|
23 | useDamerau: true,
|
24 | useSellers: true,
|
25 | sortBy: sortKind.bestMatch
|
26 | };
|
27 |
|
28 | const noop = () => {};
|
29 |
|
30 | const arrayWrap = item => item instanceof Array ? item : [item];
|
31 |
|
32 |
|
33 | function normalize(string, options) {
|
34 | const lower = options.ignoreCase ? string.toLocaleLowerCase() : string;
|
35 |
|
36 | const normal = [];
|
37 | const map = [];
|
38 | let lastWasWhitespace = true;
|
39 | let length = 0;
|
40 |
|
41 | for (const grapheme of split(lower)) {
|
42 | whitespaceRegex.lastIndex = 0;
|
43 | nonWordRegex.lastIndex = 0;
|
44 |
|
45 | if (options.normalizeWhitespace && whitespaceRegex.test(grapheme)) {
|
46 | if (!lastWasWhitespace) {
|
47 | normal.push(" ");
|
48 | map.push(length);
|
49 | lastWasWhitespace = true;
|
50 | }
|
51 | } else if (!(options.ignoreSymbols && nonWordRegex.test(grapheme))) {
|
52 | normal.push(grapheme.normalize());
|
53 | map.push(length);
|
54 | lastWasWhitespace = false;
|
55 | }
|
56 |
|
57 | length += grapheme.length;
|
58 | }
|
59 |
|
60 |
|
61 | map.push(string.length);
|
62 |
|
63 | while (normal[normal.length - 1] === " ") {
|
64 | normal.pop();
|
65 | map.pop();
|
66 | }
|
67 |
|
68 | return {
|
69 | original: string,
|
70 | normal,
|
71 | map
|
72 | };
|
73 | }
|
74 |
|
75 |
|
76 | function denormalizeMatchPosition(match, map) {
|
77 | return {
|
78 | index: map[match.start],
|
79 | length: map[match.end + 1] - map[match.start]
|
80 | };
|
81 | }
|
82 |
|
83 |
|
84 | function walkBack(rows, scoreIndex) {
|
85 | if (scoreIndex === 0) {
|
86 | return {
|
87 | index: 0,
|
88 | length: 0
|
89 | };
|
90 | }
|
91 |
|
92 | let start = scoreIndex;
|
93 |
|
94 | for (let i = rows.length - 2; i > 0 && start > 1; i--) {
|
95 | const row = rows[i];
|
96 | start = row[start] < row[start - 1] ? start : start - 1;
|
97 | }
|
98 |
|
99 | return {
|
100 | start: start - 1,
|
101 | end: scoreIndex - 1
|
102 | };
|
103 | }
|
104 |
|
105 |
|
106 | function noopWalkback() {
|
107 | return {
|
108 | start: 0,
|
109 | end: 0
|
110 | };
|
111 | }
|
112 |
|
113 | const levUpdateScore = () => true;
|
114 |
|
115 | const sellersUpdateScore = (cur, min) => cur < min;
|
116 |
|
117 | function getLevScore(rows, length) {
|
118 | const lastRow = rows[rows.length - 1];
|
119 | const lastCell = lastRow[length - 1];
|
120 | const scoreLength = Math.max(rows.length, length);
|
121 | return {
|
122 | score: 1 - lastCell / (scoreLength - 1),
|
123 | scoreIndex: length - 1
|
124 | };
|
125 | }
|
126 |
|
127 | function getSellersScore(rows, length) {
|
128 |
|
129 | if (rows.length === 1) {
|
130 | return {
|
131 | score: 1,
|
132 | scoreIndex: 0
|
133 | };
|
134 | }
|
135 |
|
136 | const lastRow = rows[rows.length - 1];
|
137 | let minValue = lastRow[0];
|
138 | let minIndex = 0;
|
139 |
|
140 | for (let i = 1; i < length; i++) {
|
141 | const val = lastRow[i];
|
142 |
|
143 | if (val < minValue) {
|
144 | minValue = val;
|
145 | minIndex = i;
|
146 | }
|
147 | }
|
148 |
|
149 | return {
|
150 | score: 1 - minValue / (rows.length - 1),
|
151 | scoreIndex: minIndex
|
152 | };
|
153 | }
|
154 |
|
155 | function initLevRows(rowCount, columnCount) {
|
156 | const rows = new Array(rowCount);
|
157 |
|
158 | for (let i = 0; i < rowCount; i++) {
|
159 | rows[i] = new Array(columnCount);
|
160 | rows[i][0] = i;
|
161 | }
|
162 |
|
163 | for (let i = 0; i < columnCount; i++) {
|
164 | rows[0][i] = i;
|
165 | }
|
166 |
|
167 | return rows;
|
168 | }
|
169 |
|
170 | function initSellersRows(rowCount, columnCount) {
|
171 | const rows = new Array(rowCount);
|
172 | rows[0] = new Array(columnCount).fill(0);
|
173 |
|
174 | for (let i = 1; i < rowCount; i++) {
|
175 | rows[i] = new Array(columnCount);
|
176 | rows[i][0] = i;
|
177 | }
|
178 |
|
179 | return rows;
|
180 | }
|
181 |
|
182 |
|
183 | function levCore(term, candidate, rows, i, j) {
|
184 | const rowA = rows[i];
|
185 | const rowB = rows[i + 1];
|
186 | const cost = term[i] === candidate[j] ? 0 : 1;
|
187 | let m;
|
188 | let min = rowB[j] + 1;
|
189 |
|
190 | if ((m = rowA[j + 1] + 1) < min) min = m;
|
191 |
|
192 | if ((m = rowA[j] + cost) < min) min = m;
|
193 |
|
194 | rowB[j + 1] = min;
|
195 | }
|
196 |
|
197 |
|
198 |
|
199 | function levenshtein(term, candidate, rows, j) {
|
200 | for (let i = 0; i < term.length; i++) {
|
201 | levCore(term, candidate, rows, i, j);
|
202 | }
|
203 | }
|
204 |
|
205 |
|
206 |
|
207 |
|
208 | function damerauLevenshtein(term, candidate, rows, j) {
|
209 |
|
210 |
|
211 | if (j === 0) {
|
212 | levenshtein(term, candidate, rows, j);
|
213 | return;
|
214 | }
|
215 |
|
216 |
|
217 |
|
218 | if (term.length > 0) {
|
219 | levCore(term, candidate, rows, 0, j);
|
220 | }
|
221 |
|
222 | for (let i = 1; i < term.length; i++) {
|
223 | const rowA = rows[i - 1];
|
224 | const rowB = rows[i];
|
225 | const rowC = rows[i + 1];
|
226 | const cost = term[i] === candidate[j] ? 0 : 1;
|
227 | let m;
|
228 |
|
229 | let min = rowC[j] + 1;
|
230 |
|
231 | if ((m = rowB[j + 1] + 1) < min) min = m;
|
232 |
|
233 | if ((m = rowB[j] + cost) < min) min = m;
|
234 |
|
235 | if (term[i] === candidate[j - 1] && term[i - 1] === candidate[j] && (m = rowA[j - 1] + cost) < min) min = m;
|
236 | rowC[j + 1] = min;
|
237 | }
|
238 | }
|
239 |
|
240 |
|
241 |
|
242 | function trieInsert(trie, string, item) {
|
243 | let walker = trie;
|
244 |
|
245 | for (let i = 0; i < string.length; i++) {
|
246 | const char = string[i];
|
247 |
|
248 | if (walker.children[char] == null) {
|
249 | walker.children[char] = {
|
250 | children: {},
|
251 | candidates: [],
|
252 | depth: 0
|
253 | };
|
254 | }
|
255 |
|
256 |
|
257 | walker.depth = Math.max(walker.depth, string.length - i);
|
258 |
|
259 | walker = walker.children[char];
|
260 | }
|
261 |
|
262 | walker.candidates.push(item);
|
263 | }
|
264 |
|
265 |
|
266 |
|
267 |
|
268 | function createSearchTrie(trie, index, items, options) {
|
269 | for (const item of items) {
|
270 | const candidates = arrayWrap(options.keySelector(item)).map((key, keyIndex) => ({
|
271 | index,
|
272 | keyIndex,
|
273 | item,
|
274 | normalized: normalize(key, options)
|
275 | }));
|
276 | index++;
|
277 |
|
278 | for (const candidate of candidates) {
|
279 | trieInsert(trie, candidate.normalized.normal, candidate);
|
280 | }
|
281 | }
|
282 | }
|
283 |
|
284 |
|
285 | function compareItemsBestScore(a, b) {
|
286 |
|
287 | const scoreDiff = b.score - a.score;
|
288 |
|
289 | if (scoreDiff !== 0) {
|
290 | return scoreDiff;
|
291 | }
|
292 |
|
293 |
|
294 | const matchPosDiff = a.match.start - b.match.start;
|
295 |
|
296 | if (matchPosDiff !== 0) {
|
297 | return matchPosDiff;
|
298 | }
|
299 |
|
300 |
|
301 | const keyIndexDiff = a.keyIndex - b.keyIndex;
|
302 |
|
303 | if (keyIndexDiff !== 0) {
|
304 | return keyIndexDiff;
|
305 | }
|
306 |
|
307 |
|
308 | const lengthDiff = a.lengthDiff - b.lengthDiff;
|
309 |
|
310 | if (lengthDiff !== 0) {
|
311 | return lengthDiff;
|
312 | }
|
313 |
|
314 |
|
315 | return compareItemsInsertOrder(a, b);
|
316 | }
|
317 |
|
318 | function compareItemsInsertOrder(a, b) {
|
319 | return a.index - b.index;
|
320 | }
|
321 |
|
322 | function getCompareFunc(sortBy) {
|
323 | switch (sortBy) {
|
324 | case sortKind.bestMatch:
|
325 | return compareItemsBestScore;
|
326 |
|
327 | case sortKind.insertOrder:
|
328 | return compareItemsInsertOrder;
|
329 |
|
330 | default:
|
331 | throw new Error(`unknown sortBy method ${sortBy}`);
|
332 | }
|
333 | }
|
334 |
|
335 |
|
336 | function addResult(results, resultMap, candidate, score, match, lengthDiff, compareItems) {
|
337 | const scoredItem = {
|
338 | item: candidate.item,
|
339 | normalized: candidate.normalized,
|
340 | score,
|
341 | match,
|
342 | index: candidate.index,
|
343 | keyIndex: candidate.keyIndex,
|
344 | lengthDiff
|
345 | };
|
346 |
|
347 | if (resultMap[candidate.index] == null) {
|
348 | resultMap[candidate.index] = results.length;
|
349 | results.push(scoredItem);
|
350 | } else if (compareItems(scoredItem, results[resultMap[candidate.index]]) < 0) {
|
351 | results[resultMap[candidate.index]] = scoredItem;
|
352 | }
|
353 | }
|
354 |
|
355 | const getLevLength = Math.max;
|
356 |
|
357 | const getSellersLength = termLength => termLength;
|
358 |
|
359 |
|
360 | function levShouldContinue(node, pos, term, threshold, sValue) {
|
361 |
|
362 | const p1 = pos + sValue;
|
363 |
|
364 | const p2 = Math.min(term.length, pos + node.depth + 1);
|
365 |
|
366 |
|
367 | const length = Math.ceil((p1 + p2) / 2);
|
368 | const bestPossibleValue = length - p2;
|
369 | return 1 - bestPossibleValue / length >= threshold;
|
370 | }
|
371 |
|
372 | function sellersShouldContinue(node, _, term, threshold, sValue, lastValue) {
|
373 | const bestPossibleValue = Math.min(sValue, lastValue - (node.depth + 1));
|
374 | return 1 - bestPossibleValue / term.length >= threshold;
|
375 | }
|
376 |
|
377 |
|
378 | function searchRecurse(trie, term, scoreMethods, rows, results, resultMap, options) {
|
379 | const stack = [];
|
380 |
|
381 | for (const key in trie.children) {
|
382 | const node = trie.children[key];
|
383 | stack.push([node, 1, key, 0, term.length]);
|
384 | }
|
385 |
|
386 | const acc = new Array(trie.depth);
|
387 |
|
388 | while (stack.length !== 0) {
|
389 | const [node, len, char, si, sv] = stack.pop();
|
390 | acc[len - 1] = char;
|
391 |
|
392 | scoreMethods.score(term, acc, rows, len - 1);
|
393 |
|
394 | const lastIndex = len;
|
395 | const lastValue = rows[rows.length - 1][lastIndex];
|
396 | let sIndex = si,
|
397 | sValue = sv;
|
398 |
|
399 | if (scoreMethods.shouldUpdateScore(lastValue, sv)) {
|
400 | sIndex = lastIndex;
|
401 | sValue = lastValue;
|
402 | }
|
403 |
|
404 |
|
405 | if (node.candidates.length > 0) {
|
406 | const length = scoreMethods.getLength(term.length, len);
|
407 | const score = 1 - sValue / length;
|
408 |
|
409 | if (score >= options.threshold) {
|
410 | const match = walkBack(rows, sIndex);
|
411 | const lengthDiff = Math.abs(len - term.length);
|
412 |
|
413 | for (const candidate of node.candidates) {
|
414 | addResult(results, resultMap, candidate, score, match, lengthDiff, scoreMethods.compareItems);
|
415 | }
|
416 | }
|
417 | }
|
418 |
|
419 |
|
420 | for (const key in node.children) {
|
421 | const child = node.children[key];
|
422 |
|
423 | if (scoreMethods.shouldContinue(child, len, term, options.threshold, sValue, lastValue)) {
|
424 | stack.push([child, len + 1, key, sIndex, sValue]);
|
425 | }
|
426 | }
|
427 | }
|
428 | }
|
429 |
|
430 |
|
431 |
|
432 | function searchCore(term, trie, options) {
|
433 | const initMethod = options.useSellers ? initSellersRows : initLevRows;
|
434 | const scoreMethods = {
|
435 | score: options.useDamerau ? damerauLevenshtein : levenshtein,
|
436 | getLength: options.useSellers ? getSellersLength : getLevLength,
|
437 | shouldUpdateScore: options.useSellers ? sellersUpdateScore : levUpdateScore,
|
438 | shouldContinue: options.useSellers ? sellersShouldContinue : levShouldContinue,
|
439 | walkBack: options.useSellers ? walkBack : noopWalkback,
|
440 | compareItems: getCompareFunc(options.sortBy)
|
441 | };
|
442 |
|
443 | const resultMap = {};
|
444 | const results = [];
|
445 | const rows = initMethod(term.length + 1, trie.depth + 1);
|
446 |
|
447 | if (options.threshold <= 0 || term.length === 0) {
|
448 | for (const candidate of trie.candidates) {
|
449 | addResult(results, resultMap, candidate, 0, {
|
450 | index: 0,
|
451 | length: 0
|
452 | }, term.length, scoreMethods.compareItems);
|
453 | }
|
454 | }
|
455 |
|
456 | searchRecurse(trie, term, scoreMethods, rows, results, resultMap, options);
|
457 | const sorted = results.sort(scoreMethods.compareItems);
|
458 |
|
459 | if (options.returnMatchData) {
|
460 | const denormalize = options.useSellers ? denormalizeMatchPosition : noop;
|
461 | return sorted.map(candidate => ({
|
462 | item: candidate.item,
|
463 | original: candidate.normalized.original,
|
464 | key: candidate.normalized.normal.join(""),
|
465 | score: candidate.score,
|
466 | match: denormalize(candidate.match, candidate.normalized.map)
|
467 | }));
|
468 | }
|
469 |
|
470 | return sorted.map(candidate => candidate.item);
|
471 | }
|
472 |
|
473 |
|
474 | function fuzzy(term, candidate, options) {
|
475 | options = { ...defaultOptions,
|
476 | ...options
|
477 | };
|
478 | const initMethod = options.useSellers ? initSellersRows : initLevRows;
|
479 | const scoreMethod = options.useDamerau ? damerauLevenshtein : levenshtein;
|
480 | const getScore = options.useSellers ? getSellersScore : getLevScore;
|
481 | term = normalize(term, options).normal;
|
482 | const normalized = normalize(candidate, options);
|
483 | const rows = initMethod(term.length + 1, normalized.normal.length + 1);
|
484 |
|
485 | for (let j = 0; j < normalized.normal.length; j++) {
|
486 | scoreMethod(term, normalized.normal, rows, j);
|
487 | }
|
488 |
|
489 | const scoreResult = getScore(rows, normalized.normal.length + 1);
|
490 | return options.returnMatchData ? {
|
491 | item: candidate,
|
492 | original: normalized.original,
|
493 | key: normalized.normal.join(""),
|
494 | score: scoreResult.score,
|
495 | match: options.useSellers ? denormalizeMatchPosition(walkBack(rows, scoreResult.scoreIndex), normalized.map) : noop()
|
496 | } : scoreResult.score;
|
497 | }
|
498 |
|
499 | function search(term, candidates, options) {
|
500 | options = { ...defaultOptions,
|
501 | ...options
|
502 | };
|
503 | const trie = {
|
504 | children: {},
|
505 | candidates: [],
|
506 | depth: 0
|
507 | };
|
508 | createSearchTrie(trie, 0, candidates, options);
|
509 | return searchCore(normalize(term, options).normal, trie, options);
|
510 | }
|
511 |
|
512 |
|
513 | class Searcher {
|
514 | constructor(candidates, options) {
|
515 | this.options = Object.assign({}, defaultOptions, options);
|
516 | this.trie = {
|
517 | children: {},
|
518 | candidates: [],
|
519 | depth: 0
|
520 | };
|
521 | createSearchTrie(this.trie, 0, candidates, this.options);
|
522 | this.count = candidates.length;
|
523 | }
|
524 |
|
525 | add(...candidates) {
|
526 | createSearchTrie(this.trie, this.count, candidates, this.options);
|
527 | this.count += candidates.length;
|
528 | }
|
529 |
|
530 | search(term, options) {
|
531 | options = Object.assign({}, this.options, options);
|
532 | return searchCore(normalize(term, this.options).normal, this.trie, options);
|
533 | }
|
534 |
|
535 | }
|
536 |
|
537 | exports.Searcher = Searcher;
|
538 | exports.fuzzy = fuzzy;
|
539 | exports.search = search;
|
540 | exports.sortKind = sortKind;
|