UNPKG

15.1 kBJavaScriptView Raw
1'use strict';
2
3Object.defineProperty(exports, '__esModule', { value: true });
4
5function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }
6
7var split = _interopDefault(require('graphemesplit'));
8
9const whitespaceRegex = /^\s+$/;
10const nonWordRegex = /^[`~!@#$%^&*()\-=_+{}[\]\|\\;':",./<>?]+$/;
11const sortKind = {
12 insertOrder: "insertOrder",
13 bestMatch: "bestMatch"
14}; // the default options, which will be used for any unset option
15
16const defaultOptions = {
17 keySelector: s => s,
18 threshold: .6,
19 ignoreCase: true,
20 ignoreSymbols: true,
21 normalizeWhitespace: true,
22 returnMatchData: false,
23 useDamerau: true,
24 useSellers: true,
25 sortBy: sortKind.bestMatch
26};
27
28const noop = () => {};
29
30const arrayWrap = item => item instanceof Array ? item : [item]; // return normalized string, with map included
31
32
33function normalize(string, options) {
34 const lower = options.ignoreCase ? string.toLocaleLowerCase() : string; // track transformations
35
36 const normal = [];
37 const map = [];
38 let lastWasWhitespace = true;
39 let length = 0;
40
41 for (const grapheme of split(lower)) {
42 whitespaceRegex.lastIndex = 0;
43 nonWordRegex.lastIndex = 0;
44
45 if (options.normalizeWhitespace && whitespaceRegex.test(grapheme)) {
46 if (!lastWasWhitespace) {
47 normal.push(" ");
48 map.push(length);
49 lastWasWhitespace = true;
50 }
51 } else if (!(options.ignoreSymbols && nonWordRegex.test(grapheme))) {
52 normal.push(grapheme.normalize());
53 map.push(length);
54 lastWasWhitespace = false;
55 }
56
57 length += grapheme.length;
58 } // add the end of the string
59
60
61 map.push(string.length);
62
63 while (normal[normal.length - 1] === " ") {
64 normal.pop();
65 map.pop();
66 }
67
68 return {
69 original: string,
70 normal,
71 map
72 };
73} // translates a match to the original string
74
75
76function denormalizeMatchPosition(match, map) {
77 return {
78 index: map[match.start],
79 length: map[match.end + 1] - map[match.start]
80 };
81} // walks back up the matrix to find the match index and length
82
83
84function walkBack(rows, scoreIndex) {
85 if (scoreIndex === 0) {
86 return {
87 index: 0,
88 length: 0
89 };
90 }
91
92 let start = scoreIndex;
93
94 for (let i = rows.length - 2; i > 0 && start > 1; i--) {
95 const row = rows[i];
96 start = row[start] < row[start - 1] ? start : start - 1;
97 }
98
99 return {
100 start: start - 1,
101 end: scoreIndex - 1
102 };
103} // walkback is a noop for non-sellers, but should still return an object
104
105
106function noopWalkback() {
107 return {
108 start: 0,
109 end: 0
110 };
111}
112
113const levUpdateScore = () => true;
114
115const sellersUpdateScore = (cur, min) => cur < min;
116
117function getLevScore(rows, length) {
118 const lastRow = rows[rows.length - 1];
119 const lastCell = lastRow[length - 1];
120 const scoreLength = Math.max(rows.length, length);
121 return {
122 score: 1 - lastCell / (scoreLength - 1),
123 scoreIndex: length - 1
124 };
125}
126
127function getSellersScore(rows, length) {
128 // search term was empty string, return perfect score
129 if (rows.length === 1) {
130 return {
131 score: 1,
132 scoreIndex: 0
133 };
134 }
135
136 const lastRow = rows[rows.length - 1];
137 let minValue = lastRow[0];
138 let minIndex = 0;
139
140 for (let i = 1; i < length; i++) {
141 const val = lastRow[i];
142
143 if (val < minValue) {
144 minValue = val;
145 minIndex = i;
146 }
147 }
148
149 return {
150 score: 1 - minValue / (rows.length - 1),
151 scoreIndex: minIndex
152 };
153}
154
155function initLevRows(rowCount, columnCount) {
156 const rows = new Array(rowCount);
157
158 for (let i = 0; i < rowCount; i++) {
159 rows[i] = new Array(columnCount);
160 rows[i][0] = i;
161 }
162
163 for (let i = 0; i < columnCount; i++) {
164 rows[0][i] = i;
165 }
166
167 return rows;
168}
169
170function initSellersRows(rowCount, columnCount) {
171 const rows = new Array(rowCount);
172 rows[0] = new Array(columnCount).fill(0);
173
174 for (let i = 1; i < rowCount; i++) {
175 rows[i] = new Array(columnCount);
176 rows[i][0] = i;
177 }
178
179 return rows;
180} // the content of the innermost loop of levenshtein
181
182
183function levCore(term, candidate, rows, i, j) {
184 const rowA = rows[i];
185 const rowB = rows[i + 1];
186 const cost = term[i] === candidate[j] ? 0 : 1;
187 let m;
188 let min = rowB[j] + 1; // insertion
189
190 if ((m = rowA[j + 1] + 1) < min) min = m; // deletion
191
192 if ((m = rowA[j] + cost) < min) min = m; // substitution
193
194 rowB[j + 1] = min;
195} // runtime complexity: O(mn) where m and n are the lengths of term and candidate, respectively
196// Note: this method only runs on a single column
197
198
199function levenshtein(term, candidate, rows, j) {
200 for (let i = 0; i < term.length; i++) {
201 levCore(term, candidate, rows, i, j);
202 }
203} // has all the runtime characteristics of the above, but punishes transpositions less,
204// resulting in better tolerance to those types of typos
205// Note: this method only runs on a single column
206
207
208function damerauLevenshtein(term, candidate, rows, j) {
209 // if j === 0, we can't check for transpositions,
210 // so use normal levenshtein instead
211 if (j === 0) {
212 levenshtein(term, candidate, rows, j);
213 return;
214 } // for i === 0, we also can't check for transpositions, so calculate
215 // the first row using normal levenshtein as well
216
217
218 if (term.length > 0) {
219 levCore(term, candidate, rows, 0, j);
220 }
221
222 for (let i = 1; i < term.length; i++) {
223 const rowA = rows[i - 1];
224 const rowB = rows[i];
225 const rowC = rows[i + 1];
226 const cost = term[i] === candidate[j] ? 0 : 1;
227 let m; // insertion
228
229 let min = rowC[j] + 1; // deletion
230
231 if ((m = rowB[j + 1] + 1) < min) min = m; // substitution
232
233 if ((m = rowB[j] + cost) < min) min = m; // transposition
234
235 if (term[i] === candidate[j - 1] && term[i - 1] === candidate[j] && (m = rowA[j - 1] + cost) < min) min = m;
236 rowC[j + 1] = min;
237 }
238} // method for creating a trie from search candidates
239// using a trie can significantly improve search time
240
241
242function trieInsert(trie, string, item) {
243 let walker = trie;
244
245 for (let i = 0; i < string.length; i++) {
246 const char = string[i]; // add child node if not already present
247
248 if (walker.children[char] == null) {
249 walker.children[char] = {
250 children: {},
251 candidates: [],
252 depth: 0
253 };
254 } // log max depth of this subtree
255
256
257 walker.depth = Math.max(walker.depth, string.length - i); // step into child node
258
259 walker = walker.children[char];
260 }
261
262 walker.candidates.push(item);
263} // transforms a list of candidates into objects with normalized search keys,
264// and inserts them into a trie
265// the keySelector is used to pick strings from an object to search by
266
267
268function createSearchTrie(trie, index, items, options) {
269 for (const item of items) {
270 const candidates = arrayWrap(options.keySelector(item)).map((key, keyIndex) => ({
271 index,
272 keyIndex,
273 item,
274 normalized: normalize(key, options)
275 }));
276 index++;
277
278 for (const candidate of candidates) {
279 trieInsert(trie, candidate.normalized.normal, candidate);
280 }
281 }
282} // scored item comparator
283
284
285function compareItemsBestScore(a, b) {
286 // highest priority is raw levenshtein score
287 const scoreDiff = b.score - a.score;
288
289 if (scoreDiff !== 0) {
290 return scoreDiff;
291 } // ties are broken by earlier match positions
292
293
294 const matchPosDiff = a.match.start - b.match.start;
295
296 if (matchPosDiff !== 0) {
297 return matchPosDiff;
298 } // prioritize earlier keys
299
300
301 const keyIndexDiff = a.keyIndex - b.keyIndex;
302
303 if (keyIndexDiff !== 0) {
304 return keyIndexDiff;
305 } // lastly, break ties by preferring the closer length match
306
307
308 const lengthDiff = a.lengthDiff - b.lengthDiff;
309
310 if (lengthDiff !== 0) {
311 return lengthDiff;
312 } // if all else fails, resort to insertion order
313
314
315 return compareItemsInsertOrder(a, b);
316}
317
318function compareItemsInsertOrder(a, b) {
319 return a.index - b.index;
320}
321
322function getCompareFunc(sortBy) {
323 switch (sortBy) {
324 case sortKind.bestMatch:
325 return compareItemsBestScore;
326
327 case sortKind.insertOrder:
328 return compareItemsInsertOrder;
329
330 default:
331 throw new Error(`unknown sortBy method ${sortBy}`);
332 }
333} // dedupes and adds results to the results list/map
334
335
336function addResult(results, resultMap, candidate, score, match, lengthDiff, compareItems) {
337 const scoredItem = {
338 item: candidate.item,
339 normalized: candidate.normalized,
340 score,
341 match,
342 index: candidate.index,
343 keyIndex: candidate.keyIndex,
344 lengthDiff
345 };
346
347 if (resultMap[candidate.index] == null) {
348 resultMap[candidate.index] = results.length;
349 results.push(scoredItem);
350 } else if (compareItems(scoredItem, results[resultMap[candidate.index]]) < 0) {
351 results[resultMap[candidate.index]] = scoredItem;
352 }
353}
354
355const getLevLength = Math.max;
356
357const getSellersLength = termLength => termLength; // skip any subtrees for which it is impossible to score >= threshold
358
359
360function levShouldContinue(node, pos, term, threshold, sValue) {
361 // earliest point (length) at which sValue could return to 0
362 const p1 = pos + sValue; // point (length) at which string lengths would match
363
364 const p2 = Math.min(term.length, pos + node.depth + 1); // the best score possible is the string which minimizes the value
365 // max(sValue, strLenDiff), which is always halfway between p1 and p2
366
367 const length = Math.ceil((p1 + p2) / 2);
368 const bestPossibleValue = length - p2;
369 return 1 - bestPossibleValue / length >= threshold;
370}
371
372function sellersShouldContinue(node, _, term, threshold, sValue, lastValue) {
373 const bestPossibleValue = Math.min(sValue, lastValue - (node.depth + 1));
374 return 1 - bestPossibleValue / term.length >= threshold;
375} // (pseudo) recursively walk the trie
376
377
378function searchRecurse(trie, term, scoreMethods, rows, results, resultMap, options) {
379 const stack = [];
380
381 for (const key in trie.children) {
382 const node = trie.children[key];
383 stack.push([node, 1, key, 0, term.length]);
384 }
385
386 const acc = new Array(trie.depth);
387
388 while (stack.length !== 0) {
389 const [node, len, char, si, sv] = stack.pop();
390 acc[len - 1] = char; // build rows
391
392 scoreMethods.score(term, acc, rows, len - 1); // track best score and position
393
394 const lastIndex = len;
395 const lastValue = rows[rows.length - 1][lastIndex];
396 let sIndex = si,
397 sValue = sv;
398
399 if (scoreMethods.shouldUpdateScore(lastValue, sv)) {
400 sIndex = lastIndex;
401 sValue = lastValue;
402 } // insert results
403
404
405 if (node.candidates.length > 0) {
406 const length = scoreMethods.getLength(term.length, len);
407 const score = 1 - sValue / length;
408
409 if (score >= options.threshold) {
410 const match = walkBack(rows, sIndex);
411 const lengthDiff = Math.abs(len - term.length);
412
413 for (const candidate of node.candidates) {
414 addResult(results, resultMap, candidate, score, match, lengthDiff, scoreMethods.compareItems);
415 }
416 }
417 } // recurse for children
418
419
420 for (const key in node.children) {
421 const child = node.children[key];
422
423 if (scoreMethods.shouldContinue(child, len, term, options.threshold, sValue, lastValue)) {
424 stack.push([child, len + 1, key, sIndex, sValue]);
425 }
426 }
427 }
428} // the core match finder: returns a sorted, filtered list of matches
429// this does not normalize input, requiring users to normalize themselves
430
431
432function searchCore(term, trie, options) {
433 const initMethod = options.useSellers ? initSellersRows : initLevRows;
434 const scoreMethods = {
435 score: options.useDamerau ? damerauLevenshtein : levenshtein,
436 getLength: options.useSellers ? getSellersLength : getLevLength,
437 shouldUpdateScore: options.useSellers ? sellersUpdateScore : levUpdateScore,
438 shouldContinue: options.useSellers ? sellersShouldContinue : levShouldContinue,
439 walkBack: options.useSellers ? walkBack : noopWalkback,
440 compareItems: getCompareFunc(options.sortBy)
441 }; // walk the trie, scoring and storing the candidates
442
443 const resultMap = {};
444 const results = [];
445 const rows = initMethod(term.length + 1, trie.depth + 1);
446
447 if (options.threshold <= 0 || term.length === 0) {
448 for (const candidate of trie.candidates) {
449 addResult(results, resultMap, candidate, 0, {
450 index: 0,
451 length: 0
452 }, term.length, scoreMethods.compareItems);
453 }
454 }
455
456 searchRecurse(trie, term, scoreMethods, rows, results, resultMap, options);
457 const sorted = results.sort(scoreMethods.compareItems);
458
459 if (options.returnMatchData) {
460 const denormalize = options.useSellers ? denormalizeMatchPosition : noop;
461 return sorted.map(candidate => ({
462 item: candidate.item,
463 original: candidate.normalized.original,
464 key: candidate.normalized.normal.join(""),
465 score: candidate.score,
466 match: denormalize(candidate.match, candidate.normalized.map)
467 }));
468 }
469
470 return sorted.map(candidate => candidate.item);
471} // wrapper for exporting sellers while allowing options to be passed in
472
473
474function fuzzy(term, candidate, options) {
475 options = { ...defaultOptions,
476 ...options
477 };
478 const initMethod = options.useSellers ? initSellersRows : initLevRows;
479 const scoreMethod = options.useDamerau ? damerauLevenshtein : levenshtein;
480 const getScore = options.useSellers ? getSellersScore : getLevScore;
481 term = normalize(term, options).normal;
482 const normalized = normalize(candidate, options);
483 const rows = initMethod(term.length + 1, normalized.normal.length + 1);
484
485 for (let j = 0; j < normalized.normal.length; j++) {
486 scoreMethod(term, normalized.normal, rows, j);
487 }
488
489 const scoreResult = getScore(rows, normalized.normal.length + 1);
490 return options.returnMatchData ? {
491 item: candidate,
492 original: normalized.original,
493 key: normalized.normal.join(""),
494 score: scoreResult.score,
495 match: options.useSellers ? denormalizeMatchPosition(walkBack(rows, scoreResult.scoreIndex), normalized.map) : noop()
496 } : scoreResult.score;
497} // simple one-off search. Useful if you don't expect to use the same candidate list again
498
499function search(term, candidates, options) {
500 options = { ...defaultOptions,
501 ...options
502 };
503 const trie = {
504 children: {},
505 candidates: [],
506 depth: 0
507 };
508 createSearchTrie(trie, 0, candidates, options);
509 return searchCore(normalize(term, options).normal, trie, options);
510} // class that improves performance of searching the same set multiple times
511// normalizes the strings and caches the result for future calls
512
513class Searcher {
514 constructor(candidates, options) {
515 this.options = Object.assign({}, defaultOptions, options);
516 this.trie = {
517 children: {},
518 candidates: [],
519 depth: 0
520 };
521 createSearchTrie(this.trie, 0, candidates, this.options);
522 this.count = candidates.length;
523 }
524
525 add(...candidates) {
526 createSearchTrie(this.trie, this.count, candidates, this.options);
527 this.count += candidates.length;
528 }
529
530 search(term, options) {
531 options = Object.assign({}, this.options, options);
532 return searchCore(normalize(term, this.options).normal, this.trie, options);
533 }
534
535}
536
537exports.Searcher = Searcher;
538exports.fuzzy = fuzzy;
539exports.search = search;
540exports.sortKind = sortKind;