UNPKG

15.2 kBJavaScriptView Raw
1'use strict';
2
3Object.defineProperty(exports, '__esModule', { value: true });
4
5function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }
6
7var split = _interopDefault(require('graphemesplit'));
8
9function _defineProperty(obj, key, value) {
10 if (key in obj) {
11 Object.defineProperty(obj, key, {
12 value: value,
13 enumerable: true,
14 configurable: true,
15 writable: true
16 });
17 } else {
18 obj[key] = value;
19 }
20
21 return obj;
22}
23
24function ownKeys(object, enumerableOnly) {
25 var keys = Object.keys(object);
26
27 if (Object.getOwnPropertySymbols) {
28 var symbols = Object.getOwnPropertySymbols(object);
29 if (enumerableOnly) symbols = symbols.filter(function (sym) {
30 return Object.getOwnPropertyDescriptor(object, sym).enumerable;
31 });
32 keys.push.apply(keys, symbols);
33 }
34
35 return keys;
36}
37
38function _objectSpread2(target) {
39 for (var i = 1; i < arguments.length; i++) {
40 var source = arguments[i] != null ? arguments[i] : {};
41
42 if (i % 2) {
43 ownKeys(Object(source), true).forEach(function (key) {
44 _defineProperty(target, key, source[key]);
45 });
46 } else if (Object.getOwnPropertyDescriptors) {
47 Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
48 } else {
49 ownKeys(Object(source)).forEach(function (key) {
50 Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
51 });
52 }
53 }
54
55 return target;
56}
57
58const whitespaceRegex = /^\s+$/;
59const nonWordRegex = /^[`~!@#$%^&*()\-=_+{}[\]\|\\;':",./<>?]+$/; // the default options, which will be used for any unset option
60
61const defaultOptions = {
62 keySelector: s => s,
63 threshold: .6,
64 ignoreCase: true,
65 ignoreSymbols: true,
66 normalizeWhitespace: true,
67 returnMatchData: false,
68 useDamerau: true,
69 useSellers: true
70};
71
72const noop = () => {};
73
74const arrayWrap = item => item instanceof Array ? item : [item]; // return normalized string, with map included
75
76
77function normalize(string, options) {
78 const lower = options.ignoreCase ? string.toLocaleLowerCase() : string; // track transformations
79
80 const normal = [];
81 const map = [];
82 let lastWasWhitespace = true;
83 let length = 0;
84
85 for (const grapheme of split(lower)) {
86 whitespaceRegex.lastIndex = 0;
87 nonWordRegex.lastIndex = 0;
88
89 if (options.normalizeWhitespace && whitespaceRegex.test(grapheme)) {
90 if (!lastWasWhitespace) {
91 normal.push(" ");
92 map.push(length);
93 lastWasWhitespace = true;
94 }
95 } else if (!(options.ignoreSymbols && nonWordRegex.test(grapheme))) {
96 normal.push(grapheme.normalize());
97 map.push(length);
98 lastWasWhitespace = false;
99 }
100
101 length += grapheme.length;
102 } // add the end of the string
103
104
105 map.push(string.length);
106
107 while (normal[normal.length - 1] === " ") {
108 normal.pop();
109 map.pop();
110 }
111
112 return {
113 original: string,
114 normal,
115 map
116 };
117} // translates a match to the original string
118
119
120function denormalizeMatchPosition(match, map) {
121 return {
122 index: map[match.start],
123 length: map[match.end + 1] - map[match.start]
124 };
125} // walks back up the matrix to find the match index and length
126
127
128function walkBack(rows, scoreIndex) {
129 if (scoreIndex === 0) {
130 return {
131 index: 0,
132 length: 0
133 };
134 }
135
136 let start = scoreIndex;
137
138 for (let i = rows.length - 2; i > 0 && start > 1; i--) {
139 const row = rows[i];
140 start = row[start] < row[start - 1] ? start : start - 1;
141 }
142
143 return {
144 start: start - 1,
145 end: scoreIndex - 1
146 };
147}
148
149const levUpdateScore = () => true;
150
151const sellersUpdateScore = (cur, min) => cur < min;
152
153function getLevScore(rows, length) {
154 const lastRow = rows[rows.length - 1];
155 const lastCell = lastRow[length - 1];
156 const scoreLength = Math.max(rows.length, length);
157 return {
158 score: 1 - lastCell / (scoreLength - 1),
159 scoreIndex: length - 1
160 };
161}
162
163function getSellersScore(rows, length) {
164 // search term was empty string, return perfect score
165 if (rows.length === 1) {
166 return {
167 score: 1,
168 scoreIndex: 0
169 };
170 }
171
172 const lastRow = rows[rows.length - 1];
173 let minValue = lastRow[0];
174 let minIndex = 0;
175
176 for (let i = 1; i < length; i++) {
177 const val = lastRow[i];
178
179 if (val < minValue) {
180 minValue = val;
181 minIndex = i;
182 }
183 }
184
185 return {
186 score: 1 - minValue / (rows.length - 1),
187 scoreIndex: minIndex
188 };
189}
190
191function initLevRows(rowCount, columnCount) {
192 const rows = new Array(rowCount);
193
194 for (let i = 0; i < rowCount; i++) {
195 rows[i] = new Array(columnCount);
196 rows[i][0] = i;
197 }
198
199 for (let i = 0; i < columnCount; i++) {
200 rows[0][i] = i;
201 }
202
203 return rows;
204}
205
206function initSellersRows(rowCount, columnCount) {
207 const rows = new Array(rowCount);
208 rows[0] = new Array(columnCount).fill(0);
209
210 for (let i = 1; i < rowCount; i++) {
211 rows[i] = new Array(columnCount);
212 rows[i][0] = i;
213 }
214
215 return rows;
216} // the content of the innermost loop of levenshtein
217
218
219function levCore(term, candidate, rows, i, j) {
220 const rowA = rows[i];
221 const rowB = rows[i + 1];
222 const cost = term[i] === candidate[j] ? 0 : 1;
223 let m;
224 let min = rowB[j] + 1; // insertion
225
226 if ((m = rowA[j + 1] + 1) < min) min = m; // deletion
227
228 if ((m = rowA[j] + cost) < min) min = m; // substitution
229
230 rowB[j + 1] = min;
231} // runtime complexity: O(mn) where m and n are the lengths of term and candidate, respectively
232// Note: this method only runs on a single column
233
234
235function levenshtein(term, candidate, rows, j) {
236 for (let i = 0; i < term.length; i++) {
237 levCore(term, candidate, rows, i, j);
238 }
239} // has all the runtime characteristics of the above, but punishes transpositions less,
240// resulting in better tolerance to those types of typos
241// Note: this method only runs on a single column
242
243
244function damerauLevenshtein(term, candidate, rows, j) {
245 // if j === 0, we can't check for transpositions,
246 // so use normal levenshtein instead
247 if (j === 0) {
248 levenshtein(term, candidate, rows, j);
249 return;
250 } // for i === 0, we also can't check for transpositions, so calculate
251 // the first row using normal levenshtein as well
252
253
254 if (term.length > 0) {
255 levCore(term, candidate, rows, 0, j);
256 }
257
258 for (let i = 1; i < term.length; i++) {
259 const rowA = rows[i - 1];
260 const rowB = rows[i];
261 const rowC = rows[i + 1];
262 const cost = term[i] === candidate[j] ? 0 : 1;
263 let m; // insertion
264
265 let min = rowC[j] + 1; // deletion
266
267 if ((m = rowB[j + 1] + 1) < min) min = m; // substitution
268
269 if ((m = rowB[j] + cost) < min) min = m; // transposition
270
271 if (term[i] === candidate[j - 1] && term[i - 1] === candidate[j] && (m = rowA[j - 1] + cost) < min) min = m;
272 rowC[j + 1] = min;
273 }
274} // method for creating a trie from search candidates
275// using a trie can significantly improve search time
276
277
278function trieInsert(trie, string, item) {
279 let walker = trie;
280
281 for (let i = 0; i < string.length; i++) {
282 const char = string[i]; // add child node if not already present
283
284 if (walker.children[char] == null) {
285 walker.children[char] = {
286 children: {},
287 candidates: [],
288 depth: 0
289 };
290 } // log max depth of this subtree
291
292
293 walker.depth = Math.max(walker.depth, string.length - i); // step into child node
294
295 walker = walker.children[char];
296 }
297
298 walker.candidates.push(item);
299} // transforms a list of candidates into objects with normalized search keys,
300// and inserts them into a trie
301// the keySelector is used to pick strings from an object to search by
302
303
304function createSearchTrie(trie, index, items, options) {
305 for (const item of items) {
306 const candidates = arrayWrap(options.keySelector(item)).map((key, keyIndex) => ({
307 index,
308 keyIndex,
309 item,
310 normalized: normalize(key, options)
311 }));
312 index++;
313
314 for (const candidate of candidates) {
315 trieInsert(trie, candidate.normalized.normal, candidate);
316 }
317 }
318} // scored item comparator
319
320
321function compareItems(a, b) {
322 const scoreDiff = b.score - a.score;
323
324 if (scoreDiff !== 0) {
325 return scoreDiff;
326 }
327
328 const keyIndexDiff = a.keyIndex - b.keyIndex;
329
330 if (keyIndexDiff !== 0) {
331 return keyIndexDiff;
332 }
333
334 const lengthDiff = a.lengthDiff - b.lengthDiff;
335
336 if (lengthDiff !== 0) {
337 return lengthDiff;
338 }
339
340 return a.index - b.index;
341} // dedupes and adds results to the results list/map
342
343
344function addResult(results, resultMap, candidate, score, match, lengthDiff) {
345 const scoredItem = {
346 item: candidate.item,
347 normalized: candidate.normalized,
348 score,
349 match,
350 index: candidate.index,
351 keyIndex: candidate.keyIndex,
352 lengthDiff
353 };
354
355 if (resultMap[candidate.index] == null) {
356 resultMap[candidate.index] = results.length;
357 results.push(scoredItem);
358 } else if (compareItems(scoredItem, results[resultMap[candidate.index]]) < 0) {
359 results[resultMap[candidate.index]] = scoredItem;
360 }
361}
362
363const getLevLength = Math.max;
364
365const getSellersLength = termLength => termLength; // skip any subtrees for which it is impossible to score >= threshold
366
367
368function levShouldContinue(node, pos, term, threshold, sValue) {
369 // earliest point (length) at which sValue could return to 0
370 const p1 = pos + sValue; // point (length) at which string lengths would match
371
372 const p2 = Math.min(term.length, pos + node.depth + 1); // the best score possible is the string which minimizes the value
373 // max(sValue, strLenDiff), which is always halfway between p1 and p2
374
375 const length = Math.ceil((p1 + p2) / 2);
376 const bestPossibleValue = length - p2;
377 return 1 - bestPossibleValue / length >= threshold;
378}
379
380function sellersShouldContinue(node, _, term, threshold, sValue, lastValue) {
381 const bestPossibleValue = Math.min(sValue, lastValue - (node.depth + 1));
382 return 1 - bestPossibleValue / term.length >= threshold;
383} // recursively walk the trie
384
385
386function searchRecurse(node, acc, len, term, scoreMethods, rows, results, resultMap, options, sIndex, sValue) {
387 // build rows
388 scoreMethods.score(term, acc, rows, len - 1); // track best score and position
389
390 const lastIndex = len;
391 const lastValue = rows[rows.length - 1][lastIndex];
392
393 if (scoreMethods.shouldUpdateScore(lastValue, sValue)) {
394 sIndex = lastIndex;
395 sValue = lastValue;
396 } // insert results
397
398
399 if (node.candidates.length > 0) {
400 const length = scoreMethods.getLength(term.length, len);
401 const score = 1 - sValue / length;
402
403 if (score >= options.threshold) {
404 const match = scoreMethods.walkBack(rows, sIndex);
405 const lengthDiff = Math.abs(len - term.length);
406
407 for (const candidate of node.candidates) {
408 addResult(results, resultMap, candidate, score, match, lengthDiff);
409 }
410 }
411 } // recurse for children
412
413
414 for (const key in node.children) {
415 const child = node.children[key];
416
417 if (scoreMethods.shouldContinue(child, len, term, options.threshold, sValue, lastValue)) {
418 acc[len] = key;
419 searchRecurse(child, acc, len + 1, term, scoreMethods, rows, results, resultMap, options, sIndex, sValue);
420 }
421 }
422} // the core match finder: returns a sorted, filtered list of matches
423// this does not normalize input, requiring users to normalize themselves
424
425
426function searchCore(term, trie, options) {
427 const initMethod = options.useSellers ? initSellersRows : initLevRows;
428 const scoreMethods = {
429 score: options.useDamerau ? damerauLevenshtein : levenshtein,
430 getLength: options.useSellers ? getSellersLength : getLevLength,
431 shouldUpdateScore: options.useSellers ? sellersUpdateScore : levUpdateScore,
432 shouldContinue: options.useSellers ? sellersShouldContinue : levShouldContinue,
433 walkBack: options.returnMatchData && options.useSellers ? walkBack : noop
434 }; // walk the trie, scoring and storing the candidates
435
436 const resultMap = {};
437 const results = [];
438 const rows = initMethod(term.length + 1, trie.depth + 1);
439
440 if (options.threshold <= 0 || term.length === 0) {
441 for (const candidate of trie.candidates) {
442 addResult(results, resultMap, candidate, 0, {
443 index: 0,
444 length: 0
445 }, term.length);
446 }
447 }
448
449 for (const key in trie.children) {
450 const value = trie.children[key];
451 const acc = new Array(trie.depth);
452 acc[0] = key;
453 searchRecurse(value, acc, 1, term, scoreMethods, rows, results, resultMap, options, 0, term.length);
454 }
455
456 const sorted = results.sort(compareItems);
457
458 if (options.returnMatchData) {
459 const denormalize = options.useSellers ? denormalizeMatchPosition : noop;
460 return sorted.map(candidate => ({
461 item: candidate.item,
462 original: candidate.normalized.original,
463 key: candidate.normalized.normal.join(""),
464 score: candidate.score,
465 match: denormalize(candidate.match, candidate.normalized.map)
466 }));
467 }
468
469 return sorted.map(candidate => candidate.item);
470} // wrapper for exporting sellers while allowing options to be passed in
471
472
473function fuzzy(term, candidate, options) {
474 options = _objectSpread2(_objectSpread2({}, defaultOptions), options);
475 const initMethod = options.useSellers ? initSellersRows : initLevRows;
476 const scoreMethod = options.useDamerau ? damerauLevenshtein : levenshtein;
477 const getScore = options.useSellers ? getSellersScore : getLevScore;
478 term = normalize(term, options).normal;
479 const normalized = normalize(candidate, options);
480 const rows = initMethod(term.length + 1, normalized.normal.length + 1);
481
482 for (let j = 0; j < normalized.normal.length; j++) {
483 scoreMethod(term, normalized.normal, rows, j);
484 }
485
486 const scoreResult = getScore(rows, normalized.normal.length + 1);
487 return options.returnMatchData ? {
488 item: candidate,
489 original: normalized.original,
490 key: normalized.normal.join(""),
491 score: scoreResult.score,
492 match: options.useSellers ? denormalizeMatchPosition(walkBack(rows, scoreResult.scoreIndex), normalized.map) : noop()
493 } : scoreResult.score;
494} // simple one-off search. Useful if you don't expect to use the same candidate list again
495
496function search(term, candidates, options) {
497 options = _objectSpread2(_objectSpread2({}, defaultOptions), options);
498 const trie = {
499 children: {},
500 candidates: [],
501 depth: 0
502 };
503 createSearchTrie(trie, 0, candidates, options);
504 return searchCore(normalize(term, options).normal, trie, options);
505} // class that improves performance of searching the same set multiple times
506// normalizes the strings and caches the result for future calls
507
508class Searcher {
509 constructor(candidates, options) {
510 this.options = Object.assign({}, defaultOptions, options);
511 this.trie = {
512 children: {},
513 candidates: [],
514 depth: 0
515 };
516 createSearchTrie(this.trie, 0, candidates, this.options);
517 this.count = candidates.length;
518 }
519
520 add(...candidates) {
521 createSearchTrie(this.trie, this.count, candidates, this.options);
522 this.count += candidates.length;
523 }
524
525 search(term, options) {
526 options = Object.assign({}, this.options, options);
527 return searchCore(normalize(term, this.options).normal, this.trie, options);
528 }
529
530}
531
532exports.Searcher = Searcher;
533exports.fuzzy = fuzzy;
534exports.search = search;