1 | 'use strict';
|
2 |
|
3 | Object.defineProperty(exports, '__esModule', { value: true });
|
4 |
|
5 | function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }
|
6 |
|
7 | var split = _interopDefault(require('graphemesplit'));
|
8 |
|
9 | function _defineProperty(obj, key, value) {
|
10 | if (key in obj) {
|
11 | Object.defineProperty(obj, key, {
|
12 | value: value,
|
13 | enumerable: true,
|
14 | configurable: true,
|
15 | writable: true
|
16 | });
|
17 | } else {
|
18 | obj[key] = value;
|
19 | }
|
20 |
|
21 | return obj;
|
22 | }
|
23 |
|
24 | function ownKeys(object, enumerableOnly) {
|
25 | var keys = Object.keys(object);
|
26 |
|
27 | if (Object.getOwnPropertySymbols) {
|
28 | var symbols = Object.getOwnPropertySymbols(object);
|
29 | if (enumerableOnly) symbols = symbols.filter(function (sym) {
|
30 | return Object.getOwnPropertyDescriptor(object, sym).enumerable;
|
31 | });
|
32 | keys.push.apply(keys, symbols);
|
33 | }
|
34 |
|
35 | return keys;
|
36 | }
|
37 |
|
38 | function _objectSpread2(target) {
|
39 | for (var i = 1; i < arguments.length; i++) {
|
40 | var source = arguments[i] != null ? arguments[i] : {};
|
41 |
|
42 | if (i % 2) {
|
43 | ownKeys(Object(source), true).forEach(function (key) {
|
44 | _defineProperty(target, key, source[key]);
|
45 | });
|
46 | } else if (Object.getOwnPropertyDescriptors) {
|
47 | Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
|
48 | } else {
|
49 | ownKeys(Object(source)).forEach(function (key) {
|
50 | Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
|
51 | });
|
52 | }
|
53 | }
|
54 |
|
55 | return target;
|
56 | }
|
57 |
|
58 | const whitespaceRegex = /^\s+$/;
|
59 | const nonWordRegex = /^[`~!@#$%^&*()\-=_+{}[\]\|\\;':",./<>?]+$/;
|
60 |
|
61 | const defaultOptions = {
|
62 | keySelector: s => s,
|
63 | threshold: .6,
|
64 | ignoreCase: true,
|
65 | ignoreSymbols: true,
|
66 | normalizeWhitespace: true,
|
67 | returnMatchData: false,
|
68 | useDamerau: true,
|
69 | useSellers: true
|
70 | };
|
71 |
|
72 | const noop = () => {};
|
73 |
|
74 | const arrayWrap = item => item instanceof Array ? item : [item];
|
75 |
|
76 |
|
77 | function normalize(string, options) {
|
78 | const lower = options.ignoreCase ? string.toLocaleLowerCase() : string;
|
79 |
|
80 | const normal = [];
|
81 | const map = [];
|
82 | let lastWasWhitespace = true;
|
83 | let length = 0;
|
84 |
|
85 | for (const grapheme of split(lower)) {
|
86 | whitespaceRegex.lastIndex = 0;
|
87 | nonWordRegex.lastIndex = 0;
|
88 |
|
89 | if (options.normalizeWhitespace && whitespaceRegex.test(grapheme)) {
|
90 | if (!lastWasWhitespace) {
|
91 | normal.push(" ");
|
92 | map.push(length);
|
93 | lastWasWhitespace = true;
|
94 | }
|
95 | } else if (!(options.ignoreSymbols && nonWordRegex.test(grapheme))) {
|
96 | normal.push(grapheme.normalize());
|
97 | map.push(length);
|
98 | lastWasWhitespace = false;
|
99 | }
|
100 |
|
101 | length += grapheme.length;
|
102 | }
|
103 |
|
104 |
|
105 | map.push(string.length);
|
106 |
|
107 | while (normal[normal.length - 1] === " ") {
|
108 | normal.pop();
|
109 | map.pop();
|
110 | }
|
111 |
|
112 | return {
|
113 | original: string,
|
114 | normal,
|
115 | map
|
116 | };
|
117 | }
|
118 |
|
119 |
|
120 | function denormalizeMatchPosition(match, map) {
|
121 | return {
|
122 | index: map[match.start],
|
123 | length: map[match.end + 1] - map[match.start]
|
124 | };
|
125 | }
|
126 |
|
127 |
|
128 | function walkBack(rows, scoreIndex) {
|
129 | if (scoreIndex === 0) {
|
130 | return {
|
131 | index: 0,
|
132 | length: 0
|
133 | };
|
134 | }
|
135 |
|
136 | let start = scoreIndex;
|
137 |
|
138 | for (let i = rows.length - 2; i > 0 && start > 1; i--) {
|
139 | const row = rows[i];
|
140 | start = row[start] < row[start - 1] ? start : start - 1;
|
141 | }
|
142 |
|
143 | return {
|
144 | start: start - 1,
|
145 | end: scoreIndex - 1
|
146 | };
|
147 | }
|
148 |
|
149 | const levUpdateScore = () => true;
|
150 |
|
151 | const sellersUpdateScore = (cur, min) => cur < min;
|
152 |
|
153 | function getLevScore(rows, length) {
|
154 | const lastRow = rows[rows.length - 1];
|
155 | const lastCell = lastRow[length - 1];
|
156 | const scoreLength = Math.max(rows.length, length);
|
157 | return {
|
158 | score: 1 - lastCell / (scoreLength - 1),
|
159 | scoreIndex: length - 1
|
160 | };
|
161 | }
|
162 |
|
163 | function getSellersScore(rows, length) {
|
164 |
|
165 | if (rows.length === 1) {
|
166 | return {
|
167 | score: 1,
|
168 | scoreIndex: 0
|
169 | };
|
170 | }
|
171 |
|
172 | const lastRow = rows[rows.length - 1];
|
173 | let minValue = lastRow[0];
|
174 | let minIndex = 0;
|
175 |
|
176 | for (let i = 1; i < length; i++) {
|
177 | const val = lastRow[i];
|
178 |
|
179 | if (val < minValue) {
|
180 | minValue = val;
|
181 | minIndex = i;
|
182 | }
|
183 | }
|
184 |
|
185 | return {
|
186 | score: 1 - minValue / (rows.length - 1),
|
187 | scoreIndex: minIndex
|
188 | };
|
189 | }
|
190 |
|
191 | function initLevRows(rowCount, columnCount) {
|
192 | const rows = new Array(rowCount);
|
193 |
|
194 | for (let i = 0; i < rowCount; i++) {
|
195 | rows[i] = new Array(columnCount);
|
196 | rows[i][0] = i;
|
197 | }
|
198 |
|
199 | for (let i = 0; i < columnCount; i++) {
|
200 | rows[0][i] = i;
|
201 | }
|
202 |
|
203 | return rows;
|
204 | }
|
205 |
|
206 | function initSellersRows(rowCount, columnCount) {
|
207 | const rows = new Array(rowCount);
|
208 | rows[0] = new Array(columnCount).fill(0);
|
209 |
|
210 | for (let i = 1; i < rowCount; i++) {
|
211 | rows[i] = new Array(columnCount);
|
212 | rows[i][0] = i;
|
213 | }
|
214 |
|
215 | return rows;
|
216 | }
|
217 |
|
218 |
|
219 | function levCore(term, candidate, rows, i, j) {
|
220 | const rowA = rows[i];
|
221 | const rowB = rows[i + 1];
|
222 | const cost = term[i] === candidate[j] ? 0 : 1;
|
223 | let m;
|
224 | let min = rowB[j] + 1;
|
225 |
|
226 | if ((m = rowA[j + 1] + 1) < min) min = m;
|
227 |
|
228 | if ((m = rowA[j] + cost) < min) min = m;
|
229 |
|
230 | rowB[j + 1] = min;
|
231 | }
|
232 |
|
233 |
|
234 |
|
235 | function levenshtein(term, candidate, rows, j) {
|
236 | for (let i = 0; i < term.length; i++) {
|
237 | levCore(term, candidate, rows, i, j);
|
238 | }
|
239 | }
|
240 |
|
241 |
|
242 |
|
243 |
|
244 | function damerauLevenshtein(term, candidate, rows, j) {
|
245 |
|
246 |
|
247 | if (j === 0) {
|
248 | levenshtein(term, candidate, rows, j);
|
249 | return;
|
250 | }
|
251 |
|
252 |
|
253 |
|
254 | if (term.length > 0) {
|
255 | levCore(term, candidate, rows, 0, j);
|
256 | }
|
257 |
|
258 | for (let i = 1; i < term.length; i++) {
|
259 | const rowA = rows[i - 1];
|
260 | const rowB = rows[i];
|
261 | const rowC = rows[i + 1];
|
262 | const cost = term[i] === candidate[j] ? 0 : 1;
|
263 | let m;
|
264 |
|
265 | let min = rowC[j] + 1;
|
266 |
|
267 | if ((m = rowB[j + 1] + 1) < min) min = m;
|
268 |
|
269 | if ((m = rowB[j] + cost) < min) min = m;
|
270 |
|
271 | if (term[i] === candidate[j - 1] && term[i - 1] === candidate[j] && (m = rowA[j - 1] + cost) < min) min = m;
|
272 | rowC[j + 1] = min;
|
273 | }
|
274 | }
|
275 |
|
276 |
|
277 |
|
278 | function trieInsert(trie, string, item) {
|
279 | let walker = trie;
|
280 |
|
281 | for (let i = 0; i < string.length; i++) {
|
282 | const char = string[i];
|
283 |
|
284 | if (walker.children[char] == null) {
|
285 | walker.children[char] = {
|
286 | children: {},
|
287 | candidates: [],
|
288 | depth: 0
|
289 | };
|
290 | }
|
291 |
|
292 |
|
293 | walker.depth = Math.max(walker.depth, string.length - i);
|
294 |
|
295 | walker = walker.children[char];
|
296 | }
|
297 |
|
298 | walker.candidates.push(item);
|
299 | }
|
300 |
|
301 |
|
302 |
|
303 |
|
304 | function createSearchTrie(trie, index, items, options) {
|
305 | for (const item of items) {
|
306 | const candidates = arrayWrap(options.keySelector(item)).map((key, keyIndex) => ({
|
307 | index,
|
308 | keyIndex,
|
309 | item,
|
310 | normalized: normalize(key, options)
|
311 | }));
|
312 | index++;
|
313 |
|
314 | for (const candidate of candidates) {
|
315 | trieInsert(trie, candidate.normalized.normal, candidate);
|
316 | }
|
317 | }
|
318 | }
|
319 |
|
320 |
|
321 | function compareItems(a, b) {
|
322 | const scoreDiff = b.score - a.score;
|
323 |
|
324 | if (scoreDiff !== 0) {
|
325 | return scoreDiff;
|
326 | }
|
327 |
|
328 | const keyIndexDiff = a.keyIndex - b.keyIndex;
|
329 |
|
330 | if (keyIndexDiff !== 0) {
|
331 | return keyIndexDiff;
|
332 | }
|
333 |
|
334 | const lengthDiff = a.lengthDiff - b.lengthDiff;
|
335 |
|
336 | if (lengthDiff !== 0) {
|
337 | return lengthDiff;
|
338 | }
|
339 |
|
340 | return a.index - b.index;
|
341 | }
|
342 |
|
343 |
|
344 | function addResult(results, resultMap, candidate, score, match, lengthDiff) {
|
345 | const scoredItem = {
|
346 | item: candidate.item,
|
347 | normalized: candidate.normalized,
|
348 | score,
|
349 | match,
|
350 | index: candidate.index,
|
351 | keyIndex: candidate.keyIndex,
|
352 | lengthDiff
|
353 | };
|
354 |
|
355 | if (resultMap[candidate.index] == null) {
|
356 | resultMap[candidate.index] = results.length;
|
357 | results.push(scoredItem);
|
358 | } else if (compareItems(scoredItem, results[resultMap[candidate.index]]) < 0) {
|
359 | results[resultMap[candidate.index]] = scoredItem;
|
360 | }
|
361 | }
|
362 |
|
363 | const getLevLength = Math.max;
|
364 |
|
365 | const getSellersLength = termLength => termLength;
|
366 |
|
367 |
|
368 | function levShouldContinue(node, pos, term, threshold, sValue) {
|
369 |
|
370 | const p1 = pos + sValue;
|
371 |
|
372 | const p2 = Math.min(term.length, pos + node.depth + 1);
|
373 |
|
374 |
|
375 | const length = Math.ceil((p1 + p2) / 2);
|
376 | const bestPossibleValue = length - p2;
|
377 | return 1 - bestPossibleValue / length >= threshold;
|
378 | }
|
379 |
|
380 | function sellersShouldContinue(node, _, term, threshold, sValue, lastValue) {
|
381 | const bestPossibleValue = Math.min(sValue, lastValue - (node.depth + 1));
|
382 | return 1 - bestPossibleValue / term.length >= threshold;
|
383 | }
|
384 |
|
385 |
|
386 | function searchRecurse(node, acc, len, term, scoreMethods, rows, results, resultMap, options, sIndex, sValue) {
|
387 |
|
388 | scoreMethods.score(term, acc, rows, len - 1);
|
389 |
|
390 | const lastIndex = len;
|
391 | const lastValue = rows[rows.length - 1][lastIndex];
|
392 |
|
393 | if (scoreMethods.shouldUpdateScore(lastValue, sValue)) {
|
394 | sIndex = lastIndex;
|
395 | sValue = lastValue;
|
396 | }
|
397 |
|
398 |
|
399 | if (node.candidates.length > 0) {
|
400 | const length = scoreMethods.getLength(term.length, len);
|
401 | const score = 1 - sValue / length;
|
402 |
|
403 | if (score >= options.threshold) {
|
404 | const match = scoreMethods.walkBack(rows, sIndex);
|
405 | const lengthDiff = Math.abs(len - term.length);
|
406 |
|
407 | for (const candidate of node.candidates) {
|
408 | addResult(results, resultMap, candidate, score, match, lengthDiff);
|
409 | }
|
410 | }
|
411 | }
|
412 |
|
413 |
|
414 | for (const key in node.children) {
|
415 | const child = node.children[key];
|
416 |
|
417 | if (scoreMethods.shouldContinue(child, len, term, options.threshold, sValue, lastValue)) {
|
418 | acc[len] = key;
|
419 | searchRecurse(child, acc, len + 1, term, scoreMethods, rows, results, resultMap, options, sIndex, sValue);
|
420 | }
|
421 | }
|
422 | }
|
423 |
|
424 |
|
425 |
|
426 | function searchCore(term, trie, options) {
|
427 | const initMethod = options.useSellers ? initSellersRows : initLevRows;
|
428 | const scoreMethods = {
|
429 | score: options.useDamerau ? damerauLevenshtein : levenshtein,
|
430 | getLength: options.useSellers ? getSellersLength : getLevLength,
|
431 | shouldUpdateScore: options.useSellers ? sellersUpdateScore : levUpdateScore,
|
432 | shouldContinue: options.useSellers ? sellersShouldContinue : levShouldContinue,
|
433 | walkBack: options.returnMatchData && options.useSellers ? walkBack : noop
|
434 | };
|
435 |
|
436 | const resultMap = {};
|
437 | const results = [];
|
438 | const rows = initMethod(term.length + 1, trie.depth + 1);
|
439 |
|
440 | if (options.threshold <= 0 || term.length === 0) {
|
441 | for (const candidate of trie.candidates) {
|
442 | addResult(results, resultMap, candidate, 0, {
|
443 | index: 0,
|
444 | length: 0
|
445 | }, term.length);
|
446 | }
|
447 | }
|
448 |
|
449 | for (const key in trie.children) {
|
450 | const value = trie.children[key];
|
451 | const acc = new Array(trie.depth);
|
452 | acc[0] = key;
|
453 | searchRecurse(value, acc, 1, term, scoreMethods, rows, results, resultMap, options, 0, term.length);
|
454 | }
|
455 |
|
456 | const sorted = results.sort(compareItems);
|
457 |
|
458 | if (options.returnMatchData) {
|
459 | const denormalize = options.useSellers ? denormalizeMatchPosition : noop;
|
460 | return sorted.map(candidate => ({
|
461 | item: candidate.item,
|
462 | original: candidate.normalized.original,
|
463 | key: candidate.normalized.normal.join(""),
|
464 | score: candidate.score,
|
465 | match: denormalize(candidate.match, candidate.normalized.map)
|
466 | }));
|
467 | }
|
468 |
|
469 | return sorted.map(candidate => candidate.item);
|
470 | }
|
471 |
|
472 |
|
473 | function fuzzy(term, candidate, options) {
|
474 | options = _objectSpread2(_objectSpread2({}, defaultOptions), options);
|
475 | const initMethod = options.useSellers ? initSellersRows : initLevRows;
|
476 | const scoreMethod = options.useDamerau ? damerauLevenshtein : levenshtein;
|
477 | const getScore = options.useSellers ? getSellersScore : getLevScore;
|
478 | term = normalize(term, options).normal;
|
479 | const normalized = normalize(candidate, options);
|
480 | const rows = initMethod(term.length + 1, normalized.normal.length + 1);
|
481 |
|
482 | for (let j = 0; j < normalized.normal.length; j++) {
|
483 | scoreMethod(term, normalized.normal, rows, j);
|
484 | }
|
485 |
|
486 | const scoreResult = getScore(rows, normalized.normal.length + 1);
|
487 | return options.returnMatchData ? {
|
488 | item: candidate,
|
489 | original: normalized.original,
|
490 | key: normalized.normal.join(""),
|
491 | score: scoreResult.score,
|
492 | match: options.useSellers ? denormalizeMatchPosition(walkBack(rows, scoreResult.scoreIndex), normalized.map) : noop()
|
493 | } : scoreResult.score;
|
494 | }
|
495 |
|
496 | function search(term, candidates, options) {
|
497 | options = _objectSpread2(_objectSpread2({}, defaultOptions), options);
|
498 | const trie = {
|
499 | children: {},
|
500 | candidates: [],
|
501 | depth: 0
|
502 | };
|
503 | createSearchTrie(trie, 0, candidates, options);
|
504 | return searchCore(normalize(term, options).normal, trie, options);
|
505 | }
|
506 |
|
507 |
|
508 | class Searcher {
|
509 | constructor(candidates, options) {
|
510 | this.options = Object.assign({}, defaultOptions, options);
|
511 | this.trie = {
|
512 | children: {},
|
513 | candidates: [],
|
514 | depth: 0
|
515 | };
|
516 | createSearchTrie(this.trie, 0, candidates, this.options);
|
517 | this.count = candidates.length;
|
518 | }
|
519 |
|
520 | add(...candidates) {
|
521 | createSearchTrie(this.trie, this.count, candidates, this.options);
|
522 | this.count += candidates.length;
|
523 | }
|
524 |
|
525 | search(term, options) {
|
526 | options = Object.assign({}, this.options, options);
|
527 | return searchCore(normalize(term, this.options).normal, this.trie, options);
|
528 | }
|
529 |
|
530 | }
|
531 |
|
532 | exports.Searcher = Searcher;
|
533 | exports.fuzzy = fuzzy;
|
534 | exports.search = search;
|