1 | import split from 'graphemesplit';
|
2 |
|
3 | function _defineProperty(obj, key, value) {
|
4 | if (key in obj) {
|
5 | Object.defineProperty(obj, key, {
|
6 | value: value,
|
7 | enumerable: true,
|
8 | configurable: true,
|
9 | writable: true
|
10 | });
|
11 | } else {
|
12 | obj[key] = value;
|
13 | }
|
14 |
|
15 | return obj;
|
16 | }
|
17 |
|
18 | function ownKeys(object, enumerableOnly) {
|
19 | var keys = Object.keys(object);
|
20 |
|
21 | if (Object.getOwnPropertySymbols) {
|
22 | var symbols = Object.getOwnPropertySymbols(object);
|
23 | if (enumerableOnly) symbols = symbols.filter(function (sym) {
|
24 | return Object.getOwnPropertyDescriptor(object, sym).enumerable;
|
25 | });
|
26 | keys.push.apply(keys, symbols);
|
27 | }
|
28 |
|
29 | return keys;
|
30 | }
|
31 |
|
32 | function _objectSpread2(target) {
|
33 | for (var i = 1; i < arguments.length; i++) {
|
34 | var source = arguments[i] != null ? arguments[i] : {};
|
35 |
|
36 | if (i % 2) {
|
37 | ownKeys(Object(source), true).forEach(function (key) {
|
38 | _defineProperty(target, key, source[key]);
|
39 | });
|
40 | } else if (Object.getOwnPropertyDescriptors) {
|
41 | Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
|
42 | } else {
|
43 | ownKeys(Object(source)).forEach(function (key) {
|
44 | Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
|
45 | });
|
46 | }
|
47 | }
|
48 |
|
49 | return target;
|
50 | }
|
51 |
|
52 | const whitespaceRegex = /^\s+$/;
|
53 | const nonWordRegex = /^[`~!@#$%^&*()\-=_+{}[\]\|\\;':",./<>?]+$/;
|
54 |
|
55 | const defaultOptions = {
|
56 | keySelector: s => s,
|
57 | threshold: .6,
|
58 | ignoreCase: true,
|
59 | ignoreSymbols: true,
|
60 | normalizeWhitespace: true,
|
61 | returnMatchData: false,
|
62 | useDamerau: true,
|
63 | useSellers: true
|
64 | };
|
65 |
|
66 | const noop = () => {};
|
67 |
|
68 | const arrayWrap = item => item instanceof Array ? item : [item];
|
69 |
|
70 |
|
71 | function normalize(string, options) {
|
72 | const lower = options.ignoreCase ? string.toLocaleLowerCase() : string;
|
73 |
|
74 | const normal = [];
|
75 | const map = [];
|
76 | let lastWasWhitespace = true;
|
77 | let length = 0;
|
78 |
|
79 | for (const grapheme of split(lower)) {
|
80 | whitespaceRegex.lastIndex = 0;
|
81 | nonWordRegex.lastIndex = 0;
|
82 |
|
83 | if (options.normalizeWhitespace && whitespaceRegex.test(grapheme)) {
|
84 | if (!lastWasWhitespace) {
|
85 | normal.push(" ");
|
86 | map.push(length);
|
87 | lastWasWhitespace = true;
|
88 | }
|
89 | } else if (!(options.ignoreSymbols && nonWordRegex.test(grapheme))) {
|
90 | normal.push(grapheme.normalize());
|
91 | map.push(length);
|
92 | lastWasWhitespace = false;
|
93 | }
|
94 |
|
95 | length += grapheme.length;
|
96 | }
|
97 |
|
98 |
|
99 | map.push(string.length);
|
100 |
|
101 | while (normal[normal.length - 1] === " ") {
|
102 | normal.pop();
|
103 | map.pop();
|
104 | }
|
105 |
|
106 | return {
|
107 | original: string,
|
108 | normal,
|
109 | map
|
110 | };
|
111 | }
|
112 |
|
113 |
|
114 | function denormalizeMatchPosition(match, map) {
|
115 | return {
|
116 | index: map[match.start],
|
117 | length: map[match.end + 1] - map[match.start]
|
118 | };
|
119 | }
|
120 |
|
121 |
|
122 | function walkBack(rows, scoreIndex) {
|
123 | if (scoreIndex === 0) {
|
124 | return {
|
125 | index: 0,
|
126 | length: 0
|
127 | };
|
128 | }
|
129 |
|
130 | let start = scoreIndex;
|
131 |
|
132 | for (let i = rows.length - 2; i > 0 && start > 1; i--) {
|
133 | const row = rows[i];
|
134 | start = row[start] < row[start - 1] ? start : start - 1;
|
135 | }
|
136 |
|
137 | return {
|
138 | start: start - 1,
|
139 | end: scoreIndex - 1
|
140 | };
|
141 | }
|
142 |
|
143 | const levUpdateScore = () => true;
|
144 |
|
145 | const sellersUpdateScore = (cur, min) => cur < min;
|
146 |
|
147 | function getLevScore(rows, length) {
|
148 | const lastRow = rows[rows.length - 1];
|
149 | const lastCell = lastRow[length - 1];
|
150 | const scoreLength = Math.max(rows.length, length);
|
151 | return {
|
152 | score: 1 - lastCell / (scoreLength - 1),
|
153 | scoreIndex: length - 1
|
154 | };
|
155 | }
|
156 |
|
157 | function getSellersScore(rows, length) {
|
158 |
|
159 | if (rows.length === 1) {
|
160 | return {
|
161 | score: 1,
|
162 | scoreIndex: 0
|
163 | };
|
164 | }
|
165 |
|
166 | const lastRow = rows[rows.length - 1];
|
167 | let minValue = lastRow[0];
|
168 | let minIndex = 0;
|
169 |
|
170 | for (let i = 1; i < length; i++) {
|
171 | const val = lastRow[i];
|
172 |
|
173 | if (val < minValue) {
|
174 | minValue = val;
|
175 | minIndex = i;
|
176 | }
|
177 | }
|
178 |
|
179 | return {
|
180 | score: 1 - minValue / (rows.length - 1),
|
181 | scoreIndex: minIndex
|
182 | };
|
183 | }
|
184 |
|
185 | function initLevRows(rowCount, columnCount) {
|
186 | const rows = new Array(rowCount);
|
187 |
|
188 | for (let i = 0; i < rowCount; i++) {
|
189 | rows[i] = new Array(columnCount);
|
190 | rows[i][0] = i;
|
191 | }
|
192 |
|
193 | for (let i = 0; i < columnCount; i++) {
|
194 | rows[0][i] = i;
|
195 | }
|
196 |
|
197 | return rows;
|
198 | }
|
199 |
|
200 | function initSellersRows(rowCount, columnCount) {
|
201 | const rows = new Array(rowCount);
|
202 | rows[0] = new Array(columnCount).fill(0);
|
203 |
|
204 | for (let i = 1; i < rowCount; i++) {
|
205 | rows[i] = new Array(columnCount);
|
206 | rows[i][0] = i;
|
207 | }
|
208 |
|
209 | return rows;
|
210 | }
|
211 |
|
212 |
|
213 | function levCore(term, candidate, rows, i, j) {
|
214 | const rowA = rows[i];
|
215 | const rowB = rows[i + 1];
|
216 | const cost = term[i] === candidate[j] ? 0 : 1;
|
217 | let m;
|
218 | let min = rowB[j] + 1;
|
219 |
|
220 | if ((m = rowA[j + 1] + 1) < min) min = m;
|
221 |
|
222 | if ((m = rowA[j] + cost) < min) min = m;
|
223 |
|
224 | rowB[j + 1] = min;
|
225 | }
|
226 |
|
227 |
|
228 |
|
229 | function levenshtein(term, candidate, rows, j) {
|
230 | for (let i = 0; i < term.length; i++) {
|
231 | levCore(term, candidate, rows, i, j);
|
232 | }
|
233 | }
|
234 |
|
235 |
|
236 |
|
237 |
|
238 | function damerauLevenshtein(term, candidate, rows, j) {
|
239 |
|
240 |
|
241 | if (j === 0) {
|
242 | levenshtein(term, candidate, rows, j);
|
243 | return;
|
244 | }
|
245 |
|
246 |
|
247 |
|
248 | if (term.length > 0) {
|
249 | levCore(term, candidate, rows, 0, j);
|
250 | }
|
251 |
|
252 | for (let i = 1; i < term.length; i++) {
|
253 | const rowA = rows[i - 1];
|
254 | const rowB = rows[i];
|
255 | const rowC = rows[i + 1];
|
256 | const cost = term[i] === candidate[j] ? 0 : 1;
|
257 | let m;
|
258 |
|
259 | let min = rowC[j] + 1;
|
260 |
|
261 | if ((m = rowB[j + 1] + 1) < min) min = m;
|
262 |
|
263 | if ((m = rowB[j] + cost) < min) min = m;
|
264 |
|
265 | if (term[i] === candidate[j - 1] && term[i - 1] === candidate[j] && (m = rowA[j - 1] + cost) < min) min = m;
|
266 | rowC[j + 1] = min;
|
267 | }
|
268 | }
|
269 |
|
270 |
|
271 |
|
272 | function trieInsert(trie, string, item) {
|
273 | let walker = trie;
|
274 |
|
275 | for (let i = 0; i < string.length; i++) {
|
276 | const char = string[i];
|
277 |
|
278 | if (walker.children[char] == null) {
|
279 | walker.children[char] = {
|
280 | children: {},
|
281 | candidates: [],
|
282 | depth: 0
|
283 | };
|
284 | }
|
285 |
|
286 |
|
287 | walker.depth = Math.max(walker.depth, string.length - i);
|
288 |
|
289 | walker = walker.children[char];
|
290 | }
|
291 |
|
292 | walker.candidates.push(item);
|
293 | }
|
294 |
|
295 |
|
296 |
|
297 |
|
298 | function createSearchTrie(trie, index, items, options) {
|
299 | for (const item of items) {
|
300 | const candidates = arrayWrap(options.keySelector(item)).map((key, keyIndex) => ({
|
301 | index,
|
302 | keyIndex,
|
303 | item,
|
304 | normalized: normalize(key, options)
|
305 | }));
|
306 | index++;
|
307 |
|
308 | for (const candidate of candidates) {
|
309 | trieInsert(trie, candidate.normalized.normal, candidate);
|
310 | }
|
311 | }
|
312 | }
|
313 |
|
314 |
|
315 | function compareItems(a, b) {
|
316 | const scoreDiff = b.score - a.score;
|
317 |
|
318 | if (scoreDiff !== 0) {
|
319 | return scoreDiff;
|
320 | }
|
321 |
|
322 | const keyIndexDiff = a.keyIndex - b.keyIndex;
|
323 |
|
324 | if (keyIndexDiff !== 0) {
|
325 | return keyIndexDiff;
|
326 | }
|
327 |
|
328 | const lengthDiff = a.lengthDiff - b.lengthDiff;
|
329 |
|
330 | if (lengthDiff !== 0) {
|
331 | return lengthDiff;
|
332 | }
|
333 |
|
334 | return a.index - b.index;
|
335 | }
|
336 |
|
337 |
|
338 | function addResult(results, resultMap, candidate, score, match, lengthDiff) {
|
339 | const scoredItem = {
|
340 | item: candidate.item,
|
341 | normalized: candidate.normalized,
|
342 | score,
|
343 | match,
|
344 | index: candidate.index,
|
345 | keyIndex: candidate.keyIndex,
|
346 | lengthDiff
|
347 | };
|
348 |
|
349 | if (resultMap[candidate.index] == null) {
|
350 | resultMap[candidate.index] = results.length;
|
351 | results.push(scoredItem);
|
352 | } else if (compareItems(scoredItem, results[resultMap[candidate.index]]) < 0) {
|
353 | results[resultMap[candidate.index]] = scoredItem;
|
354 | }
|
355 | }
|
356 |
|
357 | const getLevLength = Math.max;
|
358 |
|
359 | const getSellersLength = termLength => termLength;
|
360 |
|
361 |
|
362 | function levShouldContinue(node, pos, term, threshold, sValue) {
|
363 |
|
364 | const p1 = pos + sValue;
|
365 |
|
366 | const p2 = Math.min(term.length, pos + node.depth + 1);
|
367 |
|
368 |
|
369 | const length = Math.ceil((p1 + p2) / 2);
|
370 | const bestPossibleValue = length - p2;
|
371 | return 1 - bestPossibleValue / length >= threshold;
|
372 | }
|
373 |
|
374 | function sellersShouldContinue(node, _, term, threshold, sValue, lastValue) {
|
375 | const bestPossibleValue = Math.min(sValue, lastValue - (node.depth + 1));
|
376 | return 1 - bestPossibleValue / term.length >= threshold;
|
377 | }
|
378 |
|
379 |
|
380 | function searchRecurse(node, acc, len, term, scoreMethods, rows, results, resultMap, options, sIndex, sValue) {
|
381 |
|
382 | scoreMethods.score(term, acc, rows, len - 1);
|
383 |
|
384 | const lastIndex = len;
|
385 | const lastValue = rows[rows.length - 1][lastIndex];
|
386 |
|
387 | if (scoreMethods.shouldUpdateScore(lastValue, sValue)) {
|
388 | sIndex = lastIndex;
|
389 | sValue = lastValue;
|
390 | }
|
391 |
|
392 |
|
393 | if (node.candidates.length > 0) {
|
394 | const length = scoreMethods.getLength(term.length, len);
|
395 | const score = 1 - sValue / length;
|
396 |
|
397 | if (score >= options.threshold) {
|
398 | const match = scoreMethods.walkBack(rows, sIndex);
|
399 | const lengthDiff = Math.abs(len - term.length);
|
400 |
|
401 | for (const candidate of node.candidates) {
|
402 | addResult(results, resultMap, candidate, score, match, lengthDiff);
|
403 | }
|
404 | }
|
405 | }
|
406 |
|
407 |
|
408 | for (const key in node.children) {
|
409 | const child = node.children[key];
|
410 |
|
411 | if (scoreMethods.shouldContinue(child, len, term, options.threshold, sValue, lastValue)) {
|
412 | acc[len] = key;
|
413 | searchRecurse(child, acc, len + 1, term, scoreMethods, rows, results, resultMap, options, sIndex, sValue);
|
414 | }
|
415 | }
|
416 | }
|
417 |
|
418 |
|
419 |
|
420 | function searchCore(term, trie, options) {
|
421 | const initMethod = options.useSellers ? initSellersRows : initLevRows;
|
422 | const scoreMethods = {
|
423 | score: options.useDamerau ? damerauLevenshtein : levenshtein,
|
424 | getLength: options.useSellers ? getSellersLength : getLevLength,
|
425 | shouldUpdateScore: options.useSellers ? sellersUpdateScore : levUpdateScore,
|
426 | shouldContinue: options.useSellers ? sellersShouldContinue : levShouldContinue,
|
427 | walkBack: options.returnMatchData && options.useSellers ? walkBack : noop
|
428 | };
|
429 |
|
430 | const resultMap = {};
|
431 | const results = [];
|
432 | const rows = initMethod(term.length + 1, trie.depth + 1);
|
433 |
|
434 | if (options.threshold <= 0 || term.length === 0) {
|
435 | for (const candidate of trie.candidates) {
|
436 | addResult(results, resultMap, candidate, 0, {
|
437 | index: 0,
|
438 | length: 0
|
439 | }, term.length);
|
440 | }
|
441 | }
|
442 |
|
443 | for (const key in trie.children) {
|
444 | const value = trie.children[key];
|
445 | const acc = new Array(trie.depth);
|
446 | acc[0] = key;
|
447 | searchRecurse(value, acc, 1, term, scoreMethods, rows, results, resultMap, options, 0, term.length);
|
448 | }
|
449 |
|
450 | const sorted = results.sort(compareItems);
|
451 |
|
452 | if (options.returnMatchData) {
|
453 | const denormalize = options.useSellers ? denormalizeMatchPosition : noop;
|
454 | return sorted.map(candidate => ({
|
455 | item: candidate.item,
|
456 | original: candidate.normalized.original,
|
457 | key: candidate.normalized.normal.join(""),
|
458 | score: candidate.score,
|
459 | match: denormalize(candidate.match, candidate.normalized.map)
|
460 | }));
|
461 | }
|
462 |
|
463 | return sorted.map(candidate => candidate.item);
|
464 | }
|
465 |
|
466 |
|
467 | function fuzzy(term, candidate, options) {
|
468 | options = _objectSpread2(_objectSpread2({}, defaultOptions), options);
|
469 | const initMethod = options.useSellers ? initSellersRows : initLevRows;
|
470 | const scoreMethod = options.useDamerau ? damerauLevenshtein : levenshtein;
|
471 | const getScore = options.useSellers ? getSellersScore : getLevScore;
|
472 | term = normalize(term, options).normal;
|
473 | const normalized = normalize(candidate, options);
|
474 | const rows = initMethod(term.length + 1, normalized.normal.length + 1);
|
475 |
|
476 | for (let j = 0; j < normalized.normal.length; j++) {
|
477 | scoreMethod(term, normalized.normal, rows, j);
|
478 | }
|
479 |
|
480 | const scoreResult = getScore(rows, normalized.normal.length + 1);
|
481 | return options.returnMatchData ? {
|
482 | item: candidate,
|
483 | original: normalized.original,
|
484 | key: normalized.normal.join(""),
|
485 | score: scoreResult.score,
|
486 | match: options.useSellers ? denormalizeMatchPosition(walkBack(rows, scoreResult.scoreIndex), normalized.map) : noop()
|
487 | } : scoreResult.score;
|
488 | }
|
489 |
|
490 | function search(term, candidates, options) {
|
491 | options = _objectSpread2(_objectSpread2({}, defaultOptions), options);
|
492 | const trie = {
|
493 | children: {},
|
494 | candidates: [],
|
495 | depth: 0
|
496 | };
|
497 | createSearchTrie(trie, 0, candidates, options);
|
498 | return searchCore(normalize(term, options).normal, trie, options);
|
499 | }
|
500 |
|
501 |
|
502 | class Searcher {
|
503 | constructor(candidates, options) {
|
504 | this.options = Object.assign({}, defaultOptions, options);
|
505 | this.trie = {
|
506 | children: {},
|
507 | candidates: [],
|
508 | depth: 0
|
509 | };
|
510 | createSearchTrie(this.trie, 0, candidates, this.options);
|
511 | this.count = candidates.length;
|
512 | }
|
513 |
|
514 | add(...candidates) {
|
515 | createSearchTrie(this.trie, this.count, candidates, this.options);
|
516 | this.count += candidates.length;
|
517 | }
|
518 |
|
519 | search(term, options) {
|
520 | options = Object.assign({}, this.options, options);
|
521 | return searchCore(normalize(term, this.options).normal, this.trie, options);
|
522 | }
|
523 |
|
524 | }
|
525 |
|
526 | export { Searcher, fuzzy, search };
|