1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 | "use strict";
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 |
|
29 |
|
30 |
|
31 |
|
32 |
|
33 | const similarity = (a, b) => {
|
34 | const l = Math.min(a.length, b.length);
|
35 | let dist = 0;
|
36 | for (let i = 0; i < l; i++) {
|
37 | const ca = a.charCodeAt(i);
|
38 | const cb = b.charCodeAt(i);
|
39 | dist += Math.max(0, 10 - Math.abs(ca - cb));
|
40 | }
|
41 | return dist;
|
42 | };
|
43 |
|
44 |
|
45 |
|
46 |
|
47 |
|
48 |
|
49 |
|
50 | const getName = (a, b, usedNames) => {
|
51 | const l = Math.min(a.length, b.length);
|
52 | let i = 0;
|
53 | while (i < l) {
|
54 | if (a.charCodeAt(i) !== b.charCodeAt(i)) {
|
55 | i++;
|
56 | break;
|
57 | }
|
58 | i++;
|
59 | }
|
60 | while (i < l) {
|
61 | const name = a.slice(0, i);
|
62 | const lowerName = name.toLowerCase();
|
63 | if (!usedNames.has(lowerName)) {
|
64 | usedNames.add(lowerName);
|
65 | return name;
|
66 | }
|
67 | i++;
|
68 | }
|
69 |
|
70 |
|
71 | return a;
|
72 | };
|
73 |
|
74 |
|
75 |
|
76 |
|
77 |
|
78 |
|
79 | const addSizeTo = (total, size) => {
|
80 | for (const key of Object.keys(size)) {
|
81 | total[key] = (total[key] || 0) + size[key];
|
82 | }
|
83 | };
|
84 |
|
85 |
|
86 |
|
87 |
|
88 |
|
89 |
|
90 | const subtractSizeFrom = (total, size) => {
|
91 | for (const key of Object.keys(size)) {
|
92 | total[key] -= size[key];
|
93 | }
|
94 | };
|
95 |
|
96 |
|
97 |
|
98 |
|
99 |
|
100 | const sumSize = nodes => {
|
101 | const sum = Object.create(null);
|
102 | for (const node of nodes) {
|
103 | addSizeTo(sum, node.size);
|
104 | }
|
105 | return sum;
|
106 | };
|
107 |
|
108 | const isTooBig = (size, maxSize) => {
|
109 | for (const key of Object.keys(size)) {
|
110 | const s = size[key];
|
111 | if (s === 0) continue;
|
112 | const maxSizeValue = maxSize[key];
|
113 | if (typeof maxSizeValue === "number") {
|
114 | if (s > maxSizeValue) return true;
|
115 | }
|
116 | }
|
117 | return false;
|
118 | };
|
119 |
|
120 | const isTooSmall = (size, minSize) => {
|
121 | for (const key of Object.keys(size)) {
|
122 | const s = size[key];
|
123 | if (s === 0) continue;
|
124 | const minSizeValue = minSize[key];
|
125 | if (typeof minSizeValue === "number") {
|
126 | if (s < minSizeValue) return true;
|
127 | }
|
128 | }
|
129 | return false;
|
130 | };
|
131 |
|
132 | const getTooSmallTypes = (size, minSize) => {
|
133 | const types = new Set();
|
134 | for (const key of Object.keys(size)) {
|
135 | const s = size[key];
|
136 | if (s === 0) continue;
|
137 | const minSizeValue = minSize[key];
|
138 | if (typeof minSizeValue === "number") {
|
139 | if (s < minSizeValue) types.add(key);
|
140 | }
|
141 | }
|
142 | return types;
|
143 | };
|
144 |
|
145 | const getNumberOfMatchingSizeTypes = (size, types) => {
|
146 | let i = 0;
|
147 | for (const key of Object.keys(size)) {
|
148 | if (size[key] !== 0 && types.has(key)) i++;
|
149 | }
|
150 | return i;
|
151 | };
|
152 |
|
153 | const selectiveSizeSum = (size, types) => {
|
154 | let sum = 0;
|
155 | for (const key of Object.keys(size)) {
|
156 | if (size[key] !== 0 && types.has(key)) sum += size[key];
|
157 | }
|
158 | return sum;
|
159 | };
|
160 |
|
161 |
|
162 |
|
163 |
|
164 | class Node {
|
165 | |
166 |
|
167 |
|
168 |
|
169 |
|
170 | constructor(item, key, size) {
|
171 | this.item = item;
|
172 | this.key = key;
|
173 | this.size = size;
|
174 | }
|
175 | }
|
176 |
|
177 |
|
178 |
|
179 |
|
180 | class Group {
|
181 | |
182 |
|
183 |
|
184 |
|
185 |
|
186 | constructor(nodes, similarities, size) {
|
187 | this.nodes = nodes;
|
188 | this.similarities = similarities;
|
189 | this.size = size || sumSize(nodes);
|
190 |
|
191 | this.key = undefined;
|
192 | }
|
193 |
|
194 | |
195 |
|
196 |
|
197 |
|
198 | popNodes(filter) {
|
199 | const newNodes = [];
|
200 | const newSimilarities = [];
|
201 | const resultNodes = [];
|
202 | let lastNode;
|
203 | for (let i = 0; i < this.nodes.length; i++) {
|
204 | const node = this.nodes[i];
|
205 | if (filter(node)) {
|
206 | resultNodes.push(node);
|
207 | } else {
|
208 | if (newNodes.length > 0) {
|
209 | newSimilarities.push(
|
210 | lastNode === this.nodes[i - 1]
|
211 | ? this.similarities[i - 1]
|
212 | : similarity(lastNode.key, node.key)
|
213 | );
|
214 | }
|
215 | newNodes.push(node);
|
216 | lastNode = node;
|
217 | }
|
218 | }
|
219 | if (resultNodes.length === this.nodes.length) return undefined;
|
220 | this.nodes = newNodes;
|
221 | this.similarities = newSimilarities;
|
222 | this.size = sumSize(newNodes);
|
223 | return resultNodes;
|
224 | }
|
225 | }
|
226 |
|
227 |
|
228 |
|
229 |
|
230 |
|
231 | const getSimilarities = nodes => {
|
232 |
|
233 |
|
234 | const similarities = [];
|
235 | let last = undefined;
|
236 | for (const node of nodes) {
|
237 | if (last !== undefined) {
|
238 | similarities.push(similarity(last.key, node.key));
|
239 | }
|
240 | last = node;
|
241 | }
|
242 | return similarities;
|
243 | };
|
244 |
|
245 |
|
246 |
|
247 |
|
248 |
|
249 |
|
250 |
|
251 |
|
252 |
|
253 |
|
254 |
|
255 |
|
256 |
|
257 |
|
258 |
|
259 |
|
260 |
|
261 |
|
262 |
|
263 |
|
264 |
|
265 |
|
266 |
|
267 |
|
268 | module.exports = ({ maxSize, minSize, items, getSize, getKey }) => {
|
269 |
|
270 | const result = [];
|
271 |
|
272 | const nodes = Array.from(
|
273 | items,
|
274 | item => new Node(item, getKey(item), getSize(item))
|
275 | );
|
276 |
|
277 |
|
278 | const initialNodes = [];
|
279 |
|
280 |
|
281 | nodes.sort((a, b) => {
|
282 | if (a.key < b.key) return -1;
|
283 | if (a.key > b.key) return 1;
|
284 | return 0;
|
285 | });
|
286 |
|
287 |
|
288 |
|
289 | for (const node of nodes) {
|
290 | if (isTooBig(node.size, maxSize) && !isTooSmall(node.size, minSize)) {
|
291 | result.push(new Group([node], []));
|
292 | } else {
|
293 | initialNodes.push(node);
|
294 | }
|
295 | }
|
296 |
|
297 | if (initialNodes.length > 0) {
|
298 | const initialGroup = new Group(initialNodes, getSimilarities(initialNodes));
|
299 |
|
300 | const removeProblematicNodes = (group, consideredSize = group.size) => {
|
301 | const problemTypes = getTooSmallTypes(consideredSize, minSize);
|
302 | if (problemTypes.size > 0) {
|
303 |
|
304 |
|
305 | const problemNodes = group.popNodes(
|
306 | n => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
|
307 | );
|
308 | if (problemNodes === undefined) return false;
|
309 |
|
310 | const possibleResultGroups = result.filter(
|
311 | n => getNumberOfMatchingSizeTypes(n.size, problemTypes) > 0
|
312 | );
|
313 | if (possibleResultGroups.length > 0) {
|
314 | const bestGroup = possibleResultGroups.reduce((min, group) => {
|
315 | const minMatches = getNumberOfMatchingSizeTypes(min, problemTypes);
|
316 | const groupMatches = getNumberOfMatchingSizeTypes(
|
317 | group,
|
318 | problemTypes
|
319 | );
|
320 | if (minMatches !== groupMatches)
|
321 | return minMatches < groupMatches ? group : min;
|
322 | if (
|
323 | selectiveSizeSum(min.size, problemTypes) >
|
324 | selectiveSizeSum(group.size, problemTypes)
|
325 | )
|
326 | return group;
|
327 | return min;
|
328 | });
|
329 | for (const node of problemNodes) bestGroup.nodes.push(node);
|
330 | bestGroup.nodes.sort((a, b) => {
|
331 | if (a.key < b.key) return -1;
|
332 | if (a.key > b.key) return 1;
|
333 | return 0;
|
334 | });
|
335 | } else {
|
336 |
|
337 |
|
338 | result.push(new Group(problemNodes, null));
|
339 | }
|
340 | return true;
|
341 | } else {
|
342 | return false;
|
343 | }
|
344 | };
|
345 |
|
346 | if (initialGroup.nodes.length > 0) {
|
347 | const queue = [initialGroup];
|
348 |
|
349 | while (queue.length) {
|
350 | const group = queue.pop();
|
351 |
|
352 | if (!isTooBig(group.size, maxSize)) {
|
353 | result.push(group);
|
354 | continue;
|
355 | }
|
356 |
|
357 |
|
358 | if (removeProblematicNodes(group)) {
|
359 |
|
360 | queue.push(group);
|
361 | continue;
|
362 | }
|
363 |
|
364 |
|
365 |
|
366 |
|
367 | let left = 1;
|
368 | let leftSize = Object.create(null);
|
369 | addSizeTo(leftSize, group.nodes[0].size);
|
370 | while (left < group.nodes.length && isTooSmall(leftSize, minSize)) {
|
371 | addSizeTo(leftSize, group.nodes[left].size);
|
372 | left++;
|
373 | }
|
374 | let right = group.nodes.length - 2;
|
375 | let rightSize = Object.create(null);
|
376 | addSizeTo(rightSize, group.nodes[group.nodes.length - 1].size);
|
377 | while (right >= 0 && isTooSmall(rightSize, minSize)) {
|
378 | addSizeTo(rightSize, group.nodes[right].size);
|
379 | right--;
|
380 | }
|
381 |
|
382 |
|
383 |
|
384 |
|
385 |
|
386 |
|
387 |
|
388 |
|
389 |
|
390 |
|
391 |
|
392 | if (left - 1 > right) {
|
393 |
|
394 | let prevSize;
|
395 | if (right < group.nodes.length - left) {
|
396 | subtractSizeFrom(rightSize, group.nodes[right + 1].size);
|
397 | prevSize = rightSize;
|
398 | } else {
|
399 | subtractSizeFrom(leftSize, group.nodes[left - 1].size);
|
400 | prevSize = leftSize;
|
401 | }
|
402 | if (removeProblematicNodes(group, prevSize)) {
|
403 |
|
404 | queue.push(group);
|
405 | continue;
|
406 | }
|
407 |
|
408 |
|
409 |
|
410 |
|
411 | result.push(group);
|
412 | continue;
|
413 | }
|
414 | if (left <= right) {
|
415 |
|
416 |
|
417 |
|
418 |
|
419 |
|
420 | let best = -1;
|
421 | let bestSimilarity = Infinity;
|
422 | let pos = left;
|
423 | let rightSize = sumSize(group.nodes.slice(pos));
|
424 |
|
425 |
|
426 |
|
427 |
|
428 |
|
429 |
|
430 | while (pos <= right + 1) {
|
431 | const similarity = group.similarities[pos - 1];
|
432 | if (
|
433 | similarity < bestSimilarity &&
|
434 | !isTooSmall(leftSize, minSize) &&
|
435 | !isTooSmall(rightSize, minSize)
|
436 | ) {
|
437 | best = pos;
|
438 | bestSimilarity = similarity;
|
439 | }
|
440 | addSizeTo(leftSize, group.nodes[pos].size);
|
441 | subtractSizeFrom(rightSize, group.nodes[pos].size);
|
442 | pos++;
|
443 | }
|
444 | if (best < 0) {
|
445 |
|
446 |
|
447 |
|
448 | result.push(group);
|
449 | continue;
|
450 | }
|
451 | left = best;
|
452 | right = best - 1;
|
453 | }
|
454 |
|
455 |
|
456 |
|
457 | const rightNodes = [group.nodes[right + 1]];
|
458 |
|
459 | const rightSimilarities = [];
|
460 | for (let i = right + 2; i < group.nodes.length; i++) {
|
461 | rightSimilarities.push(group.similarities[i - 1]);
|
462 | rightNodes.push(group.nodes[i]);
|
463 | }
|
464 | queue.push(new Group(rightNodes, rightSimilarities));
|
465 |
|
466 | const leftNodes = [group.nodes[0]];
|
467 |
|
468 | const leftSimilarities = [];
|
469 | for (let i = 1; i < left; i++) {
|
470 | leftSimilarities.push(group.similarities[i - 1]);
|
471 | leftNodes.push(group.nodes[i]);
|
472 | }
|
473 | queue.push(new Group(leftNodes, leftSimilarities));
|
474 | }
|
475 | }
|
476 | }
|
477 |
|
478 |
|
479 | result.sort((a, b) => {
|
480 | if (a.nodes[0].key < b.nodes[0].key) return -1;
|
481 | if (a.nodes[0].key > b.nodes[0].key) return 1;
|
482 | return 0;
|
483 | });
|
484 |
|
485 |
|
486 | const usedNames = new Set();
|
487 | for (let i = 0; i < result.length; i++) {
|
488 | const group = result[i];
|
489 | if (group.nodes.length === 1) {
|
490 | group.key = group.nodes[0].key;
|
491 | } else {
|
492 | const first = group.nodes[0];
|
493 | const last = group.nodes[group.nodes.length - 1];
|
494 | const name = getName(first.key, last.key, usedNames);
|
495 | group.key = name;
|
496 | }
|
497 | }
|
498 |
|
499 |
|
500 | return result.map(group => {
|
501 |
|
502 | return {
|
503 | key: group.key,
|
504 | items: group.nodes.map(node => node.item),
|
505 | size: group.size
|
506 | };
|
507 | });
|
508 | };
|