1 | // Copyright (c) Jupyter Development Team.
|
2 | // Distributed under the terms of the Modified BSD License.
|
3 | /*-----------------------------------------------------------------------------
|
4 | | Copyright (c) 2014-2017, PhosphorJS Contributors
|
5 | |
|
6 | | Distributed under the terms of the BSD 3-Clause License.
|
7 | |
|
8 | | The full license is in the file LICENSE, distributed with this software.
|
9 | |----------------------------------------------------------------------------*/
|
10 |
|
11 | /**
|
12 | * The namespace for string-specific algorithms.
|
13 | */
|
14 | export namespace StringExt {
|
15 | /**
|
16 | * Find the indices of characters in a source text.
|
17 | *
|
18 | * @param source - The source text which should be searched.
|
19 | *
|
20 | * @param query - The characters to locate in the source text.
|
21 | *
|
22 | * @param start - The index to start the search.
|
23 | *
|
24 | * @returns The matched indices, or `null` if there is no match.
|
25 | *
|
26 | * #### Complexity
|
27 | * Linear on `sourceText`.
|
28 | *
|
29 | * #### Notes
|
30 | * In order for there to be a match, all of the characters in `query`
|
31 | * **must** appear in `source` in the order given by `query`.
|
32 | *
|
33 | * Characters are matched using strict `===` equality.
|
34 | */
|
35 | export function findIndices(
|
36 | source: string,
|
37 | query: string,
|
38 | start = 0
|
39 | ): number[] | null {
|
40 | let indices = new Array<number>(query.length);
|
41 | for (let i = 0, j = start, n = query.length; i < n; ++i, ++j) {
|
42 | j = source.indexOf(query[i], j);
|
43 | if (j === -1) {
|
44 | return null;
|
45 | }
|
46 | indices[i] = j;
|
47 | }
|
48 | return indices;
|
49 | }
|
50 |
|
51 | /**
|
52 | * The result of a string match function.
|
53 | */
|
54 | export interface IMatchResult {
|
55 | /**
|
56 | * A score which indicates the strength of the match.
|
57 | *
|
58 | * The documentation of a given match function should specify
|
59 | * whether a lower or higher score is a stronger match.
|
60 | */
|
61 | score: number;
|
62 |
|
63 | /**
|
64 | * The indices of the matched characters in the source text.
|
65 | *
|
66 | * The indices will appear in increasing order.
|
67 | */
|
68 | indices: number[];
|
69 | }
|
70 |
|
71 | /**
|
72 | * A string matcher which uses a sum-of-squares algorithm.
|
73 | *
|
74 | * @param source - The source text which should be searched.
|
75 | *
|
76 | * @param query - The characters to locate in the source text.
|
77 | *
|
78 | * @param start - The index to start the search.
|
79 | *
|
80 | * @returns The match result, or `null` if there is no match.
|
81 | * A lower `score` represents a stronger match.
|
82 | *
|
83 | * #### Complexity
|
84 | * Linear on `sourceText`.
|
85 | *
|
86 | * #### Notes
|
87 | * This scoring algorithm uses a sum-of-squares approach to determine
|
88 | * the score. In order for there to be a match, all of the characters
|
89 | * in `query` **must** appear in `source` in order. The index of each
|
90 | * matching character is squared and added to the score. This means
|
91 | * that early and consecutive character matches are preferred, while
|
92 | * late matches are heavily penalized.
|
93 | */
|
94 | export function matchSumOfSquares(
|
95 | source: string,
|
96 | query: string,
|
97 | start = 0
|
98 | ): IMatchResult | null {
|
99 | let indices = findIndices(source, query, start);
|
100 | if (!indices) {
|
101 | return null;
|
102 | }
|
103 | let score = 0;
|
104 | for (let i = 0, n = indices.length; i < n; ++i) {
|
105 | let j = indices[i] - start;
|
106 | score += j * j;
|
107 | }
|
108 | return { score, indices };
|
109 | }
|
110 |
|
111 | /**
|
112 | * A string matcher which uses a sum-of-deltas algorithm.
|
113 | *
|
114 | * @param source - The source text which should be searched.
|
115 | *
|
116 | * @param query - The characters to locate in the source text.
|
117 | *
|
118 | * @param start - The index to start the search.
|
119 | *
|
120 | * @returns The match result, or `null` if there is no match.
|
121 | * A lower `score` represents a stronger match.
|
122 | *
|
123 | * #### Complexity
|
124 | * Linear on `sourceText`.
|
125 | *
|
126 | * #### Notes
|
127 | * This scoring algorithm uses a sum-of-deltas approach to determine
|
128 | * the score. In order for there to be a match, all of the characters
|
129 | * in `query` **must** appear in `source` in order. The delta between
|
130 | * the indices are summed to create the score. This means that groups
|
131 | * of matched characters are preferred, while fragmented matches are
|
132 | * penalized.
|
133 | */
|
134 | export function matchSumOfDeltas(
|
135 | source: string,
|
136 | query: string,
|
137 | start = 0
|
138 | ): IMatchResult | null {
|
139 | let indices = findIndices(source, query, start);
|
140 | if (!indices) {
|
141 | return null;
|
142 | }
|
143 | let score = 0;
|
144 | let last = start - 1;
|
145 | for (let i = 0, n = indices.length; i < n; ++i) {
|
146 | let j = indices[i];
|
147 | score += j - last - 1;
|
148 | last = j;
|
149 | }
|
150 | return { score, indices };
|
151 | }
|
152 |
|
153 | /**
|
154 | * Highlight the matched characters of a source text.
|
155 | *
|
156 | * @param source - The text which should be highlighted.
|
157 | *
|
158 | * @param indices - The indices of the matched characters. They must
|
159 | * appear in increasing order and must be in bounds of the source.
|
160 | *
|
161 | * @param fn - The function to apply to the matched chunks.
|
162 | *
|
163 | * @returns An array of unmatched and highlighted chunks.
|
164 | */
|
165 | export function highlight<T>(
|
166 | source: string,
|
167 | indices: ReadonlyArray<number>,
|
168 | fn: (chunk: string) => T
|
169 | ): Array<string | T> {
|
170 | // Set up the result array.
|
171 | let result: Array<string | T> = [];
|
172 |
|
173 | // Set up the counter variables.
|
174 | let k = 0;
|
175 | let last = 0;
|
176 | let n = indices.length;
|
177 |
|
178 | // Iterator over each index.
|
179 | while (k < n) {
|
180 | // Set up the chunk indices.
|
181 | let i = indices[k];
|
182 | let j = indices[k];
|
183 |
|
184 | // Advance the right chunk index until it's non-contiguous.
|
185 | while (++k < n && indices[k] === j + 1) {
|
186 | j++;
|
187 | }
|
188 |
|
189 | // Extract the unmatched text.
|
190 | if (last < i) {
|
191 | result.push(source.slice(last, i));
|
192 | }
|
193 |
|
194 | // Extract and highlight the matched text.
|
195 | if (i < j + 1) {
|
196 | result.push(fn(source.slice(i, j + 1)));
|
197 | }
|
198 |
|
199 | // Update the last visited index.
|
200 | last = j + 1;
|
201 | }
|
202 |
|
203 | // Extract any remaining unmatched text.
|
204 | if (last < source.length) {
|
205 | result.push(source.slice(last));
|
206 | }
|
207 |
|
208 | // Return the highlighted result.
|
209 | return result;
|
210 | }
|
211 |
|
212 | /**
|
213 | * A 3-way string comparison function.
|
214 | *
|
215 | * @param a - The first string of interest.
|
216 | *
|
217 | * @param b - The second string of interest.
|
218 | *
|
219 | * @returns `-1` if `a < b`, else `1` if `a > b`, else `0`.
|
220 | */
|
221 | export function cmp(a: string, b: string): number {
|
222 | return a < b ? -1 : a > b ? 1 : 0;
|
223 | }
|
224 | }
|