1 | export as namespace lunr;
|
2 | export = lunr;
|
3 |
|
4 | /**
|
5 | * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright
|
6 | * Copyright (C) 2014 Oliver Nightingale
|
7 | * MIT Licensed
|
8 | * @license
|
9 | */
|
10 | declare namespace lunr {
|
namespace Builder {
    /**
     * A plugin is a function that is called with the index builder as its context.
     * Plugins can be used to customise or extend the behaviour of the index
     * in some way. A plugin is just a function that encapsulates the custom
     * behaviour that should be applied when building the index.
     *
     * The plugin function will be called with the index builder as its argument; additional
     * arguments can also be passed when calling use. The function will be called
     * with the index builder as its context.
     */
    type Plugin = (this: Builder, ...args: any[]) => void;
}
|
24 |
|
/**
 * lunr.Builder performs indexing on a set of documents and
 * returns instances of lunr.Index ready for querying.
 *
 * All configuration of the index is done via the builder: the
 * fields to index, the document reference, the text processing
 * pipeline and document scoring parameters are all set on the
 * builder before indexing.
 */
class Builder {
    /**
     * Internal reference to the document reference field.
     */
    _ref: string;

    /**
     * Internal reference to the document fields to index.
     */
    _fields: string[];

    /**
     * The inverted index maps terms to document fields.
     */
    invertedIndex: object;

    /**
     * Keeps track of document term frequencies.
     */
    documentTermFrequencies: object;

    /**
     * Keeps track of the length of documents added to the index.
     */
    documentLengths: object;

    /**
     * Function for splitting strings into tokens for indexing.
     */
    tokenizer: typeof tokenizer;

    /**
     * The pipeline performs text processing on tokens before indexing.
     */
    pipeline: Pipeline;

    /**
     * A pipeline for processing search terms before querying the index.
     */
    searchPipeline: Pipeline;

    /**
     * Keeps track of the total number of documents indexed.
     */
    documentCount: number;

    /**
     * A parameter to control field length normalization; setting this to 0 disables
     * normalization, 1 fully normalizes field lengths. The default value is 0.75.
     */
    _b: number;

    /**
     * A parameter to control how quickly an increase in term frequency results in term
     * frequency saturation. The default value is 1.2.
     */
    _k1: number;

    /**
     * A counter incremented for each unique term, used to identify a term's position
     * in the vector space.
     */
    termIndex: number;

    /**
     * A list of metadata keys that have been whitelisted for entry in the index.
     */
    metadataWhitelist: string[];

    constructor();

    /**
     * Sets the document field used as the document reference. Every document must have this field.
     * The type of this field in the document should be a string, if it is not a string it will be
     * coerced into a string by calling toString.
     *
     * The default ref is 'id'.
     *
     * The ref should _not_ be changed during indexing, it should be set before any documents are
     * added to the index. Changing it during indexing can lead to inconsistent results.
     *
     * @param ref - The name of the reference field in the document.
     */
    ref(ref: string): void;

    /**
     * Adds a field to the list of document fields that will be indexed. Every document being
     * indexed should have this field. Null values for this field in indexed documents will
     * not cause errors but will limit the chance of that document being retrieved by searches.
     *
     * All fields should be added before adding documents to the index. Adding fields after
     * a document has been indexed will have no effect on already indexed documents.
     *
     * Fields can be boosted at build time. This allows terms within that field to have more
     * importance when ranking search results. Use a field boost to specify that matches
     * within one field are more important than other fields.
     *
     * @param fieldName - The name of a field to index in all documents.
     * @param attributes - Optional attributes associated with this field.
     */
    field(
        fieldName: string,
        attributes?: {
            boost?: number | undefined;
            extractor?: ((doc: object) => string | object | object[]) | undefined;
        },
    ): void;

    /**
     * A parameter to tune the amount of field length normalisation that is applied when
     * calculating relevance scores. A value of 0 will completely disable any normalisation
     * and a value of 1 will fully normalise field lengths. The default is 0.75. Values of b
     * will be clamped to the range 0 - 1.
     *
     * @param number - The value to set for this tuning parameter.
     */
    b(number: number): void;

    /**
     * A parameter that controls the speed at which a rise in term frequency results in term
     * frequency saturation. The default value is 1.2. Setting this to a higher value will give
     * slower saturation levels, a lower value will result in quicker saturation.
     *
     * @param number - The value to set for this tuning parameter.
     */
    k1(number: number): void;

    /**
     * Adds a document to the index.
     *
     * Before adding documents to the index the index should have been fully setup, with the
     * document ref and all fields to index already having been specified.
     *
     * The document must have a field name as specified by the ref (by default this is 'id') and
     * it should have all fields defined for indexing, though null or undefined values will not
     * cause errors.
     *
     * Entire documents can be boosted at build time. Applying a boost to a document indicates that
     * this document should rank higher in search results than other documents.
     *
     * @param doc - The document to add to the index.
     * @param attributes - Optional attributes associated with this document.
     */
    add(doc: object, attributes?: { boost?: number | undefined }): void;

    /**
     * Builds the index, creating an instance of lunr.Index.
     *
     * This completes the indexing process and should only be called
     * once all documents have been added to the index.
     */
    build(): Index;

    /**
     * Applies a plugin to the index builder.
     *
     * A plugin is a function that is called with the index builder as its context.
     * Plugins can be used to customise or extend the behaviour of the index
     * in some way. A plugin is just a function that encapsulates the custom
     * behaviour that should be applied when building the index.
     *
     * The plugin function will be called with the index builder as its argument; additional
     * arguments can also be passed when calling use. The function will be called
     * with the index builder as its context.
     *
     * @param plugin The plugin to apply.
     */
    use(plugin: Builder.Plugin, ...args: any[]): void;
}
|
200 |
|
namespace Index {
    interface Attributes {
        /**
         * An index of term/field to document reference.
         */
        invertedIndex: object;
        /**
         * Document vectors keyed by document reference.
         */
        documentVectors: { [docRef: string]: Vector };
        /**
         * A set of all corpus tokens.
         */
        tokenSet: TokenSet;
        /**
         * The names of indexed document fields.
         */
        fields: string[];
        /**
         * The pipeline to use for search terms.
         */
        pipeline: Pipeline;
    }

    /**
     * A result contains details of a document matching a search query.
     */
    interface Result {
        /**
         * The reference of the document this result represents.
         */
        ref: string;

        /**
         * A number between 0 and 1 representing how similar this document is to the query.
         */
        score: number;

        /**
         * Contains metadata about this match including which term(s) caused the match.
         */
        matchData: MatchData;
    }

    /**
     * A query builder callback provides a query object to be used to express
     * the query to perform on the index.
     *
     * @param query - The query object to build up.
     */
    type QueryBuilder = (this: Query, query: Query) => void;

    /**
     * Although lunr provides the ability to create queries using lunr.Query, it also provides a simple
     * query language which itself is parsed into an instance of lunr.Query.
     *
     * For programmatically building queries it is advised to directly use lunr.Query, the query language
     * is best used for human entered text rather than program generated text.
     *
     * At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported
     * and will be combined with OR, e.g `hello world` will match documents that contain either 'hello'
     * or 'world', though those that contain both will rank higher in the results.
     *
     * Wildcards can be included in terms to match one or more unspecified characters, these wildcards can
     * be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding
     * wildcards will increase the number of documents that will be found but can also have a negative
     * impact on query performance, especially with wildcards at the beginning of a term.
     *
     * Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term
     * hello in the title field will match this query. Using a field not present in the index will lead
     * to an error being thrown.
     *
     * Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term
     * boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported
     * to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2.
     * Avoid large values for edit distance to improve query performance.
     *
     * To escape special characters the backslash character '\' can be used, this allows searches to include
     * characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead
     * of attempting to apply a boost of 2 to the search term "foo".
     *
     * @example <caption>Simple single term query</caption>
     * hello
     * @example <caption>Multiple term query</caption>
     * hello world
     * @example <caption>term scoped to a field</caption>
     * title:hello
     * @example <caption>term with a boost of 10</caption>
     * hello^10
     * @example <caption>term with an edit distance of 2</caption>
     * hello~2
     */
    type QueryString = string;
}
|
295 |
|
/**
 * An index contains the built index of all documents and provides a query interface
 * to the index.
 *
 * Usually instances of lunr.Index will not be created using this constructor, instead
 * lunr.Builder should be used to construct new indexes, or lunr.Index.load should be
 * used to load previously built and serialized indexes.
 */
class Index {
    /**
     * @param attrs The attributes of the built search index.
     */
    constructor(attrs: Index.Attributes);

    /**
     * Performs a search against the index using lunr query syntax.
     *
     * Results will be returned sorted by their score, the most relevant results
     * will be returned first.
     *
     * For more programmatic querying use lunr.Index#query.
     *
     * @param queryString - A string containing a lunr query.
     * @throws {lunr.QueryParseError} If the passed query string cannot be parsed.
     */
    search(queryString: Index.QueryString): Index.Result[];

    /**
     * Performs a query against the index using the yielded lunr.Query object.
     *
     * If performing programmatic queries against the index, this method is preferred
     * over lunr.Index#search so as to avoid the additional query parsing overhead.
     *
     * A query object is yielded to the supplied function which should be used to
     * express the query to be run against the index.
     *
     * Note that although this function takes a callback parameter it is _not_ an
     * asynchronous operation, the callback is just yielded a query object to be
     * customized.
     *
     * @param fn - A function that is used to build the query.
     */
    query(fn: Index.QueryBuilder): Index.Result[];

    /**
     * Prepares the index for JSON serialization.
     *
     * The schema for this JSON blob will be described in a
     * separate JSON schema file.
     */
    toJSON(): object;

    /**
     * Loads a previously serialized lunr.Index
     *
     * @param serializedIndex - A previously serialized lunr.Index
     */
    static load(serializedIndex: object): Index;
}
|
355 |
|
/**
 * Contains and collects metadata about a matching document.
 * A single instance of lunr.MatchData is returned as part of every
 * lunr.Index~Result.
 */
class MatchData {
    /**
     * A cloned collection of metadata associated with this document.
     */
    metadata: object;

    /**
     * @param term - The term this match data is associated with
     * @param field - The field in which the term was found
     * @param metadata - The metadata recorded about this term in this field
     */
    constructor(term: string, field: string, metadata: object);

    /**
     * An instance of lunr.MatchData will be created for every term that matches a
     * document. However only one instance is required in a lunr.Index~Result. This
     * method combines metadata from another instance of lunr.MatchData with this
     * object's metadata.
     *
     * @param otherMatchData - Another instance of match data to merge with this one.
     * @see {@link lunr.Index~Result}
     */
    combine(otherMatchData: MatchData): void;
}
|
385 |
|
/**
 * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token
 * string as well as all known metadata. A pipeline function can mutate the token string
 * or mutate (or add) metadata for a given token.
 *
 * A pipeline function can indicate that the passed token should be discarded by returning
 * null. This token will not be passed to any downstream pipeline functions and will not be
 * added to the index.
 *
 * Multiple tokens can be returned by returning an array of tokens. Each token will be passed
 * to any downstream pipeline functions and all returned tokens will be added to the index.
 *
 * Any number of pipeline functions may be chained together using a lunr.Pipeline.
 *
 * @param token - A token from the document being processed.
 * @param i - The index of this token in the complete list of tokens for this document/field.
 * @param tokens - All tokens for this document/field.
 */
type PipelineFunction = (
    token: Token,
    i: number,
    tokens: Token[],
) => null | Token | Token[];
|
409 |
|
/**
 * lunr.Pipelines maintain an ordered list of functions to be applied to all
 * tokens in documents entering the search index and queries being ran against
 * the index.
 *
 * An instance of lunr.Index created with the lunr shortcut will contain a
 * pipeline with a stop word filter and an English language stemmer. Extra
 * functions can be added before or after either of these functions or these
 * default functions can be removed.
 *
 * When run the pipeline will call each function in turn, passing a token, the
 * index of that token in the original list of all tokens and finally a list of
 * all the original tokens.
 *
 * The output of functions in the pipeline will be passed to the next function
 * in the pipeline. To exclude a token from entering the index the function
 * should return undefined, the rest of the pipeline will not be called with
 * this token.
 *
 * For serialisation of pipelines to work, all functions used in an instance of
 * a pipeline should be registered with lunr.Pipeline. Registered functions can
 * then be loaded. If trying to load a serialised pipeline that uses functions
 * that are not registered an error will be thrown.
 *
 * If not planning on serialising the pipeline then registering pipeline functions
 * is not necessary.
 */
class Pipeline {
    constructor();

    /**
     * Register a function with the pipeline.
     *
     * Functions that are used in the pipeline should be registered if the pipeline
     * needs to be serialised, or a serialised pipeline needs to be loaded.
     *
     * Registering a function does not add it to a pipeline, functions must still be
     * added to instances of the pipeline for them to be used when running a pipeline.
     *
     * @param fn - The function to register.
     * @param label - The label to register this function with
     */
    static registerFunction(fn: PipelineFunction, label: string): void;

    /**
     * Loads a previously serialised pipeline.
     *
     * All functions to be loaded must already be registered with lunr.Pipeline.
     * If any function from the serialised data has not been registered then an
     * error will be thrown.
     *
     * @param serialised - The serialised pipeline to load.
     */
    static load(serialised: object): Pipeline;

    /**
     * Adds new functions to the end of the pipeline.
     *
     * Logs a warning if the function has not been registered.
     *
     * @param functions - Any number of functions to add to the pipeline.
     */
    add(...functions: PipelineFunction[]): void;

    /**
     * Adds a single function after a function that already exists in the
     * pipeline.
     *
     * Logs a warning if the function has not been registered.
     *
     * @param existingFn - A function that already exists in the pipeline.
     * @param newFn - The new function to add to the pipeline.
     */
    after(existingFn: PipelineFunction, newFn: PipelineFunction): void;

    /**
     * Adds a single function before a function that already exists in the
     * pipeline.
     *
     * Logs a warning if the function has not been registered.
     *
     * @param existingFn - A function that already exists in the pipeline.
     * @param newFn - The new function to add to the pipeline.
     */
    before(existingFn: PipelineFunction, newFn: PipelineFunction): void;

    /**
     * Removes a function from the pipeline.
     *
     * @param fn The function to remove from the pipeline.
     */
    remove(fn: PipelineFunction): void;

    /**
     * Runs the current list of functions that make up the pipeline against the
     * passed tokens.
     *
     * @param tokens The tokens to run through the pipeline.
     */
    run(tokens: Token[]): Token[];

    /**
     * Convenience method for passing a string through a pipeline and getting
     * strings out. This method takes care of wrapping the passed string in a
     * token and mapping the resulting tokens back to strings.
     *
     * @param str - The string to pass through the pipeline.
     */
    runString(str: string): string[];

    /**
     * Resets the pipeline by removing any existing processors.
     */
    reset(): void;

    /**
     * Returns a representation of the pipeline ready for serialisation.
     *
     * Logs a warning if a function in the pipeline has not been registered.
     */
    toJSON(): PipelineFunction[];
}
|
532 |
|
namespace Query {
    /**
     * Constants for indicating what kind of presence a term must have in matching documents.
     */
    enum presence {
        /**
         * Term's presence in a document is optional, this is the default value.
         */
        OPTIONAL = 1,
        /**
         * Term's presence in a document is required, documents that do not contain this term will not be returned.
         */
        REQUIRED = 2,
        /**
         * Term's presence in a document is prohibited, documents that do contain this term will not be returned.
         */
        PROHIBITED = 3,
    }

    /**
     * Constants for indicating whether wildcards should be prepended and/or appended to a term.
     * These are bit flags and may be combined, e.g. `LEADING | TRAILING`.
     */
    enum wildcard {
        NONE = 0,
        LEADING = 1 << 0,
        TRAILING = 1 << 1,
    }

    /**
     * A single clause in a {@link lunr.Query} contains a term and details on how to
     * match that term against a {@link lunr.Index}.
     */
    interface Clause {
        term: string;
        /** The fields in an index this clause should be matched against. */
        fields: string[];
        /** Any boost that should be applied when matching this clause. */
        boost: number;
        /** Whether the term should have fuzzy matching applied, and how fuzzy the match should be. */
        editDistance: number;
        /** Whether the term should be passed through the search pipeline. */
        usePipeline: boolean;
        /** Whether the term should have wildcards appended or prepended. */
        wildcard: number;
    }
}
|
576 |
|
577 | /**
|
578 | * A lunr.Query provides a programmatic way of defining queries to be performed
|
579 | * against a {@link lunr.Index}.
|
580 | *
|
581 | * Prefer constructing a lunr.Query using the {@link lunr.Index#query} method
|
582 | * so the query object is pre-initialized with the right index fields.
|
583 | */
|
584 | class Query {
|
585 | /**
|
586 | * An array of query clauses.
|
587 | */
|
588 | clauses: Query.Clause[];
|
589 |
|
590 | /**
|
591 | * An array of all available fields in a lunr.Index.
|
592 | */
|
593 | allFields: string[];
|
594 |
|
595 | /**
|
596 | * @param allFields An array of all available fields in a lunr.Index.
|
597 | */
|
598 | constructor(allFields: string[]);
|
599 |
|
600 | /**
|
601 | * Adds a {this query.
lunr.Query~Clause} to |
602 | *
|
603 | * Unless the clause contains the fields to be matched all fields will be matched. In addition
|
604 | * a default boost of 1 is applied to the clause.
|
605 | *
|
606 | * this query.
clause - The clause to add to |
607 | * lunr.Query~Clause
|
608 | */
|
609 | clause(clause: Query.Clause): Query;
|
610 |
|
611 | /**
|
612 | * Adds a term to the current query, under the covers this will create a {@link lunr.Query~Clause}
|
613 | * to the list of clauses that make up this query.
|
614 | *
|
615 | * The term is used as is, i.e. no tokenization will be performed by this method. Instead conversion
|
616 | * to a token or token-like string should be done before calling this method.
|
617 | *
|
618 | * The term will be converted to a string by calling `toString`. Multiple terms can be passed as an
|
619 | * array, each term in the array will share the same options.
|
620 | *
|
621 | * @param term - The term to add to the query.
|
622 | * @param [options] - Any additional properties to add to the query clause.
|
623 | * @see lunr.Query#clause
|
624 | * @see lunr.Query~Clause
|
625 | * @example <caption>adding a single term to a query</caption>
|
626 | * query.term("foo")
|
627 | * @example <caption>adding a single term to a query and specifying search fields, term boost and automatic trailing wildcard</caption>
|
628 | * query.term("foo", {
|
629 | * fields: ["title"],
|
630 | * boost: 10,
|
631 | * wildcard: lunr.Query.wildcard.TRAILING
|
632 | * })
|
633 | */
|
634 | term(term: string | string[] | Token | Token[], options: object): Query;
|
635 | }
|
636 |
|
637 | class QueryParseError extends Error {
|
638 | name: "QueryParseError";
|
639 | message: string;
|
640 | start: number;
|
641 | end: number;
|
642 |
|
643 | constructor(message: string, start: string, end: string);
|
644 | }
|
645 |
|
/**
 * lunr.stemmer is an english language stemmer, this is a JavaScript
 * implementation of the PorterStemmer taken from http://tartarus.org/~martin
 *
 * Implements {lunr.PipelineFunction}
 *
 * @param token - The token (string) to stem
 * @see {@link lunr.Pipeline}
 */
function stemmer(token: Token): Token;
|
656 |
|
/**
 * lunr.generateStopWordFilter builds a stopWordFilter function from the provided
 * list of stop words.
 *
 * The built in lunr.stopWordFilter is built using this generator and can be used
 * to generate custom stopWordFilters for applications or non English languages.
 *
 * @param stopWords - The list of stop words
 * @see lunr.Pipeline
 * @see lunr.stopWordFilter
 */
function generateStopWordFilter(stopWords: string[]): PipelineFunction;
|
669 |
|
/**
 * lunr.stopWordFilter is an English language stop word list filter, any words
 * contained in the list will not be passed through the filter.
 *
 * This is intended to be used in the Pipeline. If the token does not pass the
 * filter then undefined will be returned.
 *
 * Implements {lunr.PipelineFunction}
 *
 * @param token - A token to check for being a stop word.
 * @see {@link lunr.Pipeline}
 */
function stopWordFilter(token: Token): Token;
|
683 |
|
namespace Token {
    /**
     * A token update function is used when updating or optionally
     * when cloning a token.
     *
     * @param str - The string representation of the token.
     * @param metadata - All metadata associated with this token.
     */
    type UpdateFunction = (str: string, metadata: object) => void;
}
|
694 |
|
695 | /**
|
696 | * A token wraps a string representation of a token
|
697 | * as it is passed through the text processing pipeline.
|
698 | */
|
699 | class Token {
|
700 | /**
|
701 | * @param [str=''] - The string token being wrapped.
|
702 | * @param [metadata={}] - Metadata associated with this token.
|
703 | */
|
704 | constructor(str: string, metadata: object);
|
705 |
|
706 | /**
|
707 | * Returns the token string that is being wrapped by this object.
|
708 | */
|
709 | toString(): string;
|
710 |
|
711 | /**
|
712 | * Applies the given function to the wrapped string token.
|
713 | *
|
714 | * @example
|
715 | * token.update(function (str, metadata) {
|
716 | * return str.toUpperCase()
|
717 | * })
|
718 | *
|
719 | * @param fn - A function to apply to the token string.
|
720 | */
|
721 | update(fn: Token.UpdateFunction): Token;
|
722 |
|
723 | /**
|
724 | * Creates a clone of this token. Optionally a function can be
|
725 | * applied to the cloned token.
|
726 | *
|
727 | * @param fn - An optional function to apply to the cloned token.
|
728 | */
|
729 | clone(fn?: Token.UpdateFunction): Token;
|
730 | }
|
731 |
|
732 | /**
|
733 | * A token set is used to store the unique list of all tokens
|
734 | * within an index. Token sets are also used to represent an
|
735 | * incoming query to the index, this query token set and index
|
736 | * token set are then intersected to find which tokens to look
|
737 | * up in the inverted index.
|
738 | *
|
739 | * A token set can hold multiple tokens, as in the case of the
|
740 | * index token set, or it can hold a single token as in the
|
741 | * case of a simple query token set.
|
742 | *
|
743 | * Additionally token sets are used to perform wildcard matching.
|
744 | * Leading, contained and trailing wildcards are supported, and
|
745 | * from this edit distance matching can also be provided.
|
746 | *
|
747 | * Token sets are implemented as a minimal finite state automata,
|
748 | * where both common prefixes and suffixes are shared between tokens.
|
749 | * This helps to reduce the space used for storing the token set.
|
750 | */
|
751 | class TokenSet {
|
752 | constructor();
|
753 |
|
754 | /**
|
755 | * Creates a TokenSet instance from the given sorted array of words.
|
756 | *
|
757 | * @param arr - A sorted array of strings to create the set from.
|
758 | * @throws Will throw an error if the input array is not sorted.
|
759 | */
|
760 | fromArray(arr: string[]): TokenSet;
|
761 |
|
762 | /**
|
763 | * Creates a token set representing a single string with a specified
|
764 | * edit distance.
|
765 | *
|
766 | * Insertions, deletions, substitutions and transpositions are each
|
767 | * treated as an edit distance of 1.
|
768 | *
|
769 | * Increasing the allowed edit distance will have a dramatic impact
|
770 | * on the performance of both creating and intersecting these TokenSets.
|
771 | * It is advised to keep the edit distance less than 3.
|
772 | *
|
773 | * @param str - The string to create the token set from.
|
774 | * @param editDistance - The allowed edit distance to match.
|
775 | */
|
776 | fromFuzzyString(str: string, editDistance: number): Vector;
|
777 |
|
778 | /**
|
779 | * Creates a TokenSet from a string.
|
780 | *
|
781 | * The string may contain one or more wildcard characters (*)
|
782 | * that will allow wildcard matching when intersecting with
|
783 | * another TokenSet.
|
784 | *
|
785 | * @param str - The string to create a TokenSet from.
|
786 | */
|
787 | fromString(str: string): TokenSet;
|
788 |
|
789 | /**
|
790 | * Converts this TokenSet into an array of strings
|
791 | * contained within the TokenSet.
|
792 | */
|
793 | toArray(): string[];
|
794 |
|
795 | /**
|
796 | * Generates a string representation of a TokenSet.
|
797 | *
|
798 | * This is intended to allow TokenSets to be used as keys
|
799 | * in objects, largely to aid the construction and minimisation
|
800 | * of a TokenSet. As such it is not designed to be a human
|
801 | * friendly representation of the TokenSet.
|
802 | */
|
803 | toString(): string;
|
804 |
|
805 | /**
|
806 | * Returns a new TokenSet that is the intersection of
|
807 | * this TokenSet and the passed TokenSet.
|
808 | *
|
809 | * This intersection will take into account any wildcards
|
810 | * contained within the TokenSet.
|
811 | *
|
812 | * @param b - An other TokenSet to intersect with.
|
813 | */
|
814 | intersect(b: TokenSet): TokenSet;
|
815 | }
|
816 |
|
  namespace tokenizer {
    /**
     * The separator used to split a string into tokens. Override this property
     * to change how `lunr.tokenizer` splits strings when tokenizing. By default
     * this splits on whitespace and hyphens.
     *
     * @see lunr.tokenizer
     */
    let separator: RegExp;
  }
|
826 |
|
  /**
   * A function for splitting a string into tokens ready to be inserted into
   * the search index. Uses `lunr.tokenizer.separator` to split strings, change
   * the value of this property to change how strings are split into tokens.
   *
   * This tokenizer will convert its parameter to a string by calling `toString` and
   * then will split this string on the character in `lunr.tokenizer.separator`.
   * Arrays will have their elements converted to strings and wrapped in a lunr.Token.
   *
   * @param obj - The object to convert into tokens
   * @returns The tokens extracted from `obj`.
   */
  function tokenizer(obj?: null | string | object | object[]): Token[];
|
839 |
|
  /**
   * lunr.trimmer is a pipeline function for trimming non word
   * characters from the beginning and end of tokens before they
   * enter the index.
   *
   * This implementation may not work correctly for non latin
   * characters and should either be removed or adapted for use
   * with languages with non-latin characters.
   *
   * Implements `lunr.PipelineFunction`.
   *
   * @param token - The token to pass through the filter.
   * @returns The trimmed token.
   * @see lunr.Pipeline
   */
  function trimmer(token: Token): Token;
|
855 |
|
  /**
   * A namespace containing utils for the rest of the lunr library
   */
  namespace utils {
    /**
     * Print a warning message to the console.
     *
     * @param message - The message to be printed.
     */
    function warn(message: string): void;

    /**
     * Convert an object to a string.
     *
     * In the case of `null` and `undefined` the function returns
     * the empty string, in all other cases the result of calling
     * `toString` on the passed object is returned.
     *
     * @param obj - The object to convert to a string.
     * @returns A string representation of the passed object.
     */
    function asString(obj: any): string;
  }
|
879 |
|
880 | /**
|
881 | * A vector is used to construct the vector space of documents and queries. These
|
882 | * vectors support operations to determine the similarity between two documents or
|
883 | * a document and a query.
|
884 | *
|
885 | * Normally no parameters are required for initializing a vector, but in the case of
|
886 | * loading a previously dumped vector the raw elements can be provided to the constructor.
|
887 | *
|
888 | * For performance reasons vectors are implemented with a flat array, where an elements
|
889 | * index is immediately followed by its value. E.g. [index, value, index, value]. This
|
890 | * allows the underlying array to be as sparse as possible and still offer decent
|
891 | * performance when being used for vector calculations.
|
892 | */
|
893 | class Vector {
|
894 | /**
|
895 | * @param [elements] - The flat list of element index and element value pairs.
|
896 | */
|
897 | constructor(elements: number[]);
|
898 |
|
899 | /**
|
900 | * Calculates the position within the vector to insert a given index.
|
901 | *
|
902 | * This is used internally by insert and upsert. If there are duplicate indexes then
|
903 | * the position is returned as if the value for that index were to be updated, but it
|
904 | * is the callers responsibility to check whether there is a duplicate at that index
|
905 | *
|
906 | * @param insertIdx - The index at which the element should be inserted.
|
907 | */
|
908 | positionForIndex(index: number): number;
|
909 |
|
910 | /**
|
911 | * Inserts an element at an index within the vector.
|
912 | *
|
913 | * Does not allow duplicates, will throw an error if there is already an entry
|
914 | * for this index.
|
915 | *
|
916 | * @param insertIdx - The index at which the element should be inserted.
|
917 | * @param val - The value to be inserted into the vector.
|
918 | */
|
919 | insert(insertIdx: number, val: number): void;
|
920 |
|
921 | /**
|
922 | * Inserts or updates an existing index within the vector.
|
923 | *
|
924 | * @param insertIdx - The index at which the element should be inserted.
|
925 | * @param val - The value to be inserted into the vector.
|
926 | * @param fn - A function that is called for updates, the existing value and the
|
927 | * requested value are passed as arguments
|
928 | */
|
929 | upsert(
|
930 | insertIdx: number,
|
931 | val: number,
|
932 | fn: (existingVal: number, val: number) => number,
|
933 | ): void;
|
934 |
|
935 | /**
|
936 | * Calculates the magnitude of this vector.
|
937 | */
|
938 | magnitude(): number;
|
939 |
|
940 | /**
|
941 | * Calculates the dot product of this vector and another vector.
|
942 | *
|
943 | * @param otherVector - The vector to compute the dot product with.
|
944 | */
|
945 | dot(otherVector: Vector): number;
|
946 |
|
947 | /**
|
948 | * Calculates the cosine similarity between this vector and another
|
949 | * vector.
|
950 | *
|
951 | * @param otherVector - The other vector to calculate the
|
952 | * similarity with.
|
953 | */
|
954 | similarity(otherVector: Vector): number;
|
955 |
|
956 | /**
|
957 | * Converts the vector to an array of the elements within the vector.
|
958 | */
|
959 | toArray(): number[];
|
960 |
|
961 | /**
|
962 | * A JSON serializable representation of the vector.
|
963 | */
|
964 | toJSON(): number[];
|
965 | }
|
966 |
|
  /**
   * The version string of the lunr library.
   */
  const version: string;

  /**
   * A configuration function passed to the `lunr()` convenience function;
   * it is invoked with the new index builder as both its `this` context
   * and its first argument.
   */
  type ConfigFunction = (this: Builder, builder: Builder) => void;
|
969 | }
|
970 |
|
/**
 * Convenience function for instantiating a new lunr index and configuring it with the default
 * pipeline functions and the passed config function.
 *
 * When using this convenience function a new index will be created with the following functions
 * already in the pipeline:
 *
 * * lunr.stopWordFilter - filters out any stop words before they enter the index
 *
 * * lunr.stemmer - stems the tokens before entering the index.
 *
 * Example:
 *
 * ```javascript
 * var idx = lunr(function () {
 *   this.field('title', 10);
 *   this.field('tags', 100);
 *   this.field('body');
 *
 *   this.ref('cid');
 *
 *   this.pipeline.add(function () {
 *     // some custom pipeline function
 *   });
 * });
 * ```
 *
 * @param config - A configuration function, called with the new builder as
 *                 both its `this` context and first argument.
 * @returns The built index, ready for searching.
 */
declare function lunr(config: lunr.ConfigFunction): lunr.Index;
|