export as namespace lunr;
export = lunr;

/**
 * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright
 * Copyright (C) 2014 Oliver Nightingale
 * MIT Licensed
 * @license
 */
declare namespace lunr {
    namespace Builder {
        /**
         * A plugin is a function that is called with the index builder as its context.
         * Plugins can be used to customise or extend the behaviour of the index
         * in some way. A plugin is just a function that encapsulates the custom
         * behaviour that should be applied when building the index.
         *
         * The plugin function will be called with the index builder as its argument, additional
         * arguments can also be passed when calling use. The function will be called
         * with the index builder as its context.
         */
        type Plugin = (this: Builder, ...args: any[]) => void;
    }

    /**
     * lunr.Builder performs indexing on a set of documents and
     * returns instances of lunr.Index ready for querying.
     *
     * All configuration of the index is done via the builder; the
     * fields to index, the document reference, the text processing
     * pipeline and document scoring parameters are all set on the
     * builder before indexing.
     */
    class Builder {
        /**
         * Internal reference to the document reference field.
         */
        _ref: string;

        /**
         * Internal reference to the document fields to index.
         */
        _fields: string[];

        /**
         * The inverted index maps terms to document fields.
         */
        invertedIndex: object;

        /**
         * Keeps track of document term frequencies.
         */
        documentTermFrequencies: object;

        /**
         * Keeps track of the length of documents added to the index.
         */
        documentLengths: object;

        /**
         * Function for splitting strings into tokens for indexing.
         */
        tokenizer: typeof tokenizer;

        /**
         * The pipeline performs text processing on tokens before indexing.
         */
        pipeline: Pipeline;

        /**
         * A pipeline for processing search terms before querying the index.
         */
        searchPipeline: Pipeline;

        /**
         * Keeps track of the total number of documents indexed.
         */
        documentCount: number;

        /**
         * A parameter to control field length normalization. Setting this to 0 disables normalization,
         * 1 fully normalizes field lengths; the default value is 0.75.
         */
        _b: number;

        /**
         * A parameter to control how quickly an increase in term frequency results in term frequency saturation; the default value is 1.2.
         */
        _k1: number;

        /**
         * A counter incremented for each unique term, used to identify a term's position in the vector space.
         */
        termIndex: number;

        /**
         * A list of metadata keys that have been whitelisted for entry in the index.
         */
        metadataWhitelist: string[];

        constructor();

        /**
         * Sets the document field used as the document reference. Every document must have this field.
         * The type of this field in the document should be a string; if it is not a string it will be
         * coerced into a string by calling toString.
         *
         * The default ref is 'id'.
         *
         * The ref should _not_ be changed during indexing, it should be set before any documents are
         * added to the index. Changing it during indexing can lead to inconsistent results.
         *
         * @param ref - The name of the reference field in the document.
         */
        ref(ref: string): void;

        /**
         * Adds a field to the list of document fields that will be indexed. Every document being
         * indexed should have this field. Null values for this field in indexed documents will
         * not cause errors but will limit the chance of that document being retrieved by searches.
         *
         * All fields should be added before adding documents to the index. Adding fields after
         * a document has been indexed will have no effect on already indexed documents.
         *
         * Fields can be boosted at build time. This allows terms within that field to have more
         * importance when ranking search results. Use a field boost to specify that matches
         * within one field are more important than matches in other fields.
         *
         * @param fieldName - The name of a field to index in all documents.
         * @param attributes - Optional attributes associated with this field.
         */
        field(
            fieldName: string,
            attributes?: {
                boost?: number | undefined;
                extractor?: ((doc: object) => string | object | object[]) | undefined;
            },
        ): void;
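
        /*
         * A minimal sketch of configuring fields on a builder. The document shape
         * assumed here (a `title`, a `body` and a nested `meta.tags` property) is
         * hypothetical:
         *
         *   const builder = new lunr.Builder();
         *   builder.field('title', { boost: 10 });
         *   builder.field('body');
         *   builder.field('tags', { extractor: (doc) => (doc as any).meta.tags });
         */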

        /**
         * A parameter to tune the amount of field length normalisation that is applied when
         * calculating relevance scores. A value of 0 will completely disable any normalisation
         * and a value of 1 will fully normalise field lengths. The default is 0.75. Values of b
         * will be clamped to the range 0 - 1.
         *
         * @param number - The value to set for this tuning parameter.
         */
        b(number: number): void;

        /**
         * A parameter that controls the speed at which a rise in term frequency results in term
         * frequency saturation. The default value is 1.2. Setting this to a higher value will give
         * slower saturation levels, a lower value will result in quicker saturation.
         *
         * @param number - The value to set for this tuning parameter.
         */
        k1(number: number): void;
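
        /*
         * A sketch of adjusting both BM25 tuning parameters on a builder; the
         * values below are illustrative, not recommendations:
         *
         *   builder.b(0.5);  // less field length normalisation than the 0.75 default
         *   builder.k1(2.0); // term frequency saturates more slowly than the 1.2 default
         */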

        /**
         * Adds a document to the index.
         *
         * Before adding documents to the index, the index should have been fully set up, with the document
         * ref and all fields to index already having been specified.
         *
         * The document must have a field name as specified by the ref (by default this is 'id') and
         * it should have all fields defined for indexing, though null or undefined values will not
         * cause errors.
         *
         * Entire documents can be boosted at build time. Applying a boost to a document indicates that
         * this document should rank higher in search results than other documents.
         *
         * @param doc - The document to add to the index.
         * @param attributes - Optional attributes associated with this document.
         */
        add(doc: object, attributes?: { boost?: number | undefined }): void;
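
        /*
         * A sketch of adding documents once the ref and fields are configured; the
         * documents themselves are made up for illustration:
         *
         *   builder.ref('id');
         *   builder.field('title');
         *   builder.add({ id: '1', title: 'hello world' });
         *   builder.add({ id: '2', title: 'release notes' }, { boost: 2 });
         */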

        /**
         * Builds the index, creating an instance of lunr.Index.
         *
         * This completes the indexing process and should only be called
         * once all documents have been added to the index.
         */
        build(): Index;

        /**
         * Applies a plugin to the index builder.
         *
         * A plugin is a function that is called with the index builder as its context.
         * Plugins can be used to customise or extend the behaviour of the index
         * in some way. A plugin is just a function that encapsulates the custom
         * behaviour that should be applied when building the index.
         *
         * The plugin function will be called with the index builder as its argument, additional
         * arguments can also be passed when calling use. The function will be called
         * with the index builder as its context.
         *
         * @param plugin The plugin to apply.
         */
        use(plugin: Builder.Plugin, ...args: any[]): void;
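
        /*
         * A plugin sketch; the plugin name, the field name passed to it and the
         * boost value are all hypothetical. The builder is supplied as both the
         * `this` context and the first argument, followed by anything passed to `use`:
         *
         *   const boostedField: lunr.Builder.Plugin = function (builder: lunr.Builder, name: string) {
         *     builder.field(name, { boost: 5 });
         *   };
         *   builder.use(boostedField, 'teamNames');
         */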
    }

    namespace Index {
        interface Attributes {
            /**
             * An index of term/field to document reference.
             */
            invertedIndex: object;
            /**
             * Document vectors keyed by document reference.
             */
            documentVectors: { [docRef: string]: Vector };
            /**
             * A set of all corpus tokens.
             */
            tokenSet: TokenSet;
            /**
             * The names of indexed document fields.
             */
            fields: string[];
            /**
             * The pipeline to use for search terms.
             */
            pipeline: Pipeline;
        }

        /**
         * A result contains details of a document matching a search query.
         */
        interface Result {
            /**
             * The reference of the document this result represents.
             */
            ref: string;

            /**
             * A number between 0 and 1 representing how similar this document is to the query.
             */
            score: number;

            /**
             * Contains metadata about this match including which term(s) caused the match.
             */
            matchData: MatchData;
        }

        /**
         * A query builder callback provides a query object to be used to express
         * the query to perform on the index.
         *
         * @param query - The query object to build up.
         */
        type QueryBuilder = (this: Query, query: Query) => void;

        /**
         * Although lunr provides the ability to create queries using lunr.Query, it also provides a simple
         * query language which itself is parsed into an instance of lunr.Query.
         *
         * For programmatically building queries it is advised to directly use lunr.Query; the query language
         * is best used for human entered text rather than program generated text.
         *
         * At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported
         * and will be combined with OR, e.g. `hello world` will match documents that contain either 'hello'
         * or 'world', though those that contain both will rank higher in the results.
         *
         * Wildcards can be included in terms to match one or more unspecified characters, these wildcards can
         * be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding
         * wildcards will increase the number of documents that will be found but can also have a negative
         * impact on query performance, especially with wildcards at the beginning of a term.
         *
         * Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term
         * hello in the title field will match this query. Using a field not present in the index will lead
         * to an error being thrown.
         *
         * Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term
         * boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported
         * to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2.
         * Avoid large values for edit distance to improve query performance.
         *
         * To escape special characters the backslash character '\' can be used, this allows searches to include
         * characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead
         * of attempting to apply a boost of 2 to the search term "foo".
         *
         * @example <caption>Simple single term query</caption>
         * hello
         * @example <caption>Multiple term query</caption>
         * hello world
         * @example <caption>term scoped to a field</caption>
         * title:hello
         * @example <caption>term with a boost of 10</caption>
         * hello^10
         * @example <caption>term with an edit distance of 2</caption>
         * hello~2
         */
        type QueryString = string;
    }

    /**
     * An index contains the built index of all documents and provides a query interface
     * to the index.
     *
     * Usually instances of lunr.Index will not be created using this constructor, instead
     * lunr.Builder should be used to construct new indexes, or lunr.Index.load should be
     * used to load previously built and serialized indexes.
     */
    class Index {
        /**
         * @param attrs The attributes of the built search index.
         */
        constructor(attrs: Index.Attributes);

        /**
         * Performs a search against the index using lunr query syntax.
         *
         * Results will be returned sorted by their score, the most relevant results
         * will be returned first.
         *
         * For more programmatic querying use lunr.Index#query.
         *
         * @param queryString - A string containing a lunr query.
         * @throws {lunr.QueryParseError} If the passed query string cannot be parsed.
         */
        search(queryString: Index.QueryString): Index.Result[];
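
        /*
         * A search sketch using the query-string syntax; `idx` and the `title`
         * field are assumed to have been built elsewhere:
         *
         *   const results = idx.search('title:hello^10 worl*');
         *   results.forEach((result) => console.log(result.ref, result.score));
         */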

        /**
         * Performs a query against the index using the yielded lunr.Query object.
         *
         * If performing programmatic queries against the index, this method is preferred
         * over lunr.Index#search so as to avoid the additional query parsing overhead.
         *
         * A query object is yielded to the supplied function which should be used to
         * express the query to be run against the index.
         *
         * Note that although this function takes a callback parameter it is _not_ an
         * asynchronous operation, the callback is just yielded a query object to be
         * customized.
         *
         * @param fn - A function that is used to build the query.
         */
        query(fn: Index.QueryBuilder): Index.Result[];
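
        /*
         * The equivalent programmatic query; the `title` field name is an
         * assumption about how the index was built:
         *
         *   const results = idx.query((q) => {
         *     q.term('hello', { fields: ['title'], boost: 10 });
         *     q.term('worl', { wildcard: lunr.Query.wildcard.TRAILING });
         *   });
         */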

        /**
         * Prepares the index for JSON serialization.
         *
         * The schema for this JSON blob will be described in a
         * separate JSON schema file.
         */
        toJSON(): object;

        /**
         * Loads a previously serialized lunr.Index
         *
         * @param serializedIndex - A previously serialized lunr.Index
         */
        static load(serializedIndex: object): Index;
    }
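
    /*
     * A serialisation round-trip sketch: `toJSON` produces a plain object that can
     * be stringified, and `Index.load` restores it. Any custom pipeline functions
     * used by the index must be registered (see lunr.Pipeline.registerFunction)
     * before loading:
     *
     *   const serialized = JSON.stringify(idx); // uses Index#toJSON
     *   const restored = lunr.Index.load(JSON.parse(serialized));
     *   restored.search('hello');
     */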

    /**
     * Contains and collects metadata about a matching document.
     * A single instance of lunr.MatchData is returned as part of every
     * lunr.Index~Result.
     */
    class MatchData {
        /**
         * A cloned collection of metadata associated with this document.
         */
        metadata: object;

        /**
         * @param term - The term this match data is associated with
         * @param field - The field in which the term was found
         * @param metadata - The metadata recorded about this term in this field
         */
        constructor(term: string, field: string, metadata: object);

        /**
         * An instance of lunr.MatchData will be created for every term that matches a
         * document. However only one instance is required in a lunr.Index~Result. This
         * method combines metadata from another instance of lunr.MatchData with this
         * object's metadata.
         *
         * @param otherMatchData - Another instance of match data to merge with this one.
         * @see {@link lunr.Index~Result}
         */
        combine(otherMatchData: MatchData): void;
    }

    /**
     * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token
     * string as well as all known metadata. A pipeline function can mutate the token string
     * or mutate (or add) metadata for a given token.
     *
     * A pipeline function can indicate that the passed token should be discarded by returning
     * null. This token will not be passed to any downstream pipeline functions and will not be
     * added to the index.
     *
     * Multiple tokens can be returned by returning an array of tokens. Each token will be passed
     * to any downstream pipeline functions and all returned tokens will be added to the index.
     *
     * Any number of pipeline functions may be chained together using a lunr.Pipeline.
     *
     * @param token - A token from the document being processed.
     * @param i - The index of this token in the complete list of tokens for this document/field.
     * @param tokens - All tokens for this document/field.
     */
    type PipelineFunction = (
        token: Token,
        i: number,
        tokens: Token[],
    ) => null | Token | Token[];
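
    /*
     * A sketch of a pipeline function that lower-cases tokens and discards very
     * short ones by returning null; the two-character cut-off is arbitrary:
     *
     *   const shortWordFilter: lunr.PipelineFunction = (token) =>
     *     token.toString().length < 2 ? null : token.update((str) => str.toLowerCase());
     */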

    /**
     * lunr.Pipelines maintain an ordered list of functions to be applied to all
     * tokens in documents entering the search index and queries being run against
     * the index.
     *
     * An instance of lunr.Index created with the lunr shortcut will contain a
     * pipeline with a stop word filter and an English language stemmer. Extra
     * functions can be added before or after either of these functions or these
     * default functions can be removed.
     *
     * When run the pipeline will call each function in turn, passing a token, the
     * index of that token in the original list of all tokens and finally a list of
     * all the original tokens.
     *
     * The output of functions in the pipeline will be passed to the next function
     * in the pipeline. To exclude a token from entering the index the function
     * should return undefined, the rest of the pipeline will not be called with
     * this token.
     *
     * For serialisation of pipelines to work, all functions used in an instance of
     * a pipeline should be registered with lunr.Pipeline. Registered functions can
     * then be loaded. If trying to load a serialised pipeline that uses functions
     * that are not registered an error will be thrown.
     *
     * If not planning on serialising the pipeline then registering pipeline functions
     * is not necessary.
     */
    class Pipeline {
        constructor();

        /**
         * Register a function with the pipeline.
         *
         * Functions that are used in the pipeline should be registered if the pipeline
         * needs to be serialised, or a serialised pipeline needs to be loaded.
         *
         * Registering a function does not add it to a pipeline, functions must still be
         * added to instances of the pipeline for them to be used when running a pipeline.
         *
         * @param fn - The function to register.
         * @param label - The label to register this function with
         */
        static registerFunction(fn: PipelineFunction, label: string): void;
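
        /*
         * A registration sketch; `shortWordFilter` refers to the hypothetical
         * pipeline function sketched above and only needs registering if the
         * pipeline will be serialised or loaded:
         *
         *   lunr.Pipeline.registerFunction(shortWordFilter, 'shortWordFilter');
         *   const pipeline = new lunr.Pipeline();
         *   pipeline.add(lunr.trimmer, shortWordFilter, lunr.stemmer);
         */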

        /**
         * Loads a previously serialised pipeline.
         *
         * All functions to be loaded must already be registered with lunr.Pipeline.
         * If any function from the serialised data has not been registered then an
         * error will be thrown.
         *
         * @param serialised - The serialised pipeline to load.
         */
        static load(serialised: object): Pipeline;

        /**
         * Adds new functions to the end of the pipeline.
         *
         * Logs a warning if the function has not been registered.
         *
         * @param functions - Any number of functions to add to the pipeline.
         */
        add(...functions: PipelineFunction[]): void;

        /**
         * Adds a single function after a function that already exists in the
         * pipeline.
         *
         * Logs a warning if the function has not been registered.
         *
         * @param existingFn - A function that already exists in the pipeline.
         * @param newFn - The new function to add to the pipeline.
         */
        after(existingFn: PipelineFunction, newFn: PipelineFunction): void;

        /**
         * Adds a single function before a function that already exists in the
         * pipeline.
         *
         * Logs a warning if the function has not been registered.
         *
         * @param existingFn - A function that already exists in the pipeline.
         * @param newFn - The new function to add to the pipeline.
         */
        before(existingFn: PipelineFunction, newFn: PipelineFunction): void;

        /**
         * Removes a function from the pipeline.
         *
         * @param fn The function to remove from the pipeline.
         */
        remove(fn: PipelineFunction): void;

        /**
         * Runs the current list of functions that make up the pipeline against the
         * passed tokens.
         *
         * @param tokens The tokens to run through the pipeline.
         */
        run(tokens: Token[]): Token[];

        /**
         * Convenience method for passing a string through a pipeline and getting
         * strings out. This method takes care of wrapping the passed string in a
         * token and mapping the resulting tokens back to strings.
         *
         * @param str - The string to pass through the pipeline.
         */
        runString(str: string): string[];

        /**
         * Resets the pipeline by removing any existing processors.
         */
        reset(): void;

        /**
         * Returns a representation of the pipeline ready for serialisation.
         *
         * Logs a warning if the function has not been registered.
         */
        toJSON(): PipelineFunction[];
    }

    namespace Query {
        /**
         * Constants for indicating what kind of presence a term must have in matching documents.
         */
        enum presence {
            /**
             * Term's presence in a document is optional, this is the default value.
             */
            OPTIONAL = 1,
            /**
             * Term's presence in a document is required, documents that do not contain this term will not be returned.
             */
            REQUIRED = 2,
            /**
             * Term's presence in a document is prohibited, documents that do contain this term will not be returned.
             */
            PROHIBITED = 3,
        }
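
        /*
         * Presence is set per term via the options passed to Query#term; the
         * terms below are illustrative only:
         *
         *   idx.query((q) => {
         *     q.term('candlestick', { presence: lunr.Query.presence.REQUIRED });
         *     q.term('green', { presence: lunr.Query.presence.PROHIBITED });
         *   });
         */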

        enum wildcard {
            NONE = 0,
            LEADING = 1 << 0,
            TRAILING = 1 << 1,
        }

        /**
         * A single clause in a {@link lunr.Query} contains a term and details on how to
         * match that term against a {@link lunr.Index}.
         */
        interface Clause {
            term: string;
            /** The fields in an index this clause should be matched against. */
            fields: string[];
            /** Any boost that should be applied when matching this clause. */
            boost: number;
            /** Whether the term should have fuzzy matching applied, and how fuzzy the match should be. */
            editDistance: number;
            /** Whether the term should be passed through the search pipeline. */
            usePipeline: boolean;
            /** Whether the term should have wildcards appended or prepended. */
            wildcard: number;
        }
    }

    /**
     * A lunr.Query provides a programmatic way of defining queries to be performed
     * against a {@link lunr.Index}.
     *
     * Prefer constructing a lunr.Query using the {@link lunr.Index#query} method
     * so the query object is pre-initialized with the right index fields.
     */
    class Query {
        /**
         * An array of query clauses.
         */
        clauses: Query.Clause[];

        /**
         * An array of all available fields in a lunr.Index.
         */
        allFields: string[];

        /**
         * @param allFields An array of all available fields in a lunr.Index.
         */
        constructor(allFields: string[]);

        /**
         * Adds a {@link lunr.Query~Clause} to this query.
         *
         * Unless the clause contains the fields to be matched all fields will be matched. In addition
         * a default boost of 1 is applied to the clause.
         *
         * @param clause - The clause to add to this query.
         * @see lunr.Query~Clause
         */
        clause(clause: Query.Clause): Query;

        /**
         * Adds a term to the current query, under the covers this will create a {@link lunr.Query~Clause}
         * and add it to the list of clauses that make up this query.
         *
         * The term is used as is, i.e. no tokenization will be performed by this method. Instead conversion
         * to a token or token-like string should be done before calling this method.
         *
         * The term will be converted to a string by calling `toString`. Multiple terms can be passed as an
         * array, each term in the array will share the same options.
         *
         * @param term - The term to add to the query.
         * @param [options] - Any additional properties to add to the query clause.
         * @see lunr.Query#clause
         * @see lunr.Query~Clause
         * @example <caption>adding a single term to a query</caption>
         * query.term("foo")
         * @example <caption>adding a single term to a query and specifying search fields, term boost and automatic trailing wildcard</caption>
         * query.term("foo", {
         *   fields: ["title"],
         *   boost: 10,
         *   wildcard: lunr.Query.wildcard.TRAILING
         * })
         */
        term(term: string | string[] | Token | Token[], options?: object): Query;
    }

    class QueryParseError extends Error {
        name: "QueryParseError";
        message: string;
        start: number;
        end: number;

        constructor(message: string, start: number, end: number);
    }

    /**
     * lunr.stemmer is an English language stemmer, this is a JavaScript
     * implementation of the PorterStemmer taken from http://tartarus.org/~martin
     *
     * Implements {lunr.PipelineFunction}
     *
     * @param token - The string to stem
     * @see {@link lunr.Pipeline}
     */
    function stemmer(token: Token): Token;

    /**
     * lunr.generateStopWordFilter builds a stopWordFilter function from the provided
     * list of stop words.
     *
     * The built in lunr.stopWordFilter is built using this generator and can be used
     * to generate custom stopWordFilters for applications or non-English languages.
     *
     * @param stopWords - The list of stop words
     * @see lunr.Pipeline
     * @see lunr.stopWordFilter
     */
    function generateStopWordFilter(stopWords: string[]): PipelineFunction;
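
    /*
     * A sketch of building and registering a custom stop word filter; the word
     * list and the label are illustrative only:
     *
     *   const deStopWordFilter = lunr.generateStopWordFilter(['der', 'die', 'das']);
     *   lunr.Pipeline.registerFunction(deStopWordFilter, 'deStopWordFilter');
     */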

    /**
     * lunr.stopWordFilter is an English language stop word list filter, any words
     * contained in the list will not be passed through the filter.
     *
     * This is intended to be used in the Pipeline. If the token does not pass the
     * filter then undefined will be returned.
     *
     * Implements {lunr.PipelineFunction}
     *
     * @param token - A token to check for being a stop word.
     * @see {@link lunr.Pipeline}
     */
    function stopWordFilter(token: Token): Token;

    namespace Token {
        /**
         * A token update function is used when updating or optionally
         * when cloning a token.
         *
         * @param str - The string representation of the token.
         * @param metadata - All metadata associated with this token.
         */
        type UpdateFunction = (str: string, metadata: object) => void;
    }

    /**
     * A token wraps a string representation of a token
     * as it is passed through the text processing pipeline.
     */
    class Token {
        /**
         * @param [str=''] - The string token being wrapped.
         * @param [metadata={}] - Metadata associated with this token.
         */
        constructor(str: string, metadata: object);

        /**
         * Returns the token string that is being wrapped by this object.
         */
        toString(): string;

        /**
         * Applies the given function to the wrapped string token.
         *
         * @example
         * token.update(function (str, metadata) {
         *   return str.toUpperCase()
         * })
         *
         * @param fn - A function to apply to the token string.
         */
        update(fn: Token.UpdateFunction): Token;

        /**
         * Creates a clone of this token. Optionally a function can be
         * applied to the cloned token.
         *
         * @param fn - An optional function to apply to the cloned token.
         */
        clone(fn?: Token.UpdateFunction): Token;
    }

    /**
     * A token set is used to store the unique list of all tokens
     * within an index. Token sets are also used to represent an
     * incoming query to the index, this query token set and index
     * token set are then intersected to find which tokens to look
     * up in the inverted index.
     *
     * A token set can hold multiple tokens, as in the case of the
     * index token set, or it can hold a single token as in the
     * case of a simple query token set.
     *
     * Additionally token sets are used to perform wildcard matching.
     * Leading, contained and trailing wildcards are supported, and
     * from this edit distance matching can also be provided.
     *
     * Token sets are implemented as a minimal finite state automaton,
     * where both common prefixes and suffixes are shared between tokens.
     * This helps to reduce the space used for storing the token set.
     */
    class TokenSet {
        constructor();

        /**
         * Creates a TokenSet instance from the given sorted array of words.
         *
         * @param arr - A sorted array of strings to create the set from.
         * @throws Will throw an error if the input array is not sorted.
         */
        fromArray(arr: string[]): TokenSet;

        /**
         * Creates a token set representing a single string with a specified
         * edit distance.
         *
         * Insertions, deletions, substitutions and transpositions are each
         * treated as an edit distance of 1.
         *
         * Increasing the allowed edit distance will have a dramatic impact
         * on the performance of both creating and intersecting these TokenSets.
         * It is advised to keep the edit distance less than 3.
         *
         * @param str - The string to create the token set from.
         * @param editDistance - The allowed edit distance to match.
         */
        fromFuzzyString(str: string, editDistance: number): TokenSet;

        /**
         * Creates a TokenSet from a string.
         *
         * The string may contain one or more wildcard characters (*)
         * that will allow wildcard matching when intersecting with
         * another TokenSet.
         *
         * @param str - The string to create a TokenSet from.
         */
        fromString(str: string): TokenSet;

        /**
         * Converts this TokenSet into an array of strings
         * contained within the TokenSet.
         */
        toArray(): string[];

        /**
         * Generates a string representation of a TokenSet.
         *
         * This is intended to allow TokenSets to be used as keys
         * in objects, largely to aid the construction and minimisation
         * of a TokenSet. As such it is not designed to be a human
         * friendly representation of the TokenSet.
         */
        toString(): string;

        /**
         * Returns a new TokenSet that is the intersection of
         * this TokenSet and the passed TokenSet.
         *
         * This intersection will take into account any wildcards
         * contained within the TokenSet.
         *
         * @param b - Another TokenSet to intersect with.
         */
        intersect(b: TokenSet): TokenSet;
    }

    namespace tokenizer {
        /**
         * The separator used to split a string into tokens. Override this property to change the
         * behaviour of `lunr.tokenizer` when tokenizing strings. By default this splits on whitespace and hyphens.
         *
         * @see lunr.tokenizer
         */
        let separator: RegExp;
    }

    /**
     * A function for splitting a string into tokens ready to be inserted into
     * the search index. Uses `lunr.tokenizer.separator` to split strings, change
     * the value of this property to change how strings are split into tokens.
     *
     * This tokenizer will convert its parameter to a string by calling `toString` and
     * then will split this string on the characters matched by `lunr.tokenizer.separator`.
     * Arrays will have their elements converted to strings and wrapped in a lunr.Token.
     *
     * @param obj - The object to convert into tokens
     */
    function tokenizer(obj?: null | string | object | object[]): Token[];
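
    /*
     * A tokenizer sketch; the second call shows overriding the separator so that
     * underscores also split tokens (the pattern is an example, not the default):
     *
     *   lunr.tokenizer('hello-world foo');  // tokens: 'hello', 'world', 'foo'
     *   lunr.tokenizer.separator = /[\s\-_]+/;
     *   lunr.tokenizer('snake_case words'); // tokens: 'snake', 'case', 'words'
     */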

    /**
     * lunr.trimmer is a pipeline function for trimming non-word
     * characters from the beginning and end of tokens before they
     * enter the index.
     *
     * This implementation may not work correctly for non-Latin
     * characters and should either be removed or adapted for use
     * with languages with non-Latin characters.
     *
     * Implements {lunr.PipelineFunction}
     *
     * @param token The token to pass through the filter
     * @see lunr.Pipeline
     */
    function trimmer(token: Token): Token;

    /**
     * A namespace containing utils for the rest of the lunr library
     */
    namespace utils {
        /**
         * Print a warning message to the console.
         *
         * @param message The message to be printed.
         */
        function warn(message: string): void;

        /**
         * Convert an object to a string.
         *
         * In the case of `null` and `undefined` the function returns
         * the empty string, in all other cases the result of calling
         * `toString` on the passed object is returned.
         *
         * @param obj The object to convert to a string.
         * @return string representation of the passed object.
         */
        function asString(obj: any): string;
    }

    /**
     * A vector is used to construct the vector space of documents and queries. These
     * vectors support operations to determine the similarity between two documents or
     * a document and a query.
     *
     * Normally no parameters are required for initializing a vector, but in the case of
     * loading a previously dumped vector the raw elements can be provided to the constructor.
     *
     * For performance reasons vectors are implemented with a flat array, where an element's
     * index is immediately followed by its value, e.g. [index, value, index, value]. This
     * allows the underlying array to be as sparse as possible and still offer decent
     * performance when being used for vector calculations.
     */
    class Vector {
        /**
         * @param [elements] - The flat list of element index and element value pairs.
         */
        constructor(elements: number[]);

        /**
         * Calculates the position within the vector to insert a given index.
         *
         * This is used internally by insert and upsert. If there are duplicate indexes then
         * the position is returned as if the value for that index were to be updated, but it
         * is the caller's responsibility to check whether there is a duplicate at that index.
         *
         * @param index - The index at which the element should be inserted.
         */
        positionForIndex(index: number): number;

        /**
         * Inserts an element at an index within the vector.
         *
         * Does not allow duplicates, will throw an error if there is already an entry
         * for this index.
         *
         * @param insertIdx - The index at which the element should be inserted.
         * @param val - The value to be inserted into the vector.
         */
        insert(insertIdx: number, val: number): void;

        /**
         * Inserts or updates an existing index within the vector.
         *
         * @param insertIdx - The index at which the element should be inserted.
         * @param val - The value to be inserted into the vector.
         * @param fn - A function that is called for updates, the existing value and the
         * requested value are passed as arguments
         */
        upsert(
            insertIdx: number,
            val: number,
            fn: (existingVal: number, val: number) => number,
        ): void;

        /**
         * Calculates the magnitude of this vector.
         */
        magnitude(): number;

        /**
         * Calculates the dot product of this vector and another vector.
         *
         * @param otherVector - The vector to compute the dot product with.
         */
        dot(otherVector: Vector): number;

        /**
         * Calculates the cosine similarity between this vector and another
         * vector.
         *
         * @param otherVector - The other vector to calculate the
         * similarity with.
         */
        similarity(otherVector: Vector): number;
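
        /*
         * A sketch of the flat [index, value, ...] layout and the element-wise
         * operations; the positions and values are arbitrary:
         *
         *   const a = new lunr.Vector([0, 1, 3, 2]); // value 1 at index 0, value 2 at index 3
         *   const b = new lunr.Vector([]);
         *   b.insert(0, 1);
         *   b.upsert(3, 4, (existing, requested) => existing + requested);
         *   a.dot(b);        // 1 * 1 + 2 * 4 = 9
         *   a.similarity(b); // similarity of the two vectors
         */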

        /**
         * Converts the vector to an array of the elements within the vector.
         */
        toArray(): number[];

        /**
         * A JSON serializable representation of the vector.
         */
        toJSON(): number[];
    }

    const version: string;
    type ConfigFunction = (this: Builder, builder: Builder) => void;
}

/**
 * Convenience function for instantiating a new lunr index and configuring it with the default
 * pipeline functions and the passed config function.
 *
 * When using this convenience function a new index will be created with the following functions
 * already in the pipeline:
 *
 * * lunr.stopWordFilter - filters out any stop words before they enter the index
 *
 * * lunr.stemmer - stems the tokens before entering the index.
 *
 * Example:
 *
 * ```javascript
 * var idx = lunr(function () {
 *   this.field('title', { boost: 10 });
 *   this.field('tags', { boost: 100 });
 *   this.field('body');
 *
 *   this.ref('cid');
 *
 *   this.pipeline.add(function () {
 *     // some custom pipeline function
 *   });
 * });
 * ```
 */
declare function lunr(config: lunr.ConfigFunction): lunr.Index;