// Source: compromise type definitions, retrieved via UNPKG (26.3 kB, TypeScript, raw view)
1export as namespace nlp
2
3// a key-value object of words, terms
4declare interface Lexicon {
5 [key: string]: string
6}
7// documents indexed by a string
8declare interface DocIndex<
9 Ext extends object = {},
10 W extends nlp.World = nlp.World,
11 Ph extends nlp.Phrase = nlp.Phrase
12> {
13 [key: string]: nlp.ExtendedDocument<Ext, W, Ph>
14}
15
16declare interface nlp<D extends object, W extends object, Ph extends Object> {
17 /** normal usage */
18 (text?: string, lexicon?: Lexicon): nlp.ExtendedDocument<D, W, Ph>
19 /** tozenize string */
20 tokenize(text: string, lexicon?: Lexicon): nlp.ExtendedDocument<D, W, Ph>
21 /** mix in a compromise-plugin */
22 extend<P>(
23 plugin: P
24 ): nlp<
25 P extends nlp.Plugin<infer PD, infer PW, infer PPh>
26 ? { [k in keyof (PD & D)]: (PD & D)[k] }
27 : { [k in keyof D]: D[k] },
28 P extends nlp.Plugin<infer PD, infer PW, infer PPh>
29 ? { [k in keyof (PW & W)]: (PW & W)[k] }
30 : { [k in keyof W]: W[k] },
31 P extends nlp.Plugin<infer PD, infer PW, infer PPh>
32 ? { [k in keyof (PPh & Ph)]: (PPh & Ph)[k] }
33 : { [k in keyof Ph]: Ph[k] }
34 >
35
36 /** re-generate a Doc object from .json() results */
37 fromJSON(json: any): nlp.ExtendedDocument<D, W, Ph>
38 /** log our decision-making for debugging */
39 verbose(bool?: boolean): nlp.ExtendedDocument<D, W, Ph>
40 /** create instance using global world*/
41 clone(): nlp<D, W, Ph>
42 /** current semver version of the library */
43 version: nlp.ExtendedDocument<D, W, Ph>
44 /** grab the document's context data */
45 world(): W
46 /** pre-parse a match statement, for faster lookups*/
47 parseMatch(str: string, options?:object): nlp<D, W, Ph>
48}
49
50declare function nlp(text?: string, lexicon?: Lexicon): nlp.DefaultDocument
51declare function nlp<D extends object = {}, W extends object = {}, Ph extends object = {}>(
52 text?: string
53): nlp.ExtendedDocument<D, W, Ph>
54
55// possible values to .json()
56declare interface JsonOptions {
57 /** a perfect copy of the input text */
58 text?: boolean
59 /** normalized whitespace, case, unicode, punctuation */
60 normal?: boolean
61 /** lowercase, trimmed, contractions expanded. */
62 reduced?: boolean
63 /** cleanup whitespace */
64 trim?: boolean
65 /** character-position where this begins */
66 offset?: boolean
67 /** frequency of this match in the document */
68 count?: boolean
69 /** remove duplicate results*/
70 unique?: boolean
71 /** starting term # in document */
72 index?: boolean
73 /** options for each term */
74 terms?: {
75 text?: boolean
76 normal?: boolean
77 clean?: boolean
78 implicit?: boolean
79 tags?: boolean
80 whitespace?: boolean
81 id?: boolean
82 offset?: boolean
83 bestTag?: boolean
84 }
85}
86
87// Cleaner plugin types
88type PluginWorld<D extends object, W extends object, Ph extends object> = {
89 // Override post process type
90 postProcess(process: (Doc: nlp.ExtendedDocument<D, W, Ph>) => void): nlp.ExtendedWorld<W>
91} & nlp.ExtendedWorld<W>
92
93type PluginDocument<D extends object, W extends object, Ph extends object> = nlp.ExtendedDocument<D, W, Ph> & {
94 prototype: nlp.ExtendedDocument<D, W, Ph>
95}
96
97type PluginPhrase<Ph extends object> = nlp.ExtendedPhrase<Ph> & { prototype: nlp.ExtendedPhrase<Ph> }
98type PluginTerm = nlp.Term & PluginConstructor
99type PluginPool = nlp.Pool & PluginConstructor
100
101// Make these available, full support tbd
102type PluginConstructor = {
103 prototype: Record<string, any>
104}
105
106// Constructor
107declare module nlp {
108 export function tokenize(text?: string, lexicon?: Lexicon): DefaultDocument
109 /** mix in a compromise-plugin */
110 export function extend<P>(
111 plugin: P
112 ): nlp<
113 P extends Plugin<infer D, infer W, infer Ph> ? D : {},
114 P extends Plugin<infer D, infer W, infer Ph> ? W : {},
115 P extends Plugin<infer D, infer W, infer Ph> ? Ph : {}
116 >
117 /** re-generate a Doc object from .json() results */
118 export function fromJSON(json: any): DefaultDocument
119 /** log our decision-making for debugging */
120 export function verbose(bool?: boolean): DefaultDocument
121 /** create instance using global world */
122 export function clone(): nlp<{}, {}, {}>
123 /** current semver version of the library */
124 export const version: number
125
126 type Plugin<D extends object = {}, W extends object = {}, Ph extends object = {}> = (
127 Doc: PluginDocument<D, W, Ph>,
128 world: PluginWorld<D, W, Ph>,
129 nlp: nlp<D, W, Ph>,
130 Phrase: PluginPhrase<Ph>,
131 Term: PluginTerm, // @todo Add extend support
132 Pool: PluginPool
133 ) => void
134
135 type ExtendedWorld<W extends object> = nlp.World & W
136 type ExtendedDocument<D extends object, W extends object, Ph extends object> = {
137 [k in keyof (nlp.Document<D, ExtendedWorld<W>, ExtendedPhrase<Ph>> & D)]: (nlp.Document<
138 D,
139 ExtendedWorld<W>,
140 ExtendedPhrase<Ph>
141 > &
142 D)[k]
143 }
144 type ExtendedPhrase<Ph extends object> = nlp.Phrase & Ph
145 type DefaultDocument = {
146 [k in keyof nlp.Document]: nlp.Document[k]
147 }
148
149 class Document<Ext extends object = {}, W extends World = World, Ph extends Phrase = Phrase> {
150 // Utils
151 /** return the whole original document ('zoom out') */
152 all(): ExtendedDocument<Ext, W, Ph>
153 /** is this document empty? */
154 found: boolean
155 /** return the previous result */
156 parent(): ExtendedDocument<Ext, W, Ph>
157 /** return all of the previous results */
158 parents(): ExtendedDocument<Ext, W, Ph>[]
159 /** (re)run the part-of-speech tagger on this document */
160 tagger(): ExtendedDocument<Ext, W, Ph>
161 /** count the # of terms in each match */
162 wordCount(): number
163 /** count the # of characters of each match */
164 length(): number
165 /** deep-copy the document, so that no references remain */
166 clone(shallow?: boolean): ExtendedDocument<Ext, W, Ph>
167 /** freeze the current state of the document, for speed-purposes */
168 cache(options?: object): ExtendedDocument<Ext, W, Ph>
169 /** un-freezes the current state of the document, so it may be transformed */
170 uncache(options?: object): ExtendedDocument<Ext, W, Ph>
171 /** the current world */
172 world: W
173
174 // Accessors
175 /** use only the first result(s) */
176 first(n?: number): ExtendedDocument<Ext, W, Ph>
177 /** use only the last result(s) */
178 last(n?: number): ExtendedDocument<Ext, W, Ph>
179 /** grab a subset of the results */
180 slice(start: number, end?: number): ExtendedDocument<Ext, W, Ph>
181 /** use only the nth result */
182 eq(n: number): ExtendedDocument<Ext, W, Ph>
183 /** get the first word in each match */
184 firstTerms(): ExtendedDocument<Ext, W, Ph>
185 /** get the end word in each match */
186 lastTerms(): ExtendedDocument<Ext, W, Ph>
187 /** return a flat list of all Term objects in match */
188 termList(): Term[]
189 /** grab a specific named capture group */
190 groups(name: string): ExtendedDocument<Ext, W, Ph>
191 /** grab all named capture groups */
192 groups(): DocIndex<Ext, W, Ph>
193 /** Access Phrase list */
194 list: Ph[]
195 /** Access pool */
196 pool(): Pool
197
198 // Match
199 /** return matching patterns in this doc */
200 match(match: string | ExtendedDocument<Ext, W, Ph>, options:any): ExtendedDocument<Ext, W, Ph>
201 /** return a named group in a match */
202 match(match: string | ExtendedDocument<Ext, W, Ph>, group:string|number): ExtendedDocument<Ext, W, Ph>
203 /** return all results except for this */
204 not(match: string | ExtendedDocument<Ext, W, Ph>, options:any): ExtendedDocument<Ext, W, Ph>
205 /** return only the first match */
206 matchOne(match: string | ExtendedDocument<Ext, W, Ph>, options:any): ExtendedDocument<Ext, W, Ph>
207 /** return each current phrase, only if it contains this match */
208 if(match: string | ExtendedDocument<Ext, W, Ph>, options:any): ExtendedDocument<Ext, W, Ph>
209 /** Filter-out any current phrases that have this match */
210 ifNo(match: string | ExtendedDocument<Ext, W, Ph>, options:any): ExtendedDocument<Ext, W, Ph>
211 /** Return a boolean if this match exists */
212 has(match: string | ExtendedDocument<Ext, W, Ph>, options:any): boolean
213 /** search through earlier terms, in the sentence */
214 lookBehind(match: string | ExtendedDocument<Ext, W, Ph>, options:any): ExtendedDocument<Ext, W, Ph>
215 /** search through following terms, in the sentence */
216 lookAhead(match: string | ExtendedDocument<Ext, W, Ph>, options:any): ExtendedDocument<Ext, W, Ph>
217 /** return the terms before each match */
218 before(match: string | ExtendedDocument<Ext, W, Ph>, options:any): ExtendedDocument<Ext, W, Ph>
219 /** return the terms after each match */
220 after(match: string | ExtendedDocument<Ext, W, Ph>, options:any): ExtendedDocument<Ext, W, Ph>
221 /** quick find for an array of string matches */
222 lookup(matches: string[]): ExtendedDocument<Ext, W, Ph>
223 /** quick find for an object of key-value matches */
224 lookup(matches: Lexicon): DocIndex<W>
225
226 // Case
227 /** turn every letter of every term to lower-cse */
228 toLowerCase(): ExtendedDocument<Ext, W, Ph>
229 /** turn every letter of every term to upper case */
230 toUpperCase(): ExtendedDocument<Ext, W, Ph>
231 /** upper-case the first letter of each term */
232 toTitleCase(): ExtendedDocument<Ext, W, Ph>
233 /** remove whitespace and title-case each term */
234 toCamelCase(): ExtendedDocument<Ext, W, Ph>
235
236 // Whitespace
237 /** add this punctuation or whitespace before each match */
238 pre(str: string, concat: boolean): ExtendedDocument<Ext, W, Ph>
239 /** add this punctuation or whitespace after each match */
240 post(str: string, concat: boolean): ExtendedDocument<Ext, W, Ph>
241 /** remove start and end whitespace */
242 trim(): ExtendedDocument<Ext, W, Ph>
243 /** connect words with hyphen, and remove whitespace */
244 hyphenate(): ExtendedDocument<Ext, W, Ph>
245 /** remove hyphens between words, and set whitespace */
246 dehyphenate(): ExtendedDocument<Ext, W, Ph>
247
248 // Tag
249 /** Give all terms the given tag */
250 tag(tag: string, reason?: string): ExtendedDocument<Ext, W, Ph>
251 /** Only apply tag to terms if it is consistent with current tags */
252 tagSafe(tag: string, reason?: string): ExtendedDocument<Ext, W, Ph>
253 /** Remove this term from the given terms */
254 unTag(tag: string, reason?: string): ExtendedDocument<Ext, W, Ph>
255 /** return only the terms that can be this tag */
256 canBe(tag: string): ExtendedDocument<Ext, W, Ph>
257
258 // Loops
259 /** run each phrase through a function, and create a new document */
260 map(fn: (p: ExtendedPhrase<Ph>) => void): ExtendedDocument<Ext, W, Ph> | []
261 /** run a function on each phrase, as an individual document */
262 forEach(fn: (doc: ExtendedDocument<Ext, W, Ph>) => void): ExtendedDocument<Ext, W, Ph>
263 /** return only the phrases that return true */
264 filter(fn: (p: ExtendedPhrase<Ph>) => boolean): ExtendedDocument<Ext, W, Ph>
265 /** return a document with only the first phrase that matches */
266 find(fn: (p: ExtendedPhrase<Ph>) => boolean): ExtendedDocument<Ext, W, Ph> | undefined
267 /** return true or false if there is one matching phrase */
268 some(fn: (p: ExtendedPhrase<Ph>) => boolean): ExtendedDocument<Ext, W, Ph>
269 /** sample a subset of the results */
270 random(n?: number): ExtendedDocument<Ext, W, Ph>
271
272 // Insert
273 /** substitute-in new content */
274 replaceWith(text: string | Function, keepTags?: boolean | object, keepCase?: boolean): ExtendedDocument<Ext, W, Ph>
275 /** search and replace match with new content */
276 replace(
277 match: string,
278 text?: string | Function,
279 keepTags?: boolean | object,
280 keepCase?: boolean
281 ): ExtendedDocument<Ext, W, Ph>
282 /** fully remove these terms from the document */
283 delete(match: string): ExtendedDocument<Ext, W, Ph>
284 /** add these new terms to the end (insertAfter) */
285 append(text: string): ExtendedDocument<Ext, W, Ph>
286 /** add these new terms to the front (insertBefore) */
287 prepend(text: string): ExtendedDocument<Ext, W, Ph>
288 /** add these new things to the end */
289 concat(text: string): ExtendedDocument<Ext, W, Ph>
290
291 // transform
292 /**re-arrange the order of the matches (in place) */
293 sort(method?: string | Function): ExtendedDocument<Ext, W, Ph>
294 /**reverse the order of the matches, but not the words */
295 reverse(): ExtendedDocument<Ext, W, Ph>
296 /** clean-up the document, in various ways */
297 normalize(options?: string | object): ExtendedDocument<Ext, W, Ph>
298 /** remove any duplicate matches */
299 unique(): ExtendedDocument<Ext, W, Ph>
300 /** return a Document with three parts for every match ('splitOn') */
301 split(match?: string): ExtendedDocument<Ext, W, Ph>
302 /** separate everything after the match as a new phrase */
303 splitBefore(match?: string): ExtendedDocument<Ext, W, Ph>
304 /** separate everything before the word, as a new phrase */
305 splitAfter(match?: string): ExtendedDocument<Ext, W, Ph>
306 /** split a document into labeled sections */
307 segment(regs: object, options?: object): ExtendedDocument<Ext, W, Ph>
308 /** make all phrases into one phrase */
309 join(str?: string): ExtendedDocument<Ext, W, Ph>
310
311 // Output
312 /** return the document as text */
313 text(options?: string | object): string
314 /** pull out desired metadata from the document */
315 json(options?: JsonOptions | string): any
316 /** some named output formats */
317 out(format?: 'text' | 'normal' | 'offset' | 'terms'): string
318 out(format: 'array'): string[]
319 out(format: 'tags' | 'terms'): Array<{ normal: string; text: string; tags: string[] }>
320 out(format: 'json'): Array<{ normal: string; text: string; tags: () => void }>[]
321 out(format: 'debug'): ExtendedDocument<Ext, W, Ph>
322 out(format: 'topk'): Array<{ normal: string; count: number; percent: number }>
323 /** pretty-print the current document and its tags */
324 debug(): ExtendedDocument<Ext, W, Ph>
325 /** store a parsed document for later use */
326 export(): any
327
328 // Selections
329 /** split-up results by each individual term */
330 terms(n?: number): ExtendedDocument<Ext, W, Ph>
331 /** split-up results into multi-term phrases */
332 clauses(n?: number): ExtendedDocument<Ext, W, Ph>
333 /** return all terms connected with a hyphen or dash like `'wash-out'`*/
334 hyphenated(n?: number): ExtendedDocument<Ext, W, Ph>
335 /** add quoation marks around each match */
336 toQuoations(start?: string, end?: string): ExtendedDocument<Ext, W, Ph>
337 /** add brackets around each match */
338 toParentheses(start?: string, end?: string): ExtendedDocument<Ext, W, Ph>
339 /** return things like `'(939) 555-0113'` */
340 phoneNumbers(n?: number): ExtendedDocument<Ext, W, Ph>
341 /** return things like `'#nlp'` */
342 hashTags(n?: number): ExtendedDocument<Ext, W, Ph>
343 /** return things like `'hi@compromise.cool'` */
344 emails(n?: number): ExtendedDocument<Ext, W, Ph>
345 /** return things like `:)` */
346 emoticons(n?: number): ExtendedDocument<Ext, W, Ph>
347 /** return athings like `💋` */
348 emoji(n?: number): ExtendedDocument<Ext, W, Ph>
349 /** return things like `'@nlp_compromise'`*/
350 atMentions(n?: number): ExtendedDocument<Ext, W, Ph>
351 /** return things like `'compromise.cool'` */
352 urls(n?: number): ExtendedDocument<Ext, W, Ph>
353 /** return things like `'quickly'` */
354 adverbs(n?: number): ExtendedDocument<Ext, W, Ph>
355 /** return things like `'he'` */
356 pronouns(n?: number): ExtendedDocument<Ext, W, Ph>
357 /** return things like `'but'`*/
358 conjunctions(n?: number): ExtendedDocument<Ext, W, Ph>
359 /** return things like `'of'`*/
360 prepositions(n?: number): ExtendedDocument<Ext, W, Ph>
361 /** return person names like `'John A. Smith'`*/
362 people(n?: number): ExtendedDocument<Ext, W, Ph>
363 /** return location names like `'Paris, France'`*/
364 places(n?: number): ExtendedDocument<Ext, W, Ph>
365 /** return companies and org names like `'Google Inc.'`*/
366 organizations(n?: number): ExtendedDocument<Ext, W, Ph>
367 /** return people, places, and organizations */
368 topics(n?: number): ExtendedDocument<Ext, W, Ph>
369
370 // Subsets
371 /** get the whole sentence for each match */
372 sentences(n?: number): ExtendedDocument<Ext, W, Ph>
373 /** return things like `'Mrs.'`*/
374 abbreviations(n?: number): Abbreviations<Ext, W, Ph>
375 /** return any multi-word terms, like "didn't" */
376 contractions(n?: number): Contractions<Ext, W, Ph>
377 /** contract words that can combine, like "did not" */
378 contract(): ExtendedDocument<Ext, W, Ph>
379 /** return anything inside (parentheses) */
380 parentheses(n?: number): Parentheses<Ext, W, Ph>
381 /** return things like "Spencer's" */
382 possessives(n?: number): Possessives<Ext, W, Ph>
383 /** return any terms inside 'quotation marks' */
384 quotations(n?: number): Quotations<Ext, W, Ph>
385 /** return things like `'FBI'` */
386 acronyms(n?: number): Acronyms<Ext, W, Ph>
387 /** return things like `'eats, shoots, and leaves'` */
388 lists(n?: number): Lists<Ext, W, Ph>
389 /** return any subsequent terms tagged as a Noun */
390 nouns(n?: number, opts?: object): Nouns<Ext, W, Ph>
391 /** return any subsequent terms tagged as a Verb */
392 verbs(n?: number): Verbs<Ext, W, Ph>
393 }
394
395 // Nouns class
396 interface Nouns<Ext extends object = {}, W extends World = World, Ph extends Phrase = Phrase>
397 extends ExtendedDocument<{}, W, Ph> {
398 /** get any adjectives describing this noun*/
399 adjectives(): ExtendedDocument<Ext, W, Ph>
400 /** return only plural nouns */
401 isPlural(): ExtendedDocument<Ext, W, Ph>
402 /** return only nouns that _can be_ inflected as plural */
403 hasPlural(): ExtendedDocument<Ext, W, Ph>
404 /** 'football captain' → 'football captains' */
405 toPlural(setArticle?: boolean): ExtendedDocument<Ext, W, Ph>
406 /** 'turnovers' → 'turnover' */
407 toSingular(setArticle?: boolean): ExtendedDocument<Ext, W, Ph>
408 /** add a `'s` to the end, in a safe manner. */
409 toPossessive(): ExtendedDocument<Ext, W, Ph>
410 }
411
412 // Verbs class
413 interface Verbs<Ext extends object = {}, W extends World = World, Ph extends Phrase = Phrase>
414 extends ExtendedDocument<{}, W, Ph> {
415 /** return the adverbs describing this verb */
416 adverbs(): ExtendedDocument<Ext, W, Ph>
417 /** return only plural nouns */
418 isPlural(): ExtendedDocument<Ext, W, Ph>
419 /** return only singular nouns */
420 isSingular(): ExtendedDocument<Ext, W, Ph>
421 /** return all forms of these verbs */
422 conjugate(): ExtendedDocument<Ext, W, Ph>
423 /** 'will go' → 'went' */
424 toPastTense(): ExtendedDocument<Ext, W, Ph>
425 /** 'walked' → 'walks' */
426 toPresentTense(): ExtendedDocument<Ext, W, Ph>
427 /** 'walked' → 'will walk' */
428 toFutureTense(): ExtendedDocument<Ext, W, Ph>
429 /** 'walks' → 'walk' */
430 toInfinitive(): ExtendedDocument<Ext, W, Ph>
431 /** 'walks' → 'walking' */
432 toGerund(): ExtendedDocument<Ext, W, Ph>
433 /** 'drive' → 'driven' if it exists, otherwise past-tense */
434 toParticiple(): ExtendedDocument<Ext, W, Ph>
435 /** return verbs with 'not' */
436 isNegative(): ExtendedDocument<Ext, W, Ph>
437 /** only verbs without 'not'*/
438 isPositive(): ExtendedDocument<Ext, W, Ph>
439 /** 'went' → 'did not go'*/
440 toNegative(): ExtendedDocument<Ext, W, Ph>
441 /** "didn't study" → 'studied' */
442 toPositive(): ExtendedDocument<Ext, W, Ph>
443 }
444
445 interface Abbreviations<Ext extends object = {}, W extends World = World, Ph extends Phrase = Phrase>
446 extends ExtendedDocument<{}, W, Ph> {
447 /** */
448 stripPeriods(): ExtendedDocument<Ext, W, Ph>
449 /** */
450 addPeriods(): ExtendedDocument<Ext, W, Ph>
451 }
452
453 interface Acronyms<Ext extends object = {}, W extends World = World, Ph extends Phrase = Phrase>
454 extends ExtendedDocument<{}, W, Ph> {
455 /** */
456 stripPeriods(): ExtendedDocument<Ext, W, Ph>
457 /** */
458 addPeriods(): ExtendedDocument<Ext, W, Ph>
459 }
460
461 interface Contractions<Ext extends object = {}, W extends World = World, Ph extends Phrase = Phrase>
462 extends ExtendedDocument<{}, W, Ph> {
463 /** */
464 expand(): ExtendedDocument<Ext, W, Ph>
465 }
466
467 interface Parentheses<Ext extends object = {}, W extends World = World, Ph extends Phrase = Phrase>
468 extends ExtendedDocument<{}, W, Ph> {
469 /** */
470 unwrap(): ExtendedDocument<Ext, W, Ph>
471 }
472
473 interface Possessives<Ext extends object = {}, W extends World = World, Ph extends Phrase = Phrase>
474 extends ExtendedDocument<{}, W, Ph> {
475 /** */
476 strip(): ExtendedDocument<Ext, W, Ph>
477 }
478
479 interface Quotations<Ext extends object = {}, W extends World = World, Ph extends Phrase = Phrase>
480 extends ExtendedDocument<{}, W, Ph> {
481 /** */
482 unwrap(): ExtendedDocument<Ext, W, Ph>
483 }
484
485 interface Lists<Ext extends object = {}, W extends World = World, Ph extends Phrase = Phrase>
486 extends ExtendedDocument<{}, W, Ph> {
487 /** */
488 conjunctions(): ExtendedDocument<Ext, W, Ph>
489 /** */
490 parts(): ExtendedDocument<Ext, W, Ph>
491 /** */
492 items(): ExtendedDocument<Ext, W, Ph>
493 /** */
494 add(): ExtendedDocument<Ext, W, Ph>
495 /** */
496 remove(): ExtendedDocument<Ext, W, Ph>
497 /** */
498 hasOxfordComma(): ExtendedDocument<Ext, W, Ph>
499 }
500
501 class World {
502 /** more logs for debugging */
503 verbose(on?: boolean): this
504 isVerbose(): boolean
505
506 /** get all terms in our lexicon with this tag */
507 getByTag(tag: string): Record<string, true>
508
509 /** put new words into our lexicon, properly */
510 addWords(words: Record<string, string>): void
511
512 /** extend the compromise tagset */
513 addTags(
514 tags: Record<
515 string,
516 {
517 isA?: string | string[]
518 notA?: string | string[]
519 }
520 >
521 ): void
522
523 /** call methods after tagger runs */
524 postProcess<D extends Document = Document>(process: (Doc: D) => void): this
525 }
526
527 class Pool {
528 /** throw a new term object in */
529 add(term: Term): this
530 /** find a term by it's id */
531 get(id: string): Term
532 /** find a term by it's id */
533 remove(id: string): void
534 /** merge with another pool */
535 merge(pool: Pool): this
536 /** size of pool */
537 stats(): number
538 }
539
540 class Cache {
541 terms: Term[]
542 words: any
543 tags: Record<string, true>
544 set: boolean
545 }
546
547 class Phrase {
548 isA: 'Phrase' // Get Type
549 start: string // id of start Term
550 length: number // number of terms in phrase
551 pool: Pool // global pool
552 cache: Cache // global cache
553
554 /** return a flat array of Term objects */
555 terms(): Term[]
556 }
557
558 // @todo
559 interface RegSyntax {
560 [index: string]: any
561 }
562
563 type TextOutOptions =
564 | 'reduced'
565 | 'root'
566 | 'implicit'
567 | 'normal'
568 | 'unicode'
569 | 'titlecase'
570 | 'lowercase'
571 | 'acronyms'
572 | 'whitespace'
573 | 'punctuation'
574 | 'abbreviations'
575
576 type JsonOutOptions = 'text' | 'normal' | 'tags' | 'clean' | 'id' | 'offset' | 'implicit' | 'whitespace' | 'bestTag'
577
578 class Term {
579 isA: 'Term' // Get Type
580 id: string
581
582 // main data
583 text: string
584 tags: Record<string, boolean>
585
586 // alternative forms of this.text
587 root: string | null
588 implicit: string | null
589 clean?: string
590 reduced?: string
591
592 // additional surrounding information
593 prev: string | null // id of prev term
594 next: string | null // id of next term
595 pre?: string // character before e.g. ' ' ','
596 post?: string // character after e.g. ' ' ','
597
598 // support alternative matches
599 alias?: string
600
601 constructor(text?: string)
602 set(text: string): this
603
604 /** clone contents to new term */
605 clone(): Term
606
607 /** convert all text to uppercase */
608 toUpperCase(): this
609
610 /** convert all text to lowercase */
611 toLowerCase(): this
612
613 /** only set the first letter to uppercase
614 * leave any existing uppercase alone
615 */
616 toTitleCase(): this
617
618 /** if all letters are uppercase */
619 isUpperCase(): this
620
621 /** if the first letter is uppercase, and the rest are lowercase */
622 isTitleCase(): this
623 titleCase(): this
624
625 /** search the term's 'post' punctuation */
626 hasPost(): boolean
627
628 /** search the term's 'pre' punctuation */
629 hasPre(): boolean
630
631 /** does it have a quotation symbol? */
632 hasQuote(): boolean
633 hasQuotation(): boolean
634
635 /** does it have a comma? */
636 hasComma(): boolean
637
638 /** does it end in a period? */
639 hasPeriod(): boolean
640
641 /** does it end in an exclamation */
642 hasExclamation(): boolean
643
644 /** does it end with a question mark? */
645 hasQuestionMark(): boolean
646
647 /** is there a ... at the end? */
648 hasEllipses(): boolean
649
650 /** is there a semicolon after this word? */
651 hasSemicolon(): boolean
652
653 /** is there a slash '/' in this word? */
654 hasSlash(): boolean
655
656 /** a hyphen connects two words like-this */
657 hasHyphen(): boolean
658
659 /** a dash separates words - like that */
660 hasDash(): boolean
661
662 /** is it multiple words combinded */
663 hasContraction(): boolean
664
665 /** try to sensibly put this punctuation mark into the term */
666 addPunctuation(punct: string): this
667
668 doesMatch(reg: RegSyntax, index: number, length: number): boolean
669
670 /** does this term look like an acronym? */
671 isAcronym(): boolean
672
673 /** is this term implied by a contraction? */
674 isImplicit(): boolean
675
676 /** does the term have at least one good tag? */
677 isKnown(): boolean
678
679 /** cache the root property of the term */
680 setRoot(world: World): void
681
682 /** return various text formats of this term */
683 textOut(options?: Record<TextOutOptions, boolean>, showPre?: boolean, showPost?: boolean): string
684
685 /** return various metadata for this term */
686 // @todo create output type from options...
687 json(options?: Record<JsonOutOptions, boolean>, world?: World): object
688
689 /** add a tag or tags, and their descendents to this term */
690 tag(tags: string | string[], reason?: string, world?: World): this
691
692 /** only tag this term if it's consistent with it's current tags */
693 tagSafe(tags: string | string[], reason?: string, world?: World): this
694
695 /** remove a tag or tags, and their descendents from this term */
696 unTag(tags: string | string[], reason?: string, world?: World): this
697
698 /** is this tag consistent with the word's current tags? */
699 canBe(tags: string | string[], world?: World): boolean
700 }
701}
702
703export default nlp
704
\No newline at end of file