1 | export as namespace nlp
|
2 |
|
/**
 * Callable shape of an `nlp` instance whose documents and world carry extra,
 * plugin-provided members.
 *
 * @typeParam D - members added on top of `nlp.Document` (see `nlp.ExtendedDocument`)
 * @typeParam W - members added on top of `nlp.World` (see `nlp.ExtendedWorld`)
 */
declare interface nlp<D extends object, W extends object> {
  /** Call signature: takes a string and returns a document extended with D and W. */
  (text: string): nlp.ExtendedDocument<D, W>

  /** Same parameter and return types as the call signature. */
  tokenize(text: string): nlp.ExtendedDocument<D, W>

  /**
   * Mix in a compromise-plugin.
   *
   * When `P` matches `nlp.Plugin<PD, PW>`, the returned `nlp` type carries
   * `PD & D` / `PW & W` (re-mapped key by key into a single object type);
   * otherwise D and W are passed through unchanged.
   */
  extend<P>(
    plugin: P
  ): nlp<
    P extends nlp.Plugin<infer PD, infer PW> ? { [k in keyof (PD & D)]: (PD & D)[k] } : { [k in keyof D]: D[k] },
    P extends nlp.Plugin<infer PD, infer PW> ? { [k in keyof (PW & W)]: (PW & W)[k] } : { [k in keyof W]: W[k] }
  >

  /** Takes a JSON value and returns an extended document. */
  load(json: any): nlp.ExtendedDocument<D, W>

  /** Takes a boolean flag and returns an extended document. */
  verbose(bool: boolean): nlp.ExtendedDocument<D, W>

  /**
   * Library version.
   *
   * Previously typed as `nlp.ExtendedDocument<D, W>`, which cannot describe a
   * version value; it is now typed like the namespace-level `nlp.version`
   * declaration (`number`).
   * NOTE(review): semver values are normally strings - confirm against the
   * runtime and, if needed, change this and `nlp.version` together.
   */
  version: number
}
|
23 |
|
/** Entry point without extension types: takes `text` and returns a `nlp.DefaultDocument`. */
declare function nlp(text: string): nlp.DefaultDocument

/**
 * Entry point with caller-supplied extension types.
 *
 * @typeParam D - extra document members expected on the result
 * @typeParam W - extra world members expected on the result
 */
declare function nlp<D extends object, W extends object>(
  text: string
): nlp.ExtendedDocument<D, W>
|
26 |
|
/** Possible values to pass to `.json()`. Every flag is optional. */
declare interface JsonOptions {
  /** A perfect copy of the input text. */
  text?: boolean

  /** Text with normalized whitespace, case, unicode and punctuation. */
  normal?: boolean

  /** Lowercased, trimmed text with contractions expanded. */
  reduced?: boolean

  /** Clean up whitespace. */
  trim?: boolean

  /** Character position where this match begins. */
  offset?: boolean

  /** Frequency of this match in the document. */
  count?: boolean

  /** Remove duplicate results. */
  unique?: boolean

  /** Starting term number in the document. */
  index?: boolean

  /** Options for each term; each listed key is an optional boolean flag. */
  terms?: Partial<
    Record<
      'text' | 'normal' | 'clean' | 'implicit' | 'tags' | 'whitespace' | 'id' | 'offset' | 'bestTag',
      boolean
    >
  >
}
|
58 |
|
59 |
|
60 | declare module nlp {
|
/** Takes a string and returns a `DefaultDocument`. */
export function tokenize(text: string): DefaultDocument

/**
 * Mix in a compromise-plugin.
 *
 * The document / world extension types are inferred from `P` when it matches
 * `Plugin<PD, PW>`; for any other `P` both fall back to `{}`.
 */
export function extend<P>(
  plugin: P
): nlp<
  P extends Plugin<infer PD, infer PW> ? PD : {},
  P extends Plugin<infer PD, infer PW> ? PW : {}
>

/** Takes a JSON value and returns a `DefaultDocument`. */
export function load(json: any): DefaultDocument

/** Log our decision-making for debugging. */
export function verbose(bool: boolean): DefaultDocument

/**
 * Current semver version of the library.
 * NOTE(review): declared as `number`, although semver values are normally
 * strings - confirm against the runtime.
 */
export const version: number
|
72 |
|
/**
 * Function signature of a plugin accepted by `extend`.
 *
 * @typeParam D - members the plugin contributes to documents
 * @typeParam W - members the plugin contributes to the world
 *
 * The function receives:
 *  - `Doc`: a `Document<World & W>` that also has the members of D and a
 *    `prototype` property of type D;
 *  - `world`: the base `World` intersected with W.
 * Its return value is ignored (`void`).
 */
type Plugin<D extends object, W extends object> = (
  Doc: Document<World & W> & D & { prototype: D },
  world: World & W
) => void
|
77 |
|
/** The base `World` intersected with the extra world members `W`. */
type ExtendedWorld<W extends object> = World & W
|
/**
 * A document over `ExtendedWorld<W>` combined with the extra members `D`,
 * re-mapped key by key so the intersection is presented as one object type.
 */
type ExtendedDocument<D extends object, W extends object> = {
  [K in keyof (Document<ExtendedWorld<W>> & D)]: (Document<ExtendedWorld<W>> & D)[K]
}
|
/**
 * Object type with exactly the keys of `Document` (default `World`), each
 * mapped to the same member type it has on `Document`.
 */
type DefaultDocument = {
  [K in keyof Document]: Document[K]
}
|
85 |
|
/**
 * Ambient declaration of the document object behind the `nlp` entry points
 * (their return types, `DefaultDocument` and `ExtendedDocument`, are mapped
 * from this class).
 *
 * @typeParam W - type of the `world` property; defaults to the base `World`.
 *
 * Most methods return `Document<W>`; the selectors at the end of the class
 * return dedicated sub-interfaces (`Nouns`, `Verbs`, ...) that add extra methods.
 */
class Document<W extends World = World> {
  // ---- whole-document helpers ----

  all(): Document<W>

  found: boolean

  parent(): Document<W>

  parents(): Document<W>[]

  tagger(): Document<W>

  wordCount(): number

  length(): number

  clone(shallow?: boolean): Document<W>

  cache(options?: object): Document<W>

  uncache(options?: object): Document<W>

  /** The world object associated with this document. */
  world: W

  // ---- positional accessors ----

  first(n?: number): Document<W>

  last(n?: number): Document<W>

  slice(start: number, end?: number): Document<W>

  eq(n: number): Document<W>

  firstTerm(): Document<W>

  lastTerm(): Document<W>

  termList(): any

  // ---- matching: each takes a match string or another document ----

  match(match: string | Document<W>): Document<W>

  not(match: string | Document<W>): Document<W>

  matchOne(match: string | Document<W>): Document<W>

  if(match: string | Document<W>): Document<W>

  ifNo(match: string | Document<W>): Document<W>

  has(match: string | Document<W>): boolean

  lookBehind(match: string | Document<W>): Document<W>

  lookAhead(match: string | Document<W>): Document<W>

  before(match: string | Document<W>): Document<W>

  after(match: string | Document<W>): Document<W>

  lookup(matches: string[]): Document<W>

  // ---- casing ----

  toLowerCase(): Document<W>

  toUpperCase(): Document<W>

  toTitleCase(): Document<W>

  toCamelCase(): Document<W>

  // ---- pre/post text, trimming and hyphenation ----

  pre(str: string, concat: boolean): Document<W>

  post(str: string, concat: boolean): Document<W>

  trim(): Document<W>

  hyphenate(): Document<W>

  dehyphenate(): Document<W>

  // ---- tagging ----

  tag(tag: string, reason?: string): Document<W>

  tagSafe(tag: string, reason?: string): Document<W>

  unTag(tag: string, reason?: string): Document<W>

  canBe(tag: string): Document<W>

  // ---- iteration helpers ----

  map(fn: Function): Document<W> | []

  forEach(fn: Function): Document<W>

  filter(fn: Function): Document<W>

  find(fn: Function): Document<W> | undefined

  some(fn: Function): Document<W>

  random(n?: number): Document<W>

  // ---- replacing, deleting and inserting text ----

  replaceWith(text: string | Function, keepTags?: boolean | object, keepCase?: boolean): Document<W>

  replace(match: string, text?: string | Function, keepTags?: boolean | object, keepCase?: boolean): Document<W>

  delete(match: string): Document<W>

  append(text: string): Document<W>

  prepend(text: string): Document<W>

  concat(text: string): Document<W>

  // ---- ordering, normalizing, splitting and joining ----

  sort(method?: string | Function): Document<W>

  reverse(): Document<W>

  normalize(options?: string | object): string

  unique(): Document<W>

  split(match?: string): Document<W>

  splitBefore(match?: string): Document<W>

  splitAfter(match?: string): Document<W>

  segment(regs: object, options?: object): Document<W>

  join(str?: string): Document<W>

  // ---- output ----

  text(options?: string | object): string

  /** Accepts a `JsonOptions` object or a string. */
  json(options?: JsonOptions | string): any

  /*
   * `out` overloads, selected by the `format` literal.
   * NOTE(review): 'terms' is accepted by the first overload as well as by the
   * `'tags' | 'terms'` overload; because the first one is listed earlier it
   * wins overload resolution, so callers passing 'terms' get `string`.
   */
  out(format?: 'text' | 'normal' | 'offset' | 'terms'): string
  out(format: 'array'): string[]
  out(format: 'tags' | 'terms'): Array<{ normal: string; text: string; tags: string[] }>
  out(format: 'json'): Array<{ normal: string; text: string; tags: () => void }>[]
  /**
   * Previously declared as returning `Text`, a name this file never declares
   * (it resolves to the DOM global `Text` node, or to an error without the DOM
   * lib). Typed like `debug()` instead.
   * NOTE(review): assumes `out('debug')` returns what `debug()` returns - confirm.
   */
  out(format: 'debug'): Document<W>
  out(format: 'topk'): Array<{ normal: string; count: number; percent: number }>

  debug(): Document<W>

  export(): any

  // ---- selections: each takes an optional index `n` unless noted ----

  terms(n?: number): Document<W>

  clauses(n?: number): Document<W>

  hyphenated(n?: number): Document<W>

  /**
   * @deprecated Misspelled declaration, kept only so existing callers keep
   * compiling. Use `toQuotations`.
   */
  toQuoations(start?: string, end?: string): Document<W>

  /**
   * Correctly spelled counterpart of the legacy `toQuoations` declaration.
   * NOTE(review): confirm the runtime method is exposed under this name.
   */
  toQuotations(start?: string, end?: string): Document<W>

  toParentheses(start?: string, end?: string): Document<W>

  phoneNumbers(n?: number): Document<W>

  hashTags(n?: number): Document<W>

  emails(n?: number): Document<W>

  emoticons(n?: number): Document<W>

  emoji(n?: number): Document<W>

  atMentions(n?: number): Document<W>

  urls(n?: number): Document<W>

  adverbs(n?: number): Document<W>

  pronouns(n?: number): Document<W>

  conjunctions(n?: number): Document<W>

  prepositions(n?: number): Document<W>

  people(n?: number): Document<W>

  places(n?: number): Document<W>

  organizations(n?: number): Document<W>

  topics(n?: number): Document<W>

  // ---- subsets: selectors returning interfaces with extra methods ----

  sentences(): Document<W>

  abbreviations(n?: number): Abbreviations<W>

  contractions(n?: number): Contractions<W>

  contract(): Document<W>

  parentheses(n?: number): Parentheses<W>

  possessives(n?: number): Possessives<W>

  quotations(n?: number): Quotations<W>

  acronyms(n?: number): Acronyms<W>

  lists(n?: number): Lists<W>

  nouns(n?: number): Nouns<W>

  verbs(n?: number): Verbs<W>
}
|
314 |
|
315 |
|
/**
 * Return type of `Document.nouns()`: all document members (taken through
 * `ExtendedDocument<{}, W>`) plus the noun-specific methods below, each of
 * which returns a `Document<W>`.
 */
interface Nouns<W extends World = World> extends ExtendedDocument<{}, W> {
  adjectives(): Document<W>

  isPlural(): Document<W>

  hasPlural(): Document<W>

  /** @param setArticle - optional boolean flag */
  toPlural(setArticle?: boolean): Document<W>

  /** @param setArticle - optional boolean flag */
  toSingular(setArticle?: boolean): Document<W>

  toPossessive(): Document<W>
}
|
330 |
|
331 |
|
/**
 * Return type of `Document.verbs()`: a `Document<W>` plus the verb-specific
 * methods below, each of which returns a `Document<W>`.
 */
interface Verbs<W extends World = World> extends Document<W> {
  /** Redeclares the inherited `adverbs` without the optional index parameter. */
  adverbs(): Document<W>

  isPlural(): Document<W>

  isSingular(): Document<W>

  conjugate(): Document<W>

  toPastTense(): Document<W>

  toPresentTense(): Document<W>

  toFutureTense(): Document<W>

  toInfinitive(): Document<W>

  toGerund(): Document<W>

  isNegative(): Document<W>

  isPositive(): Document<W>

  toNegative(): Document<W>

  toPositive(): Document<W>
}
|
360 |
|
/**
 * Return type of `Document.abbreviations()`: a `Document<W>` with two extra
 * methods for periods, both returning a `Document<W>`.
 */
interface Abbreviations<W extends World = World> extends Document<W> {
  stripPeriods(): Document<W>

  addPeriods(): Document<W>
}
|
367 |
|
/**
 * Return type of `Document.acronyms()`: a `Document<W>` with two extra
 * methods for periods, both returning a `Document<W>`.
 */
interface Acronyms<W extends World = World> extends Document<W> {
  stripPeriods(): Document<W>

  addPeriods(): Document<W>
}
|
374 |
|
/**
 * Return type of `Document.contractions()`: a `Document<W>` with an extra
 * `expand()` method that returns a `Document<W>`.
 */
interface Contractions<W extends World = World> extends Document<W> {
  expand(): Document<W>
}
|
379 |
|
/**
 * Return type of `Document.parentheses()`: a `Document<W>` with an extra
 * `unwrap()` method that returns a `Document<W>`.
 */
interface Parentheses<W extends World = World> extends Document<W> {
  unwrap(): Document<W>
}
|
384 |
|
/**
 * Return type of `Document.possessives()`: a `Document<W>` with an extra
 * `strip()` method that returns a `Document<W>`.
 */
interface Possessives<W extends World = World> extends Document<W> {
  strip(): Document<W>
}
|
389 |
|
/**
 * Return type of `Document.quotations()`: a `Document<W>` with an extra
 * `unwrap()` method that returns a `Document<W>`.
 */
interface Quotations<W extends World = World> extends Document<W> {
  unwrap(): Document<W>
}
|
394 |
|
/**
 * Return type of `Document.lists()`: a `Document<W>` plus the list-specific
 * methods below. All of them take no arguments and return a `Document<W>`.
 */
interface Lists<W extends World = World> extends Document<W> {
  /** Redeclares the inherited `conjunctions` without the optional index parameter. */
  conjunctions(): Document<W>

  parts(): Document<W>

  items(): Document<W>

  add(): Document<W>

  remove(): Document<W>

  hasOxfordComma(): Document<W>
}
|
409 |
|
/**
 * Base world type. It declares no members of its own; it serves as the
 * constraint and default for the `W` type parameters in this namespace, and
 * plugin-provided members are added to it by intersection (`World & W`).
 */
class World {
}
|
411 | }
|
412 |
|
413 | export default nlp
|