export as namespace nlp

// a key-value object of words, terms
declare interface Lexicon {
  [key: string]: string
}

// documents indexed by a string
declare interface DocIndex<
  Ext extends object = {},
  W extends nlp.World = nlp.World,
  Ph extends nlp.Phrase = nlp.Phrase
> {
  [key: string]: nlp.ExtendedDocument<Ext, W, Ph>
}

declare interface nlp<D extends object = {}, W extends object = {}, Ph extends object = {}> {
  /** normal usage */
  (text?: string, lexicon?: Lexicon): nlp.ExtendedDocument<D, W, Ph>
  /** tokenize string */
  tokenize(text: string, lexicon?: Lexicon): nlp.ExtendedDocument<D, W, Ph>
  /** mix in a compromise-plugin */
  extend<P>(
    plugin: P
  ): nlp<
    P extends nlp.Plugin<infer PD, infer PW, infer PPh> ? { [k in keyof (PD & D)]: (PD & D)[k] } : { [k in keyof D]: D[k] },
    P extends nlp.Plugin<infer PD, infer PW, infer PPh> ? { [k in keyof (PW & W)]: (PW & W)[k] } : { [k in keyof W]: W[k] },
    P extends nlp.Plugin<infer PD, infer PW, infer PPh> ? { [k in keyof (PPh & Ph)]: (PPh & Ph)[k] } : { [k in keyof Ph]: Ph[k] }
  >
  /** re-generate a Doc object from .json() results */
  fromJSON(json: any): nlp.ExtendedDocument<D, W, Ph>
  /** log our decision-making for debugging */
  verbose(bool?: boolean): nlp.ExtendedDocument<D, W, Ph>
  /** create instance using global world */
  clone(): nlp<D, W, Ph>
  /** current semver version of the library */
  version: nlp.ExtendedDocument<D, W, Ph>
  /** grab the document's context data */
  world(): W
  /** pre-parse a match statement, for faster lookups */
  parseMatch(str: string, options?: object): nlp<D, W, Ph>
}

declare function nlp(text?: string, lexicon?: Lexicon): nlp.DefaultDocument
declare function nlp<D extends object = {}, W extends object = {}, Ph extends object = {}>(
  text?: string
): nlp.ExtendedDocument<D, W, Ph>

// possible values to .json()
declare interface JsonOptions {
  /** a perfect copy of the input text */
  text?: boolean
  /** normalized whitespace, case, unicode, punctuation */
  normal?: boolean
  /** lowercase, trimmed, contractions expanded. */
  reduced?: boolean
  /** cleanup whitespace */
  trim?: boolean
  /** character-position where this begins */
  offset?: boolean
  /** frequency of this match in the document */
  count?: boolean
  /** remove duplicate results */
  unique?: boolean
  /** starting term # in document */
  index?: boolean
  /** options for each term */
  terms?: {
    text?: boolean
    normal?: boolean
    clean?: boolean
    implicit?: boolean
    tags?: boolean
    whitespace?: boolean
    id?: boolean
    offset?: boolean
    bestTag?: boolean
  }
}

// Cleaner plugin types
type PluginWorld<D extends object = {}, W extends object = {}, Ph extends object = {}> = {
  // Override post process type
  postProcess(process: (Doc: nlp.ExtendedDocument<D, W, Ph>) => void): nlp.ExtendedWorld<W>
} & nlp.ExtendedWorld<W>

type PluginDocument<D extends object = {}, W extends object = {}, Ph extends object = {}> = nlp.ExtendedDocument<
  D,
  W,
  Ph
> & { prototype: nlp.ExtendedDocument<D, W, Ph> }

type PluginPhrase<Ph extends object = {}> = nlp.ExtendedPhrase<Ph> & { prototype: nlp.ExtendedPhrase<Ph> }
type PluginTerm = nlp.Term & PluginConstructor
type PluginPool = nlp.Pool & PluginConstructor

// Make these available, full support tbd
type PluginConstructor = {
  prototype: Record<string, any>
}
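// Illustrative usage (not part of the declarations): a minimal sketch of how these typings
// are consumed. The sample sentence is invented for the example; the methods are the ones
// declared on the Document class below.
//
//   import nlp from 'compromise'
//
//   const doc = nlp('she sells seashells by the seashore.')
//   doc.verbs().toPastTense()
//   doc.text()                                  // 'she sold seashells by the seashore.'
//   doc.match('#Noun').json({ normal: true })   // metadata, per JsonOptions above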

// Constructor
declare module nlp {
  export function tokenize(text?: string, lexicon?: Lexicon): DefaultDocument
  /** mix in a compromise-plugin */
  export function extend<P>(
    plugin: P
  ): nlp<
    P extends Plugin<infer D, infer W, infer Ph> ? D : {},
    P extends Plugin<infer D, infer W, infer Ph> ? W : {},
    P extends Plugin<infer D, infer W, infer Ph> ? Ph : {}
  >
  /** re-generate a Doc object from .json() results */
  export function fromJSON(json: any): DefaultDocument
  /** log our decision-making for debugging */
  export function verbose(bool?: boolean): DefaultDocument
  /** create instance using global world */
  export function clone(): nlp<{}, {}, {}>
  /** current semver version of the library */
  export const version: number

  type Plugin<D extends object = {}, W extends object = {}, Ph extends object = {}> = (
    Doc: PluginDocument<D, W, Ph>,
    world: PluginWorld<D, W, Ph>,
    nlp: nlp<D, W, Ph>,
    Phrase: PluginPhrase<Ph>,
    Term: PluginTerm,
    // @todo Add extend support
    Pool: PluginPool
  ) => void

  type ExtendedWorld<W extends object = {}> = nlp.World & W
  type ExtendedDocument<D extends object = {}, W extends object = {}, Ph extends object = {}> = {
    [k in keyof (nlp.Document<D, ExtendedWorld<W>, ExtendedPhrase<Ph>> & D)]: (nlp.Document<
      D,
      ExtendedWorld<W>,
      ExtendedPhrase<Ph>
    > &
      D)[k]
  }
  type ExtendedPhrase<Ph extends object = {}> = nlp.Phrase & Ph
  type DefaultDocument = {
    [k in keyof nlp.Document]: nlp.Document[k]
  }
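  // Illustrative usage (not part of the declarations): a rough sketch of how the Plugin type
  // above is passed to nlp.extend(). The lexicon entry and the added method name are invented
  // for the example.
  //
  //   const myPlugin: nlp.Plugin = (Doc, world) => {
  //     world.addWords({ kermit: 'FirstName' })
  //     Doc.prototype.isGreeting = function () {
  //       return this.has('(hi|hello|howdy)')
  //     }
  //   }
  //   nlp.extend(myPlugin)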
  class Document<D extends object = {}, W extends World = World, Ph extends Phrase = Phrase> {
    // Utils
    /** return the whole original document ('zoom out') */
    all(): ExtendedDocument<D, W, Ph>
    /** is this document empty? */
    found: boolean
    /** return the previous result */
    parent(): ExtendedDocument<D, W, Ph>
    /** return all of the previous results */
    parents(): ExtendedDocument<D, W, Ph>[]
    /** (re)run the part-of-speech tagger on this document */
    tagger(): ExtendedDocument<D, W, Ph>
    /** count the # of terms in each match */
    wordCount(): number
    /** count the # of characters of each match */
    length(): number
    /** deep-copy the document, so that no references remain */
    clone(shallow?: boolean): ExtendedDocument<D, W, Ph>
    /** freeze the current state of the document, for speed-purposes */
    cache(options?: object): ExtendedDocument<D, W, Ph>
    /** un-freezes the current state of the document, so it may be transformed */
    uncache(options?: object): ExtendedDocument<D, W, Ph>
    /** the current world */
    world: W

    // Accessors
    /** use only the first result(s) */
    first(n?: number): ExtendedDocument<D, W, Ph>
    /** use only the last result(s) */
    last(n?: number): ExtendedDocument<D, W, Ph>
    /** grab a subset of the results */
    slice(start: number, end?: number): ExtendedDocument<D, W, Ph>
    /** use only the nth result */
    eq(n: number): ExtendedDocument<D, W, Ph>
    /** get the first word in each match */
    firstTerms(): ExtendedDocument<D, W, Ph>
    /** get the end word in each match */
    lastTerms(): ExtendedDocument<D, W, Ph>
    /** return a flat list of all Term objects in match */
    termList(): Term[]
    /** grab a specific named capture group */
    groups(name: string): ExtendedDocument<D, W, Ph>
    /** grab all named capture groups */
    groups(): DocIndex
    /** Access Phrase list */
    list: Ph[]
    /** Access pool */
    pool(): Pool

    // Match
    /** return matching patterns in this doc */
    match(match: string | ExtendedDocument<D, W, Ph>, options: any): ExtendedDocument<D, W, Ph>
    /** return a named group in a match */
    match(match: string | ExtendedDocument<D, W, Ph>, group: string | number): ExtendedDocument<D, W, Ph>
    /** return all results except for this */
    not(match: string | ExtendedDocument<D, W, Ph>, options: any): ExtendedDocument<D, W, Ph>
    /** return only the first match */
    matchOne(match: string | ExtendedDocument<D, W, Ph>, options: any): ExtendedDocument<D, W, Ph>
    /** return each current phrase, only if it contains this match */
    if(match: string | ExtendedDocument<D, W, Ph>, options: any): ExtendedDocument<D, W, Ph>
    /** Filter-out any current phrases that have this match */
    ifNo(match: string | ExtendedDocument<D, W, Ph>, options: any): ExtendedDocument<D, W, Ph>
    /** Return a boolean if this match exists */
    has(match: string | ExtendedDocument<D, W, Ph>, options: any): boolean
    /** search through earlier terms, in the sentence */
    lookBehind(match: string | ExtendedDocument<D, W, Ph>, options: any): ExtendedDocument<D, W, Ph>
    /** search through following terms, in the sentence */
    lookAhead(match: string | ExtendedDocument<D, W, Ph>, options: any): ExtendedDocument<D, W, Ph>
    /** return the terms before each match */
    before(match: string | ExtendedDocument<D, W, Ph>, options: any): ExtendedDocument<D, W, Ph>
    /** return the terms after each match */
    after(match: string | ExtendedDocument<D, W, Ph>, options: any): ExtendedDocument<D, W, Ph>
    /** quick find for an array of string matches */
    lookup(matches: string[]): ExtendedDocument<D, W, Ph>
    /** quick find for an object of key-value matches */
    lookup(matches: Lexicon): DocIndex

    // Case
    /** turn every letter of every term to lower case */
    toLowerCase(): ExtendedDocument<D, W, Ph>
    /** turn every letter of every term to upper case */
    toUpperCase(): ExtendedDocument<D, W, Ph>
    /** upper-case the first letter of each term */
    toTitleCase(): ExtendedDocument<D, W, Ph>
    /** remove whitespace and title-case each term */
    toCamelCase(): ExtendedDocument<D, W, Ph>

    // Whitespace
    /** add this punctuation or whitespace before each match */
    pre(str: string, concat: boolean): ExtendedDocument<D, W, Ph>
    /** add this punctuation or whitespace after each match */
    post(str: string, concat: boolean): ExtendedDocument<D, W, Ph>
    /** remove start and end whitespace */
    trim(): ExtendedDocument<D, W, Ph>
    /** connect words with hyphen, and remove whitespace */
    hyphenate(): ExtendedDocument<D, W, Ph>
    /** remove hyphens between words, and set whitespace */
    dehyphenate(): ExtendedDocument<D, W, Ph>

    // Tag
    /** Give all terms the given tag */
    tag(tag: string, reason?: string): ExtendedDocument<D, W, Ph>
    /** Only apply tag to terms if it is consistent with current tags */
    tagSafe(tag: string, reason?: string): ExtendedDocument<D, W, Ph>
    /** Remove this tag from the given terms */
    unTag(tag: string, reason?: string): ExtendedDocument<D, W, Ph>
    /** return only the terms that can be this tag */
    canBe(tag: string): ExtendedDocument<D, W, Ph>

    // Loops
    /** run each phrase through a function, and create a new document */
    map(fn: (p: ExtendedPhrase<Ph>) => void): ExtendedDocument<D, W, Ph> | []
    /** run a function on each phrase, as an individual document */
    forEach(fn: (doc: ExtendedDocument<D, W, Ph>) => void): ExtendedDocument<D, W, Ph>
    /** return only the phrases that return true */
    filter(fn: (p: ExtendedPhrase<Ph>) => boolean): ExtendedDocument<D, W, Ph>
    /** return a document with only the first phrase that matches */
    find(fn: (p: ExtendedPhrase<Ph>) => boolean): ExtendedDocument<D, W, Ph> | undefined
    /** return true or false if there is one matching phrase */
    some(fn: (p: ExtendedPhrase<Ph>) => boolean): ExtendedDocument<D, W, Ph>
    /** sample a subset of the results */
    random(n?: number): ExtendedDocument<D, W, Ph>

    // Insert
    /** substitute-in new content */
    replaceWith(text: string | Function, keepTags?: boolean | object, keepCase?: boolean): ExtendedDocument<D, W, Ph>
    /** search and replace match with new content */
    replace(
      match: string,
      text?: string | Function,
      keepTags?: boolean | object,
      keepCase?: boolean
    ): ExtendedDocument<D, W, Ph>
    /** fully remove these terms from the document */
    delete(match: string): ExtendedDocument<D, W, Ph>
    /** add these new terms to the end (insertAfter) */
    append(text: string): ExtendedDocument<D, W, Ph>
    /** add these new terms to the front (insertBefore) */
    prepend(text: string): ExtendedDocument<D, W, Ph>
    /** add these new things to the end */
    concat(text: string): ExtendedDocument<D, W, Ph>

    // Transform
    /** re-arrange the order of the matches (in place) */
    sort(method?: string | Function): ExtendedDocument<D, W, Ph>
    /** reverse the order of the matches, but not the words */
    reverse(): ExtendedDocument<D, W, Ph>
    /** clean-up the document, in various ways */
    normalize(options?: string | object): ExtendedDocument<D, W, Ph>
    /** remove any duplicate matches */
    unique(): ExtendedDocument<D, W, Ph>
    /** return a Document with three parts for every match ('splitOn') */
    split(match?: string): ExtendedDocument<D, W, Ph>
    /** separate everything after the match as a new phrase */
    splitBefore(match?: string): ExtendedDocument<D, W, Ph>
    /** separate everything before the word, as a new phrase */
    splitAfter(match?: string): ExtendedDocument<D, W, Ph>
    /** split a document into labeled sections */
    segment(regs: object, options?: object): ExtendedDocument<D, W, Ph>
    /** make all phrases into one phrase */
    join(str?: string): ExtendedDocument<D, W, Ph>

    // Output
    /** return the document as text */
    text(options?: string | object): string
    /** pull out desired metadata from the document */
    json(options?: JsonOptions | string): any
    /** some named output formats */
    out(format?: 'text' | 'normal' | 'offset' | 'terms'): string
    out(format: 'array'): string[]
    out(format: 'tags' | 'terms'): Array<{ normal: string; text: string; tags: string[] }>
    out(format: 'json'): Array<{ normal: string; text: string; tags: () => void }>[]
    out(format: 'debug'): ExtendedDocument<D, W, Ph>
    out(format: 'topk'): Array<{ normal: string; count: number; percent: number }>
    /** pretty-print the current document and its tags */
    debug(): ExtendedDocument<D, W, Ph>
    /** store a parsed document for later use */
    export(): any

    // Selections
    /** split-up results by each individual term */
    terms(n?: number): ExtendedDocument<D, W, Ph>
    /** split-up results into multi-term phrases */
    clauses(n?: number): ExtendedDocument<D, W, Ph>
    /** return all terms connected with a hyphen or dash like `'wash-out'` */
    hyphenated(n?: number): ExtendedDocument<D, W, Ph>
    /** add quotation marks around each match */
    toQuoations(start?: string, end?: string): ExtendedDocument<D, W, Ph>
    /** add brackets around each match */
    toParentheses(start?: string, end?: string): ExtendedDocument<D, W, Ph>
    /** return things like `'(939) 555-0113'` */
    phoneNumbers(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `'#nlp'` */
    hashTags(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `'hi@compromise.cool'` */
    emails(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `:)` */
    emoticons(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `💋` */
    emoji(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `'@nlp_compromise'` */
    atMentions(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `'compromise.cool'` */
    urls(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `'quickly'` */
    adverbs(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `'he'` */
    pronouns(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `'but'` */
    conjunctions(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `'of'` */
    prepositions(n?: number): ExtendedDocument<D, W, Ph>
    /** return person names like `'John A. Smith'` */
    people(n?: number): ExtendedDocument<D, W, Ph>
    /** return location names like `'Paris, France'` */
    places(n?: number): ExtendedDocument<D, W, Ph>
    /** return companies and org names like `'Google Inc.'` */
    organizations(n?: number): ExtendedDocument<D, W, Ph>
    /** return people, places, and organizations */
    topics(n?: number): ExtendedDocument<D, W, Ph>

    // Subsets
    /** get the whole sentence for each match */
    sentences(n?: number): ExtendedDocument<D, W, Ph>
    /** return things like `'Mrs.'` */
    abbreviations(n?: number): Abbreviations
    /** return any multi-word terms, like "didn't" */
    contractions(n?: number): Contractions
    /** contract words that can combine, like "did not" */
    contract(): ExtendedDocument<D, W, Ph>
    /** return anything inside (parentheses) */
    parentheses(n?: number): Parentheses
    /** return things like "Spencer's" */
    possessives(n?: number): Possessives
    /** return any terms inside 'quotation marks' */
    quotations(n?: number): Quotations
    /** return things like `'FBI'` */
    acronyms(n?: number): Acronyms
    /** return things like `'eats, shoots, and leaves'` */
    lists(n?: number): Lists
    /** return any subsequent terms tagged as a Noun */
    nouns(n?: number, opts?: object): Nouns
    /** return any subsequent terms tagged as a Verb */
    verbs(n?: number): Verbs
  }

  // Nouns class
  interface Nouns<W extends World = World, Ph extends Phrase = Phrase> extends ExtendedDocument<{}, W, Ph> {
    /** get any adjectives describing this noun */
    adjectives(): ExtendedDocument<{}, W, Ph>
    /** return only plural nouns */
    isPlural(): ExtendedDocument<{}, W, Ph>
    /** return only nouns that _can be_ inflected as plural */
    hasPlural(): ExtendedDocument<{}, W, Ph>
    /** 'football captain' → 'football captains' */
    toPlural(setArticle?: boolean): ExtendedDocument<{}, W, Ph>
    /** 'turnovers' → 'turnover' */
    toSingular(setArticle?: boolean): ExtendedDocument<{}, W, Ph>
    /** add a `'s` to the end, in a safe manner. */
    toPossessive(): ExtendedDocument<{}, W, Ph>
  }

  // Verbs class
  interface Verbs<W extends World = World, Ph extends Phrase = Phrase> extends ExtendedDocument<{}, W, Ph> {
    /** return the adverbs describing this verb */
    adverbs(): ExtendedDocument<{}, W, Ph>
    /** return only plural verbs */
    isPlural(): ExtendedDocument<{}, W, Ph>
    /** return only singular verbs */
    isSingular(): ExtendedDocument<{}, W, Ph>
    /** return all forms of these verbs */
    conjugate(): ExtendedDocument<{}, W, Ph>
    /** 'will go' → 'went' */
    toPastTense(): ExtendedDocument<{}, W, Ph>
    /** 'walked' → 'walks' */
    toPresentTense(): ExtendedDocument<{}, W, Ph>
    /** 'walked' → 'will walk' */
    toFutureTense(): ExtendedDocument<{}, W, Ph>
    /** 'walks' → 'walk' */
    toInfinitive(): ExtendedDocument<{}, W, Ph>
    /** 'walks' → 'walking' */
    toGerund(): ExtendedDocument<{}, W, Ph>
    /** 'drive' → 'driven' if it exists, otherwise past-tense */
    toParticiple(): ExtendedDocument<{}, W, Ph>
    /** return verbs with 'not' */
    isNegative(): ExtendedDocument<{}, W, Ph>
    /** only verbs without 'not' */
    isPositive(): ExtendedDocument<{}, W, Ph>
    /** 'went' → 'did not go' */
    toNegative(): ExtendedDocument<{}, W, Ph>
    /** "didn't study" → 'studied' */
    toPositive(): ExtendedDocument<{}, W, Ph>
  }

  interface Abbreviations<W extends World = World, Ph extends Phrase = Phrase> extends ExtendedDocument<{}, W, Ph> {
    /** */
    stripPeriods(): ExtendedDocument<{}, W, Ph>
    /** */
    addPeriods(): ExtendedDocument<{}, W, Ph>
  }

  interface Acronyms<W extends World = World, Ph extends Phrase = Phrase> extends ExtendedDocument<{}, W, Ph> {
    /** */
    stripPeriods(): ExtendedDocument<{}, W, Ph>
    /** */
    addPeriods(): ExtendedDocument<{}, W, Ph>
  }

  interface Contractions<W extends World = World, Ph extends Phrase = Phrase> extends ExtendedDocument<{}, W, Ph> {
    /** */
    expand(): ExtendedDocument<{}, W, Ph>
  }

  interface Parentheses<W extends World = World, Ph extends Phrase = Phrase> extends ExtendedDocument<{}, W, Ph> {
    /** */
    unwrap(): ExtendedDocument<{}, W, Ph>
  }

  interface Possessives<W extends World = World, Ph extends Phrase = Phrase> extends ExtendedDocument<{}, W, Ph> {
    /** */
    strip(): ExtendedDocument<{}, W, Ph>
  }

  interface Quotations<W extends World = World, Ph extends Phrase = Phrase> extends ExtendedDocument<{}, W, Ph> {
    /** */
    unwrap(): ExtendedDocument<{}, W, Ph>
  }
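  // Illustrative usage (not part of the declarations): a sketch of the selection and subset
  // methods typed above. The sample text and example outputs are invented for the example.
  //
  //   const doc = nlp('John Smith flew to Paris in June.')
  //   doc.people().out('array')    // e.g. ['john smith']
  //   doc.places().out('array')    // e.g. ['paris']
  //   doc.verbs().toNegative()     // 'did not fly', etc.
  //   doc.nouns().toPlural()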
  interface Lists<W extends World = World, Ph extends Phrase = Phrase> extends ExtendedDocument<{}, W, Ph> {
    /** */
    conjunctions(): ExtendedDocument<{}, W, Ph>
    /** */
    parts(): ExtendedDocument<{}, W, Ph>
    /** */
    items(): ExtendedDocument<{}, W, Ph>
    /** */
    add(): ExtendedDocument<{}, W, Ph>
    /** */
    remove(): ExtendedDocument<{}, W, Ph>
    /** */
    hasOxfordComma(): ExtendedDocument<{}, W, Ph>
  }

  class World {
    /** more logs for debugging */
    verbose(on?: boolean): this
    isVerbose(): boolean
    /** get all terms in our lexicon with this tag */
    getByTag(tag: string): Record<string, any>
    /** put new words into our lexicon, properly */
    addWords(words: Record<string, string>): void
    /** extend the compromise tagset */
    addTags(
      tags: Record<
        string,
        {
          isA?: string | string[]
          notA?: string | string[]
        }
      >
    ): void
    /** call methods after tagger runs */
    postProcess<D extends Document = Document>(process: (Doc: D) => void): this
  }
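  // Illustrative usage (not part of the declarations): a sketch of extending the World from a
  // plugin, per addWords()/addTags() above. The words and tag names are invented for the example.
  //
  //   nlp.extend((Doc, world) => {
  //     world.addWords({ gandalf: 'FirstName', rohan: 'Place' })
  //     world.addTags({ Wizard: { isA: 'Person' } })
  //   })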
  class Pool {
    /** throw a new term object in */
    add(term: Term): this
    /** find a term by its id */
    get(id: string): Term
    /** remove a term from the pool, by its id */
    remove(id: string): void
    /** merge with another pool */
    merge(pool: Pool): this
    /** size of pool */
    stats(): number
  }

  class Cache {
    terms: Term[]
    words: any
    tags: Record<string, any>
    set: boolean
  }

  class Phrase {
    isA: 'Phrase' // Get Type
    start: string // id of start Term
    length: number // number of terms in phrase
    pool: Pool // global pool
    cache: Cache // global cache

    /** return a flat array of Term objects */
    terms(): Term[]
  }

  // @todo
  interface RegSyntax {
    [index: string]: any
  }

  type TextOutOptions =
    | 'reduced'
    | 'root'
    | 'implicit'
    | 'normal'
    | 'unicode'
    | 'titlecase'
    | 'lowercase'
    | 'acronyms'
    | 'whitespace'
    | 'punctuation'
    | 'abbreviations'

  type JsonOutOptions = 'text' | 'normal' | 'tags' | 'clean' | 'id' | 'offset' | 'implicit' | 'whitespace' | 'bestTag'

  class Term {
    isA: 'Term' // Get Type
    id: string

    // main data
    text: string
    tags: Record<string, any>

    // alternative forms of this.text
    root: string | null
    implicit: string | null
    clean?: string
    reduced?: string

    // additional surrounding information
    prev: string | null // id of prev term
    next: string | null // id of next term
    pre?: string // character before e.g. ' ' ','
    post?: string // character after e.g. ' ' ','

    // support alternative matches
    alias?: string

    constructor(text?: string)
    set(text: string): this
    /** clone contents to new term */
    clone(): Term
    /** convert all text to uppercase */
    toUpperCase(): this
    /** convert all text to lowercase */
    toLowerCase(): this
    /** only set the first letter to uppercase
     * leave any existing uppercase alone */
    toTitleCase(): this
    /** if all letters are uppercase */
    isUpperCase(): this
    /** if the first letter is uppercase, and the rest are lowercase */
    isTitleCase(): this
    titleCase(): this
    /** search the term's 'post' punctuation */
    hasPost(): boolean
    /** search the term's 'pre' punctuation */
    hasPre(): boolean
    /** does it have a quotation symbol? */
    hasQuote(): boolean
    hasQuotation(): boolean
    /** does it have a comma? */
    hasComma(): boolean
    /** does it end in a period? */
    hasPeriod(): boolean
    /** does it end in an exclamation */
    hasExclamation(): boolean
    /** does it end with a question mark? */
    hasQuestionMark(): boolean
    /** is there a ... at the end? */
    hasEllipses(): boolean
    /** is there a semicolon after this word? */
    hasSemicolon(): boolean
    /** is there a slash '/' in this word? */
    hasSlash(): boolean
    /** a hyphen connects two words like-this */
    hasHyphen(): boolean
    /** a dash separates words - like that */
    hasDash(): boolean
    /** is it multiple words combined */
    hasContraction(): boolean
    /** try to sensibly put this punctuation mark into the term */
    addPunctuation(punct: string): this
    doesMatch(reg: RegSyntax, index: number, length: number): boolean
    /** does this term look like an acronym? */
    isAcronym(): boolean
    /** is this term implied by a contraction? */
    isImplicit(): boolean
    /** does the term have at least one good tag? */
    isKnown(): boolean
    /** cache the root property of the term */
    setRoot(world: World): void
    /** return various text formats of this term */
    textOut(options?: Record<string, any>, showPre?: boolean, showPost?: boolean): string
    /** return various metadata for this term */
    // @todo create output type from options...
    json(options?: Record<string, any>, world?: World): object
    /** add a tag or tags, and their descendants, to this term */
    tag(tags: string | string[], reason?: string, world?: World): this
    /** only tag this term if it is consistent with its current tags */
    tagSafe(tags: string | string[], reason?: string, world?: World): this
    /** remove a tag or tags, and their descendants, from this term */
    unTag(tags: string | string[], reason?: string, world?: World): this
    /** is this tag consistent with the word's current tags? */
    canBe(tags: string | string[], world?: World): boolean
  }
}

export default nlp
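// Illustrative usage (not part of the declarations): a sketch of the tagging and output
// methods typed above. The sample text and tag names are invented for the example.
//
//   const doc = nlp('toronto is quite cold in january')
//   doc.match('toronto').tag('City')
//   doc.match('#Month').json({ offset: true })
//   doc.debug()    // pretty-print the document's terms and tags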