/// <reference types="node" />
import { Readable, Writable } from 'stream';
/**
 * Return type of `EmptyToken.parse`: a `HeadId`, an `EmptyId`, and the parsed `EmptyToken`.
 * NOTE(review): the two numbers are presumably the two parts of the token ID column — confirm.
 */
declare type EmptyTokenParseResult = [HeadId, EmptyId, EmptyToken];
/**
 * Return type of `NominalToken.parse`: a number paired with the parsed `NominalToken`.
 * NOTE(review): the number is presumably the token ID read from the parsed line — confirm.
 */
declare type NominalTokenParseResult = [number, NominalToken];
/**
 * An async generator function that yields `Sentence` objects one at a time.
 * Use this generator if the whole document cannot fit into memory.
 *
 * @param stream A `Readable` stream that contains CoNLL-U format text.
 * @param Parser An optional derivative of `XPOSParser` used for parsing the `xpos` field
 * @returns An async generator of `Sentence` objects
 */
export declare function sentences(stream: Readable, Parser?: XPOSParser): AsyncGenerator<Sentence>;
/**
 * `Document` is an entry point to `conllu`. It contains zero or more sentences.
 *
 * To programmatically construct a `Document`, use its constructor.
 * To construct a `Document` from CoNLL-U format text, use either the
 * `parse`, `load`, or `read` method depending on the source of the text.
 *
 * If the `Document` cannot fit into memory, use the `sentences` generator function.
 */
export declare class Document {
    /** The sentences held by this document */
    sentences: Sentence[];
    /**
     * @param sentences The sentences the document is made of
     */
    constructor(sentences: Sentence[]);
    /**
     * Load a conllu file as a `Document`. This method is async.
     *
     * @param file_path Path to the conllu file
     * @param Parser An optional parser, derived from `XPOSParser`, for mapping XPOS to UPOS
     */
    static load(file_path: string, Parser?: XPOSParser): Promise<Document>;
    /**
     * Parse the given stream line by line to construct a `Document`.
     *
     * @param stream A stream source of the text to be parsed
     * @param Parser An optional parser, derived from `XPOSParser`, for mapping XPOS to UPOS
     */
    static read(stream: Readable, Parser?: XPOSParser): Promise<Document>;
    /**
     * An async utility function that cumulatively parses each line of text, then returns a document.
     *
     * @param line_iter An async generator object where each iteration yields one line of text
     * @param Parser An optional parser derived from `XPOSParser`
     */
    protected static parse_core(line_iter: AsyncGenerator<string>, Parser?: XPOSParser): Promise<Document>;
    /**
     * Attempt to parse a string as a document. This method is async.
     *
     * @param str An entire document as a string where each line is terminated by '\u000a'
     * @param Parser An optional `XPOSParser` instance
     */
    static parse(str: string, Parser?: XPOSParser): Promise<Document>;
    /** Save this document to a file at the given path. The content encoding is UTF-8 */
    save(path: string): Promise<void>;
    /** Return the CoNLL-U string representation of the document */
    toString(): string;
    /**
     * Validate the dependencies of every sentence. It returns immediately when there is an error.
     * Otherwise, it returns `SentenceValidationResult.Ok`
     */
    validate(): SentenceValidationResult;
    /** Serialize this document as CoNLL-U text into the given stream */
    write(stream: Writable): Promise<void>;
}
/**
 * Sentence metadata.
 *
 * It is a key/value pair, defined by prefixing the sentence with a line in
 * `# key = value` format.
 */
export declare class Meta {
    /** Name of the metadata entry */
    key: string;
    /**
     * Value of the metadata entry.
     * NOTE(review): declared as non-optional here although the constructor accepts
     * an omitted `value` — confirm what this field holds in that case.
     */
    value: string;
    /**
     * Construct `Meta` from the given dictionary.
     * @param param0 A dictionary of `key` and `value` where `value` is optional.
     * If `value` is omitted, the `toString` method will return a `Comment` format
     * string rather than a `key` with an empty value
     */
    constructor({ key, value }: {
        key: string;
        value?: string;
    });
    /**
     * Instantiate the object from a `conllu` string.
     * @param str A string to be parsed into `Meta`
     */
    static parse(str: string): Meta;
    /** Convert this object into a `conllu` string */
    toString(): string;
}
/**
 * A comment on a sentence. It is similar to `Meta` but doesn't have the `=` symbol.
 * Like `Meta`, it must precede the sentence it belongs to.
 */
export declare class Comment {
    /** The comment text */
    text: string;
    /**
     * @param text Optional comment text to be added
     */
    constructor(text?: string);
    /**
     * Construct a comment object from the given string.
     * @param str A string to be parsed as a `Comment`
     */
    static parse(str: string): Comment;
    /** Get the `conllu` string of this comment */
    toString(): string;
}
/**
 * A validation result for calling validate on each `Sentence`.
 * Validation may also throw some exceptions such as "Head of deps that reference to hidden/empty token must be in [integer, integer] format".
 */
export declare enum SentenceValidationResult {
    /** No validation error */
    Ok = 0,
    /** Compound token end range is beyond index of last token error */
    CompoundEndBeyondLastTokenError = 1,
    /** Some compound token overlaps another compound token error */
    CompoundOverlapError = 2,
    /**
     * Head index is larger than number of tokens or less than 1 error.
     * NOTE(review): this wording is identical to `HeadOutOfBoundError`; presumably this
     * member is about a head referenced from the `deps` field — confirm.
     */
    DepHeadOutOfBoundError = 3,
    /** Compound token start index points to a token prior to itself error */
    CompoundStartAfterTokenError = 4,
    /** Empty token after compound token error */
    EmptyAfterCompoundError = 5,
    /** Head index is larger than number of tokens or less than 1 error */
    HeadOutOfBoundError = 6,
    /** NominalToken with a head but a missing deprel error */
    HeadWithoutDeprelError = 7,
    /** NominalToken with a non-integer value in head error */
    NonIntegerHeadError = 8
}
/**
 * `Sentence` consists of:
 * 1. `meta`, which is an array. Each object inside the array is either a `Meta` object or a `Comment` object.
 * 2. `tokens`, which is an array of derivatives of the `Token` class.
 *
 * To parse sentence text:
 * 1. You can construct a `Document` from text by using the `parse`, `load`, or `read` method and access
 * each `Sentence` via the `sentences` field of the `Document` object.
 * 2. You can also use the generator function `sentences` to parse each text chunk incrementally.
 */
export declare class Sentence {
    /** Metadata and comment entries attached to this sentence */
    meta: (Meta | Comment)[];
    /** Tokens that make up this sentence */
    tokens: Token[];
    /**
     * Construct a new sentence from the given dictionary
     * @param param0 A dictionary object containing an optional `meta` array of either
     * `Meta` or `Comment` and a `tokens` field which is an array of `Token` derivatives.
     */
    constructor({ meta, tokens }: {
        meta?: (Meta | Comment)[];
        tokens: Token[];
    });
    /** Get the `conllu` formatted string of the current sentence */
    toString(): string;
    /**
     * Parse the given string as a `Sentence` object
     * @param str A string to be used to instantiate the `Sentence`.
     * @param Parser An optional `XPOSParser` derivative object
     */
    static parse(str: string, Parser?: XPOSParser): Sentence;
    /**
     * Validate whether the token structure of the current sentence is valid and all
     * `head`, `relation`, and `deps` are valid.
     */
    validate(): SentenceValidationResult;
}
/** Root ancestor that all types of token should inherit from */
export declare abstract class Token {
    /** Format the token into a `conllu` string */
    abstract toString(): string;
}
/** ID of a `CompoundToken`: a pair of the first and the last ID of the range */
export declare type IdRange = [FirstId, LastId];
/** First ID of an `IdRange` */
export declare type FirstId = number;
/** Last ID of an `IdRange` */
export declare type LastId = number;
/**
 * A token whose `id` is a range `[start, end]`, inclusive at both ends.
 *
 * Only `id` and `form` are required; the `misc` column is optional.
 *
 * When converted to a string, every other column is written as `_` and
 * the ID is written as `start`-`end`, e.g. `1-2`.
 * The `end` index must be greater than `start`, so an ID of `[1, 1]` is an error.
 */
export declare class CompoundToken implements Token {
    id: IdRange;
    form: string;
    misc?: string[];
    constructor({ id: [start, end], form, misc }: {
        id: IdRange;
        form: string;
        misc?: string[];
    });
    /** Get the CoNLL-U format string representation of this token */
    toString(): string;
    /**
     * Build a `CompoundToken` from a line of text.
     *
     * The line must be tab separated and have 10 columns.
     * See https://universaldependencies.org/format.html for the file format.
     *
     * Only the `id`, `form`, and `misc` columns are used. Every other column is
     * ignored because
     * https://universaldependencies.org/format.html#words-tokens-and-empty-nodes
     * states that all columns besides these three must be empty.
     */
    static parse(str: string): CompoundToken;
}
/** Numeric ID used to reference a head token */
export declare type HeadId = number;
/**
 * One entry of a `deps` field: a head reference, written as either `[HeadId]` or
 * `[HeadId, EmptyId]`, paired with its `DepsRelation`.
 * NOTE(review): the two-element form presumably references an empty token — confirm.
 */
export declare type AdvanceDep = [[HeadId] | [HeadId, EmptyId], DepsRelation];
/**
 * Nominal token is the basic type of token, which must exist in a `Sentence` in order to
 * use the other types of token.
 *
 * The mandatory fields are `form` and `upos`. All other fields are optional.
 * Every optional field that is absent becomes "_" when converted to a string.
 *
 * If the `deps` field is supplied on construction, it is automatically sorted to comply with
 * https://universaldependencies.org/format.html#syntactic-annotation
 */
export declare class NominalToken implements Token {
    form: string;
    lemma?: string;
    upos: UPOS;
    xpos?: XPOS;
    feats?: Feature[];
    head?: HeadId;
    deprel?: Relation;
    deps?: AdvanceDep[];
    misc?: string[];
    /**
     * Construct a `NominalToken` from the given dictionary.
     *
     * Only `form` and `upos` are required. `lemma` is optional here so that the
     * constructor agrees with the optional `lemma` field and the class contract.
     * NOTE(review): `headRel` presumably populates the `head` and `deprel` fields — confirm.
     */
    constructor({ form, lemma, upos, xpos, feats, headRel, deps, misc }: {
        form: string;
        lemma?: string;
        upos: UPOS;
        xpos?: XPOS;
        feats?: Feature[];
        headRel?: [HeadId, Relation];
        deps?: AdvanceDep[];
        misc?: string[];
    });
    /**
     * Parse the given string and construct a `NominalToken` out of it.
     * If the text contains an XPOS column and you need to keep the XPOS field, you
     * need to supply an implementation of `XPOSParser`.
     * @param str A string to be parsed
     * @param Parser Optional XPOS parser to convert the XPOS column into an object of `XPOS`
     */
    static parse(str: string, Parser?: XPOSParser): NominalTokenParseResult;
    /**
     * Retrieve the CoNLL-U string representation of this token. The string has
     * no `id` because the ID relies on the token's position in the sentence.
     */
    toString(): string;
}
/**
 * Numeric ID component used together with a `HeadId` in `AdvanceDep` and `EmptyTokenParseResult`.
 * NOTE(review): presumably the part of an empty token ID that follows the head ID — confirm.
 */
export declare type EmptyId = number;
/**
 * `EmptyToken` is the null token type. `deps` is its only required field;
 * everything else is optional.
 * The `deps` field is sorted automatically according to
 * https://universaldependencies.org/format.html#syntactic-annotation
 */
export declare class EmptyToken implements Token {
    form?: string;
    lemma?: string;
    upos?: UPOS;
    xpos?: XPOS;
    feats?: Feature[];
    deps: AdvanceDep[];
    misc?: string[];
    constructor({ form, lemma, upos, xpos, feats, deps, misc }: {
        form?: string;
        lemma?: string;
        upos?: UPOS;
        xpos?: XPOS;
        feats?: Feature[];
        deps: AdvanceDep[];
        misc?: string[];
    });
    /**
     * Get the CoNLL-U string representation of this token. The string carries
     * no `id` because the ID relies on the token's position in the sentence.
     */
    toString(): string;
    /**
     * Build an `EmptyToken` from the given string.
     * @param str A string to be parsed
     * @param Parser An optional `XPOSParser` derivative object
     */
    static parse(str: string, Parser?: XPOSParser): EmptyTokenParseResult;
}
/**
 * All possible part-of-speech tags defined in CoNLL-U.
 * A complete list of POS tags can be found here:
 * https://universaldependencies.org/u/pos/index.html
 */
export declare enum UPOS {
    ADJ = "ADJ",
    ADP = "ADP",
    ADV = "ADV",
    AUX = "AUX",
    CCONJ = "CCONJ",
    DET = "DET",
    INTJ = "INTJ",
    NOUN = "NOUN",
    NUM = "NUM",
    PART = "PART",
    PRON = "PRON",
    PROPN = "PROPN",
    PUNCT = "PUNCT",
    SCONJ = "SCONJ",
    SYM = "SYM",
    VERB = "VERB",
    /** The only member whose string value (`X`) differs from its name */
    Other = "X"
}
/**
 * Utility function to parse a string as a `UPOS` value.
 * NOTE(review): the behavior for a string that is not a known tag is not visible here — confirm.
 *
 * @param str The string to be converted
 */
export declare function toUPOS(str: string): UPOS;
/**
 * An abstract class `XPOS` which every language that uses the
 * `xpos` field needs to implement.
 *
 * Implementing this class is mandatory if you want to preserve and
 * use the `xpos` field.
 */
export declare abstract class XPOS {
    /** Map this language-specific tag to a `UPOS` value */
    abstract toUPOS(): UPOS;
    /** Get the string form of this tag */
    abstract toString(): string;
}
/**
 * An abstract class `XPOSParser` which any language that uses the `xpos` field and
 * needs to deserialize it has to implement.
 *
 * The implementation needs to implement `parse` as a static method.
 * If you don't pass an implementation of this class when deserializing a
 * `Document`, `Sentence`, `NominalToken`, or `EmptyToken`, then the deserialized
 * object will have no `xpos` field.
 *
 * NOTE(review): `parse` is declared below as an instance member while the text above
 * asks for a static method — confirm which form callers are expected to pass.
 */
export declare abstract class XPOSParser {
    /**
     * Convert the given string into an `XPOS` object
     * @param str The string to be parsed
     */
    abstract parse(str: string): XPOS;
}
/**
 * A feature as described in https://universaldependencies.org/format.html#morphological-annotation
 * It is a kind of key/values pair where the key is the name of the feature type and the values are a list of
 * feature names.
 *
 * If you construct this feature via its constructor, it will validate the name and sort the values for you.
 */
export declare class Feature {
    /** Name of the feature type */
    name: string;
    /** List of feature names for this feature type */
    value: string[];
    /**
     * @param name Name of the feature type
     * @param value List of feature names
     */
    constructor(name: string, value: string[]);
    /** Get the string form of this feature */
    toString(): string;
}
/**
 * A `DepsRelation` is a relation used in the `deps` field of `NominalToken` and `EmptyToken`.
 * The constructor will validate the relation name according to
 * https://universaldependencies.org/u/overview/enhanced-syntax.html
 */
export declare class DepsRelation {
    /** Name of the relation */
    rel: string;
    /**
     * @param rel Name of the relation
     */
    constructor(rel: string);
    /** Get the string form of this relation */
    toString(): string;
}
/**
 * A `Relation` is the name of the relation that describes how a token relates
 * to its `head`. The field that uses this class is `deprel`.
 * `deprel` is mandatory if `head` is not empty.
 * See https://universaldependencies.org/format.html#syntactic-annotation
 */
export declare class Relation {
    /** Name of the relation */
    rel: string;
    /**
     * @param rel Name of the relation
     */
    constructor(rel: string);
    /** Get the string form of this relation */
    toString(): string;
}
export {};
