/**
 * Trie in TypeScript. Creates a Trie out of a list of words. The trie is used to split on `added_tokens` in one pass.
 * Loose reference: https://en.wikipedia.org/wiki/Trie
 *
 * This is an ambient declaration only: it describes the public shape of the class, the implementation lives elsewhere.
 */
declare class Trie {
    /**
     * Nested trie representation. Per the examples on `add`, each level is an object keyed by one character,
     * and the special key `""` marks the end of a stored word.
     * The type is omitted in this declaration, so it is implicitly `any`.
     */
    private data;
    /**
     * Internal bookkeeping for the trie; the type is omitted in this declaration.
     * NOTE(review): presumably the collection of words already added - confirm against the implementation.
     */
    private _tokens;
    /**
     * Creates a trie. The constructor takes no arguments; words are registered afterwards with `add`.
     */
    constructor();
    /**
     * Passes over every character of `word` and recursively adds it to the internal `data` trie representation.
     * The special key `""` is used to represent termination.
     *
     * This function is idempotent: adding the same word twice leaves the trie unchanged.
     *
     * NOTE(review): whether "character" means a UTF-16 code unit or a full code point is not visible
     * from this declaration - confirm against the implementation.
     *
     * Example (illustrative only: `data` is declared `private`, so reading `trie.data` from outside
     * the class does not type-check):
     *
     * ```typescript
     * const trie = new Trie();
     * trie.add("Hello 友達");
     * console.log(trie.data);
     * // {"H": {"e": {"l": {"l": {"o": {" ": {"友": {"達": {"": 1}}}}}}}}}
     *
     * trie.add("Hello");
     * console.log(trie.data);
     * // {"H": {"e": {"l": {"l": {"o": {"": 1, " ": {"友": {"達": {"": 1}}}}}}}}}
     * ```
     *
     * @param {string} word - the word to insert into the trie
     * @return {void}
     */
    add(word: string): void;
    /**
     * Looks for the words added to the trie within `text`. The output is the original string split along the
     * boundaries of the words found.
     *
     * The trie matches the longest possible word first (see `extra_id_1` vs `extra_id_100` below).
     *
     * Example:
     *
     * ```typescript
     * const trie = new Trie();
     * console.log(trie.split("[CLS] This is a extra_id_100"));
     * // ["[CLS] This is a extra_id_100"]
     *
     * trie.add("[CLS]");
     * trie.add("extra_id_1");
     * trie.add("extra_id_100");
     * console.log(trie.split("[CLS] This is a extra_id_100"));
     * // ["[CLS]", " This is a ", "extra_id_100"]
     * ```
     *
     * @param {string} text - the text to split
     * @return {string[]} the pieces of `text`, in order; a single-element array when no added word is found
     */
    split(text: string): string[];
    /**
     * Helper available to subclasses that produces string pieces from `text` and a list of numeric `offsets`.
     * NOTE(review): presumably slices `text` at each offset to build the result of `split` - the
     * implementation is not visible here, confirm before relying on the exact offset semantics.
     *
     * @param {string} text - the text to cut
     * @param {number[]} offsets - positions within `text` (exact meaning to be confirmed, see note above)
     * @return {string[]} the resulting pieces
     */
    protected cutText(text: string, offsets: number[]): string[];
}

/**
 * Tokenizer exposing vocabulary management, special-token registration, and conversions between
 * text, tokens (strings) and ids (numbers).
 *
 * This is an ambient declaration only: it describes the public shape of the class, the implementation lives elsewhere.
 */
declare class Llama2Tokenizer {
    /**
     * Trie instance owned by the tokenizer.
     * NOTE(review): presumably holds the special tokens so they can be split out of the input text - confirm.
     */
    protected tokens_trie: Trie;
    /** Special tokens, stored as a string-to-number record (presumably token -> id; confirm). */
    protected special_tokens: Record<string, number>;
    /** Vocabulary, stored as a string-to-number record (presumably token -> id; confirm). */
    protected vocab: Record<string, number>;
    /** Number-to-string record (presumably the reverse id -> token lookup for `vocab`; confirm). */
    protected vocab_ids: Record<number, string>;
    /**
     * Creates a tokenizer. The constructor takes no arguments; a vocabulary is supplied afterwards
     * through `install_vocab`.
     */
    constructor();
    /**
     * Install the provided vocabulary into the class instance.
     *
     * @param {Record<string, number>} vocab - The vocabulary to be installed
     * @return {void}
     */
    install_vocab(vocab: Record<string, number>): void;
    /**
     * Get the size of the vocabulary, including special tokens.
     *
     * @return {number} the size of the vocabulary
     */
    get vocab_size(): number;
    /**
     * Get the maximum id from the vocab_ids and special_tokens.
     *
     * @return {number} the maximum id
     */
    get max_id(): number;
    /**
     * Adds a special token with an optional token ID.
     *
     * NOTE(review): how the id is chosen when `token_id` is omitted is not visible from this
     * declaration - confirm against the implementation.
     *
     * @param {string} token - the special token to be added
     * @param {number} [token_id] - the optional token ID
     * @return {void}
     */
    add_special_token(token: string, token_id?: number): void;
    /**
     * Adds special tokens to the list of tokens.
     *
     * Note that, unlike `add_special_token`, the object form declared here requires `token_id`;
     * use the plain string form when no explicit id is wanted.
     *
     * @param {Array} tokens - An array of tokens to add. Each token can be a string or an object with `token` and `token_id` properties.
     * @return {void}
     */
    add_special_tokens(tokens: (string | {
        token: string;
        token_id: number;
    })[]): void;
    /**
     * Convert an id to a token. Despite the plural in the name, this takes a single id.
     *
     * @param {number} id - The id to be converted to a token.
     * @return {string} The corresponding token for the given id.
     */
    ids_to_token(id: number): string;
    /**
     * Takes a token as input and returns its corresponding id if found in the vocabulary, otherwise throws an error.
     *
     * @param {string} token - the input token
     * @return {number} the corresponding id of the input token
     * @throws when the token is not found in the vocabulary
     */
    token_to_id(token: string): number;
    /**
     * Retrieve the vocabulary.
     *
     * @return {Object} a shallow copy of the vocabulary
     */
    get_vocab(): {
        [x: string]: number;
    };
    /**
     * Checks if the token is a valid token.
     *
     * @param {string} token - the token to be checked
     * @return {boolean} true if the token is valid, false otherwise
     */
    valid_token(token: string): boolean;
    /**
     * Converts a string into a sequence of tokens, using the tokenizer.
     *
     * @param {string} text - the text to tokenize
     * @return {string[]} the sequence of tokens
     */
    tokenize(text: string): string[];
    /**
     * Converts a string into a sequence of ids (integers), using the tokenizer and vocabulary.
     *
     * @param {string} text - the text to encode
     * @return {number[]} the sequence of ids
     */
    encode(text: string): number[];
    /**
     * Converts a sequence of ids into a single string.
     * NOTE(review): presumably the inverse of `encode`, using the vocabulary and added tokens - confirm.
     *
     * @param {number[]} ids - the ids to decode
     * @return {string} the decoded string
     */
    decode(ids: number[]): string;
    /**
     * Converts a sequence of tokens (strings) into a single string.
     *
     * @param {string[]} tokens - the tokens to join
     * @return {string} the resulting string
     */
    convert_tokens_to_string(tokens: string[]): string;
    /**
     * Converts a sequence of tokens into a sequence of integer ids, using the vocabulary.
     * Only the array form is accepted by this signature.
     *
     * @param {string[]} tokens - the tokens to convert
     * @return {number[]} the corresponding ids
     */
    convert_tokens_to_ids(tokens: string[]): number[];
    /**
     * Converts a sequence of ids into a sequence of tokens, using the vocabulary and added tokens.
     * Only the array form is accepted by this signature.
     *
     * @param {number[]} ids - the ids to convert
     * @return {string[]} the corresponding tokens
     */
    convert_ids_to_tokens(ids: number[]): string[];
}

export { Llama2Tokenizer };
