/**
 * Options for loading a pretrained model.
 *
 * Every field is optional; omitting a field is equivalent to passing `undefined` for it.
 */
type PretrainedOptions = {
    /**
     * Whether to load the 8-bit quantized version of the model (only applicable when loading model files).
     * May also be `null`.
     */
    quantized?: boolean | null | undefined;
    /**
     * If specified, this function will be called during model construction to provide the user with progress updates.
     * The callback's argument shape is not described by this declaration (it is typed as a bare `Function`).
     */
    progress_callback?: Function | undefined;
    /**
     * Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:
     * - The model is a model provided by the library (loaded with the *model id* string of a pretrained model).
     * - The model is loaded by supplying a local directory as `pretrained_model_name_or_path` and a configuration JSON file named *config.json* is found in the directory.
     */
    config?: Object | undefined;
    /**
     * Path to a directory in which a downloaded pretrained model configuration should be cached, if the standard cache should not be used.
     */
    cache_dir?: string | undefined;
    /**
     * Whether to only look at local files (i.e., not try to download the model).
     */
    local_files_only?: boolean | undefined;
    /**
     * The specific model version to use. It can be a branch name, a tag name, or a commit id.
     * Because a git-based system is used for storing models and other artifacts on huggingface.co, `revision` can be any identifier allowed by git.
     * NOTE: This setting is ignored for local requests.
     */
    revision?: string | undefined;
    /**
     * If specified, load the model with this name (excluding the .onnx suffix). Currently only valid for encoder-only or decoder-only models.
     */
    model_file_name?: string | undefined;
};

/**
 * Placeholder declaration of the `Tensor` class referenced by the tokenizer signatures in this file.
 * Only the constructor is declared here.
 */
declare class Tensor {
    /**
     * Create a new Tensor or copy an existing Tensor.
     *
     * NOTE: This is just a placeholder signature; it accepts any arguments and does not
     * describe the real constructor overloads.
     */
    constructor(...args: any[]);
}

/**
 * Checks whether the given Unicode codepoint represents a CJK (Chinese, Japanese, or Korean) character.
 *
 * A "Chinese character" is defined as anything in the CJK Unicode block:
 * https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
 *
 * Note that the CJK Unicode block does NOT contain all Japanese and Korean characters, despite its name.
 * The modern Korean Hangul alphabet is a different block, as are Japanese Hiragana and Katakana.
 * Those alphabets are used to write space-separated words, so they are not treated specially
 * and are handled like all other languages.
 *
 * @param {number|bigint} cp The Unicode codepoint to check.
 * @returns {boolean} True if the codepoint represents a CJK character, false otherwise.
 */
declare function is_chinese_char(cp: number | bigint): boolean;
/**
 * Constructor type used as the base class of `TokenizerModel`.
 * Instances are themselves invocable (call signature taking any arguments) and expose a `_call` method
 * with the same loose signature. The `@extends Callable` tag on `TokenizerModel` suggests this models
 * the library's `Callable` base class — TODO confirm.
 */
declare const TokenizerModel_base: new () => {
    (...args: any[]): any;
    _call(...args: any[]): any;
};
/**
 * Abstract base class for tokenizer models.
 *
 * Declares the vocabulary (`vocab`, `tokens_to_ids`), unknown-token settings, and the
 * token <-> id conversion methods shared by tokenizer models.
 *
 * @extends Callable
 */
declare class TokenizerModel extends TokenizerModel_base {
    /**
     * Instantiates a new TokenizerModel instance based on the configuration object provided.
     * @param {Object} config The configuration object for the TokenizerModel.
     * @param {...*} args Optional arguments to pass to the specific TokenizerModel constructor.
     * @returns {TokenizerModel} A new instance of a TokenizerModel.
     * @throws Will throw an error if the TokenizerModel type in the config is not recognized.
     */
    static fromConfig(config: Object, ...args: any[]): TokenizerModel;
    /**
     * Creates a new instance of TokenizerModel.
     * @param {Object} config The configuration object for the TokenizerModel.
     */
    constructor(config: Object);
    /** The configuration object (same type as the constructor's `config` parameter). */
    config: Object;
    /**
     * List of vocabulary tokens. Presumably indexed by token id — TODO confirm.
     * @type {string[]}
     */
    vocab: string[];
    /**
     * A mapping of tokens to ids.
     * @type {Map<string, number>}
     */
    tokens_to_ids: Map<string, number>;
    /** Id of the unknown token (untyped in this declaration). */
    unk_token_id: any;
    /** The unknown token (untyped in this declaration). */
    unk_token: any;
    /** End-of-word suffix setting (untyped in this declaration). */
    end_of_word_suffix: any;
    /** @type {boolean} Whether to fuse unknown tokens when encoding. Defaults to false. */
    fuse_unk: boolean;
    /**
     * Internal function to call the TokenizerModel instance.
     * @param {string[]} tokens The tokens to encode.
     * @returns {string[]} The encoded tokens.
     */
    _call(tokens: string[]): string[];
    /**
     * Encodes a list of tokens.
     *
     * NOTE(review): the original description says this produces "a list of token IDs", but the
     * declared return type is `string[]` (tokens, not numeric ids). Use `convert_tokens_to_ids`
     * to obtain `number[]` ids.
     *
     * @param {string[]} tokens The tokens to encode.
     * @returns {string[]} The encoded tokens.
     * @throws Will throw an error if not implemented in a subclass.
     */
    encode(tokens: string[]): string[];
    /**
     * Converts a list of tokens into a list of token IDs.
     * @param {string[]} tokens The tokens to convert.
     * @returns {number[]} The converted token IDs.
     */
    convert_tokens_to_ids(tokens: string[]): number[];
    /**
     * Converts a list of token IDs into a list of tokens.
     * @param {number[]|bigint[]} ids The token IDs to convert.
     * @returns {string[]} The converted tokens.
     */
    convert_ids_to_tokens(ids: number[] | bigint[]): string[];
}
/**
 * Constructor type used as the base class of `PreTrainedTokenizer`.
 * Instances are themselves invocable (call signature taking any arguments) and expose a `_call` method
 * with the same loose signature.
 */
declare const PreTrainedTokenizer_base: new () => {
    (...args: any[]): any;
    _call(...args: any[]): any;
};
/**
 * @typedef {Object} Message
 * @property {string} role The role of the message (e.g., "user" or "assistant" or "system").
 * @property {string} content The content of the message.
 */
/**
 * Base class of the tokenizers declared in this file.
 *
 * Declares the tokenization pipeline components (`normalizer`, `pre_tokenizer`, `model`,
 * `post_processor`, `decoder`), the special-token fields, and the methods for encoding text,
 * decoding token ids, and applying chat templates.
 */
declare class PreTrainedTokenizer extends PreTrainedTokenizer_base {
    /**
     * Loads a pre-trained tokenizer from the given `pretrained_model_name_or_path`.
     *
     * @param {string} pretrained_model_name_or_path The path to the pre-trained tokenizer.
     * @param {PretrainedTokenizerOptions} options Additional options for loading the tokenizer.
     * The fields read from it are `progress_callback`, `config`, `cache_dir`, `local_files_only`,
     * `revision` and `legacy`.
     *
     * @throws {Error} Throws an error if the tokenizer.json or tokenizer_config.json files are not found in the `pretrained_model_name_or_path`.
     * @returns {Promise<PreTrainedTokenizer>} A new instance of the `PreTrainedTokenizer` class.
     */
    static from_pretrained(pretrained_model_name_or_path: string, { progress_callback, config, cache_dir, local_files_only, revision, legacy, }?: PretrainedTokenizerOptions): Promise<PreTrainedTokenizer>;
    /**
     * Create a new PreTrainedTokenizer instance.
     * @param {Object} tokenizerJSON The JSON of the tokenizer.
     * @param {Object} tokenizerConfig The config of the tokenizer.
     */
    constructor(tokenizerJSON: Object, tokenizerConfig: Object);
    /** Boolean flag; presumably the default for the `return_token_type_ids` encoding option — TODO confirm. */
    return_token_type_ids: boolean;
    /** Padding side as a string; presumably "left" or "right" — TODO confirm allowed values. */
    padding_side: string;
    /** Tokenizer config object; presumably the `tokenizerConfig` passed to the constructor — TODO confirm. */
    _tokenizer_config: Object;
    /** Normalizer pipeline component, or `null`. */
    normalizer: Normalizer | null;
    /** Pre-tokenizer pipeline component. */
    pre_tokenizer: PreTokenizer;
    /** The underlying tokenizer model. */
    model: TokenizerModel;
    /** Post-processor pipeline component. */
    post_processor: PostProcessor;
    /** Decoder pipeline component. */
    decoder: Decoder;
    /** Special tokens (element type is not specified by this declaration). */
    special_tokens: any[];
    /** Ids of the special tokens. */
    all_special_ids: number[];
    /** @type {AddedToken[]} */
    added_tokens: AddedToken[];
    /** Additional special tokens (untyped in this declaration). */
    additional_special_tokens: any;
    /** Regular expression, or `null`; presumably matches the added tokens in input text — TODO confirm. */
    added_tokens_regex: RegExp | null;
    // Special tokens and their ids. Each `*_token` is a string or `null`;
    // each `*_token_id` is a number or `undefined`.
    mask_token: string | null;
    mask_token_id: number | undefined;
    pad_token: string | null;
    pad_token_id: number | undefined;
    sep_token: string | null;
    sep_token_id: number | undefined;
    unk_token: string | null;
    unk_token_id: number | undefined;
    bos_token: string | null;
    bos_token_id: number | undefined;
    eos_token: string | null;
    eos_token_id: number | undefined;
    /** Maximum length setting of the model (untyped in this declaration). */
    model_max_length: any;
    /** @type {boolean} Whether or not to strip the text when tokenizing (removing excess spaces before and after the string). */
    remove_space: boolean;
    /** Untyped setting; `decode_single` documents it as a fallback for its `clean_up_tokenization_spaces` option. */
    clean_up_tokenization_spaces: any;
    /** Untyped setting; presumably enables lowercasing and accent removal — TODO confirm. */
    do_lowercase_and_remove_accent: any;
    /** Legacy-behaviour flag; `from_pretrained` reads an option of the same name. */
    legacy: boolean;
    /** Chat template (untyped in this declaration); read by the chat-template methods per their docs. */
    chat_template: any;
    /** Cache map; presumably stores compiled chat templates — TODO confirm key/value types. */
    _compiled_template_cache: Map<any, any>;
    /**
     * Returns the value of the first matching key in the tokenizer config object.
     * @param {...string} keys One or more keys to search for in the tokenizer config object.
     * @returns {string|null} The value associated with the first matching key, or null if no match is found.
     * @throws {Error} If an object is found for a matching key and its __type property is not "AddedToken".
     * @private
     */
    private getToken;
    /**
     * @typedef {number[]|number[][]|Tensor} BatchEncodingItem
     *
     * @typedef {Object} BatchEncoding Holds the output of the tokenizer's call function.
     * @property {BatchEncodingItem} input_ids List of token ids to be fed to a model.
     * @property {BatchEncodingItem} attention_mask List of indices specifying which tokens should be attended to by the model.
     * @property {BatchEncodingItem} [token_type_ids] List of token type ids to be fed to a model.
     */
    /**
     * Encode/tokenize the given text(s).
     * @param {string|string[]} text The text to tokenize.
     * @param {Object} options An optional object containing the following properties:
     * @param {string|string[]} [options.text_pair=null] Optional second sequence to be encoded. If set, must be the same type as text.
     * @param {boolean|'max_length'} [options.padding=false] Whether to pad the input sequences.
     * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model.
     * @param {boolean} [options.truncation=null] Whether to truncate the input sequences.
     * @param {number} [options.max_length=null] Maximum length of the returned list and optionally padding length.
     * @param {boolean} [options.return_tensor=false] Whether to return the results as Tensors or arrays.
     * @param {boolean} [options.return_token_type_ids=null] Whether to return the token type ids.
     * @returns {BatchEncoding} Object to be passed to the model.
     */
    _call(text: string | string[], { text_pair, add_special_tokens, padding, truncation, max_length, return_tensor, return_token_type_ids, }?: {
        text_pair?: string | string[] | undefined;
        padding?: boolean | "max_length" | undefined;
        add_special_tokens?: boolean | undefined;
        truncation?: boolean | undefined;
        max_length?: number | undefined;
        return_tensor?: boolean | undefined;
        return_token_type_ids?: boolean | undefined;
    }): {
        /**
         * List of token ids to be fed to a model.
         */
        input_ids: number[] | number[][] | Tensor;
        /**
         * List of indices specifying which tokens should be attended to by the model.
         */
        attention_mask: number[] | number[][] | Tensor;
        /**
         * List of token type ids to be fed to a model.
         */
        token_type_ids?: (number[] | number[][] | Tensor) | undefined;
    };
    /**
     * Encodes a single text using the preprocessor pipeline of the tokenizer.
     *
     * @param {string|null} text The text to encode.
     * @returns {string[]|null} The encoded tokens, or `null`.
     */
    _encode_text(text: string | null): string[] | null;
    /**
     * Encodes a single text or a pair of texts using the model's tokenizer.
     *
     * @param {string} text The text to encode.
     * @param {Object} options An optional object containing the following properties:
     * @param {string} [options.text_pair=null] The optional second text to encode.
     * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model.
     * @param {boolean} [options.return_token_type_ids=null] Whether to return token_type_ids.
     * @returns {EncodingSingle} An object containing the encoded text.
     * @private
     */
    private _encode_plus;
    /**
     * Internal helper function to tokenize a text, and optionally a pair of texts.
     * @param {string} text The text to tokenize.
     * @param {Object} options An optional object containing the following properties:
     * @param {string} [options.pair=null] The optional second text to tokenize.
     * @param {boolean} [options.add_special_tokens=false] Whether or not to add the special tokens associated with the corresponding model.
     * @returns {{tokens: string[], token_type_ids?: number[]}} An object containing the tokens and optionally the token type IDs.
     */
    _tokenize_helper(text: string, { pair, add_special_tokens }?: {
        pair?: string | undefined;
        add_special_tokens?: boolean | undefined;
    }): {
        tokens: string[];
        token_type_ids?: number[];
    };
    /**
     * Converts a string into a sequence of tokens.
     * @param {string} text The sequence to be encoded.
     * @param {Object} options An optional object containing the following properties:
     * @param {string} [options.pair] A second sequence to be encoded with the first.
     * @param {boolean} [options.add_special_tokens=false] Whether or not to add the special tokens associated with the corresponding model.
     * @returns {string[]} The list of tokens.
     */
    tokenize(text: string, { pair, add_special_tokens }?: {
        pair?: string | undefined;
        add_special_tokens?: boolean | undefined;
    }): string[];
    /**
     * Encodes a single text or a pair of texts using the model's tokenizer.
     *
     * @param {string} text The text to encode.
     * @param {Object} options An optional object containing the following properties:
     * @param {string} [options.text_pair=null] The optional second text to encode.
     * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model.
     * @param {boolean} [options.return_token_type_ids=null] Whether to return token_type_ids.
     * @returns {number[]} An array of token IDs representing the encoded text(s).
     */
    encode(text: string, { text_pair, add_special_tokens, return_token_type_ids, }?: {
        text_pair?: string | undefined;
        add_special_tokens?: boolean | undefined;
        return_token_type_ids?: boolean | undefined;
    }): number[];
    /**
     * Decode a batch of tokenized sequences.
     * @param {number[][]|Tensor} batch List/Tensor of tokenized input sequences.
     * @param {Object} decode_args (Optional) Object with decoding arguments.
     * @returns {string[]} List of decoded sequences.
     */
    batch_decode(batch: number[][] | Tensor, decode_args?: Object): string[];
    /**
     * Decodes a sequence of token IDs back to a string.
     *
     * @param {number[]|bigint[]|Tensor} token_ids List/Tensor of token IDs to decode.
     * @param {Object} [decode_args={}]
     * @param {boolean} [decode_args.skip_special_tokens=false] If true, special tokens are removed from the output string.
     * @param {boolean} [decode_args.clean_up_tokenization_spaces=true] If true, spaces before punctuations and abbreviated forms are removed.
     *
     * @returns {string} The decoded string.
     * @throws {Error} If `token_ids` is not a non-empty array of integers.
     */
    decode(token_ids: number[] | bigint[] | Tensor, decode_args?: {
        skip_special_tokens?: boolean | undefined;
        clean_up_tokenization_spaces?: boolean | undefined;
    } | undefined): string;
    /**
     * Decode a single list of token ids to a string.
     *
     * NOTE(review): the options object is described as optional, but in this declaration the second
     * parameter is required (no `?`); only its individual fields are optional.
     *
     * @param {number[]|bigint[]} token_ids List of token ids to decode
     * @param {Object} decode_args Optional arguments for decoding
     * @param {boolean} [decode_args.skip_special_tokens=false] Whether to skip special tokens during decoding
     * @param {boolean} [decode_args.clean_up_tokenization_spaces=null] Whether to clean up tokenization spaces during decoding.
     * If null, the value is set to `this.decoder.cleanup` if it exists, falling back to `this.clean_up_tokenization_spaces` if it exists, falling back to `true`.
     * @returns {string} The decoded string
     */
    decode_single(token_ids: number[] | bigint[], { skip_special_tokens, clean_up_tokenization_spaces }: {
        skip_special_tokens?: boolean | undefined;
        clean_up_tokenization_spaces?: boolean | undefined;
    }): string;
    /**
     * Retrieve the chat template string used for tokenizing chat messages. This template is used
     * internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
     * template for better generation tracking.
     *
     * @param {Object} options An optional object containing the following properties:
     * @param {string} [options.chat_template=null]
     * A Jinja template or the name of a template to use for this conversion.
     * It is usually not necessary to pass anything to this argument,
     * as the model's template will be used by default.
     * @param {Object[]} [options.tools=null]
     * A list of tools (callable functions) that will be accessible to the model. If the template does not
     * support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
     * giving the name, description and argument types for the tool. See our
     * [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
     * for more information.
     * @returns {string} The chat template string.
     */
    get_chat_template({ chat_template, tools }?: {
        chat_template?: string | undefined;
        tools?: Object[] | undefined;
    }): string;
    /**
     * Converts a list of message objects with `"role"` and `"content"` keys to a list of token
     * ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
     * determine the format and control tokens to use when converting.
     *
     * See [here](https://huggingface.co/docs/transformers/chat_templating) for more information.
     *
     * **Example:** Applying a chat template to a conversation.
     *
     * ```javascript
     * import { AutoTokenizer } from "@huggingface/transformers";
     *
     * const tokenizer = await AutoTokenizer.from_pretrained("Xenova/mistral-tokenizer-v1");
     *
     * const chat = [
     *   { "role": "user", "content": "Hello, how are you?" },
     *   { "role": "assistant", "content": "I'm doing great. How can I help you today?" },
     *   { "role": "user", "content": "I'd like to show off how chat templating works!" },
     * ]
     *
     * const text = tokenizer.apply_chat_template(chat, { tokenize: false });
     * // "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]"
     *
     * const input_ids = tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false });
     * // [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]
     * ```
     *
     * NOTE(review): the second example shows a bare array of ids, but `options.return_dict` is documented
     * below as defaulting to `true`, which would presumably yield a BatchEncoding object instead — confirm
     * against the implementation and pass `return_dict: false` in the example if needed.
     *
     * @param {Message[]} conversation A list of message objects with `"role"` and `"content"` keys,
     * representing the chat history so far.
     * @param {Object} options An optional object containing the following properties:
     * @param {string} [options.chat_template=null] A Jinja template to use for this conversion. If
     * this is not passed, the model's chat template will be used instead.
     * @param {Object[]} [options.tools=null]
     * A list of tools (callable functions) that will be accessible to the model. If the template does not
     * support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
     * giving the name, description and argument types for the tool. See our
     * [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
     * for more information.
     * @param {Record<string, string>[]} [options.documents=null]
     * A list of dicts representing documents that will be accessible to the model if it is performing RAG
     * (retrieval-augmented generation). If the template does not support RAG, this argument will have no
     * effect. We recommend that each document should be a dict containing "title" and "text" keys. Please
     * see the RAG section of the [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#arguments-for-RAG)
     * for examples of passing documents with chat templates.
     * @param {boolean} [options.add_generation_prompt=false] Whether to end the prompt with the token(s) that indicate
     * the start of an assistant message. This is useful when you want to generate a response from the model.
     * Note that this argument will be passed to the chat template, and so it must be supported in the
     * template for this argument to have any effect.
     * @param {boolean} [options.tokenize=true] Whether to tokenize the output. If false, the output will be a string.
     * @param {boolean} [options.padding=false] Whether to pad sequences to the maximum length. Has no effect if tokenize is false.
     * @param {boolean} [options.truncation=false] Whether to truncate sequences to the maximum length. Has no effect if tokenize is false.
     * @param {number} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false.
     * If not specified, the tokenizer's `max_length` attribute will be used as a default
     * (NOTE(review): this class declares `model_max_length`, not `max_length` — presumably that is the attribute meant).
     * @param {boolean} [options.return_tensor=false] Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.
     * @param {boolean} [options.return_dict=true] Whether to return a dictionary with named outputs. Has no effect if tokenize is false.
     * @param {Object} [options.tokenizer_kwargs={}] Additional options to pass to the tokenizer.
     * @returns {string | Tensor | number[]| number[][]|BatchEncoding} The tokenized output.
     */
    apply_chat_template(conversation: Message[], { tools, documents, chat_template, add_generation_prompt, tokenize, padding, truncation, max_length, return_tensor, return_dict, tokenizer_kwargs, ...kwargs }?: {
        chat_template?: string | undefined;
        tools?: Object[] | undefined;
        documents?: Record<string, string>[] | undefined;
        add_generation_prompt?: boolean | undefined;
        tokenize?: boolean | undefined;
        padding?: boolean | undefined;
        truncation?: boolean | undefined;
        max_length?: number | undefined;
        return_tensor?: boolean | undefined;
        return_dict?: boolean | undefined;
        tokenizer_kwargs?: Object | undefined;
    }): string | number[] | number[][] | Tensor | {
        /**
         * List of token ids to be fed to a model.
         */
        input_ids: number[] | number[][] | Tensor;
        /**
         * List of indices specifying which tokens should be attended to by the model.
         */
        attention_mask: number[] | number[][] | Tensor;
        /**
         * List of token type ids to be fed to a model.
         */
        token_type_ids?: (number[] | number[][] | Tensor) | undefined;
    };
}
/**
 * BertTokenizer is a class used to tokenize text for BERT models.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class BertTokenizer extends PreTrainedTokenizer {
}
/**
 * Albert tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class AlbertTokenizer extends PreTrainedTokenizer {
}
/**
 * MobileBert tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class MobileBertTokenizer extends PreTrainedTokenizer {
}
/**
 * SqueezeBert tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class SqueezeBertTokenizer extends PreTrainedTokenizer {
}
/**
 * Deberta tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class DebertaTokenizer extends PreTrainedTokenizer {
}
/**
 * DebertaV2 tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class DebertaV2Tokenizer extends PreTrainedTokenizer {
}
/**
 * Herbert tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class HerbertTokenizer extends PreTrainedTokenizer {
}
/**
 * ConvBert tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class ConvBertTokenizer extends PreTrainedTokenizer {
}
/**
 * RoFormer tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class RoFormerTokenizer extends PreTrainedTokenizer {
}
/**
 * DistilBert tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class DistilBertTokenizer extends PreTrainedTokenizer {
}
/**
 * Camembert tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class CamembertTokenizer extends PreTrainedTokenizer {
}
/**
 * XLM tokenizer.
 * Redeclares the constructor; no other members are added to those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class XLMTokenizer extends PreTrainedTokenizer {
    /**
     * Create a new XLMTokenizer instance.
     * @param {*} tokenizerJSON The JSON of the tokenizer (untyped in this declaration).
     * @param {*} tokenizerConfig The config of the tokenizer (untyped in this declaration).
     */
    constructor(tokenizerJSON: any, tokenizerConfig: any);
}
/**
 * Electra tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class ElectraTokenizer extends PreTrainedTokenizer {
}
/**
 * T5 tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class T5Tokenizer extends PreTrainedTokenizer {
}
/**
 * GPT2 tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class GPT2Tokenizer extends PreTrainedTokenizer {
}
/**
 * Bart tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class BartTokenizer extends PreTrainedTokenizer {
}
/**
 * MBart tokenizer.
 * Adds language-code fields and a helper for building translation inputs on top of `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class MBartTokenizer extends PreTrainedTokenizer {
    /**
     * Create a new MBartTokenizer instance.
     * @param {*} tokenizerJSON The JSON of the tokenizer (untyped in this declaration).
     * @param {*} tokenizerConfig The config of the tokenizer (untyped in this declaration).
     */
    constructor(tokenizerJSON: any, tokenizerConfig: any);
    /** Regular expression; presumably matches this tokenizer's language-code tokens — TODO confirm. */
    languageRegex: RegExp;
    /** Language codes (element type is not specified by this declaration). */
    language_codes: any[];
    /** Function taking one value and returning one value; presumably maps a language code to its token — TODO confirm. */
    lang_to_token: (x: any) => any;
    /**
     * Helper function to build translation inputs for an `MBartTokenizer`.
     * @param {string|string[]} raw_inputs The text to tokenize.
     * @param {Object} tokenizer_options Options to be sent to the tokenizer
     * @param {Object} generate_kwargs Generation options.
     * @returns {Object} Object to be passed to the model.
     */
    _build_translation_inputs(raw_inputs: string | string[], tokenizer_options: Object, generate_kwargs: Object): Object;
}
/**
 * MBart50 tokenizer.
 * This declaration adds no members beyond those inherited from `MBartTokenizer`.
 * @extends MBartTokenizer
 */
declare class MBart50Tokenizer extends MBartTokenizer {
}
/**
 * Roberta tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class RobertaTokenizer extends PreTrainedTokenizer {
}
/**
 * Bloom tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class BloomTokenizer extends PreTrainedTokenizer {
}
/**
 * Llama tokenizer.
 * Redeclares the constructor and the `legacy` field, and overrides `_encode_text` with a
 * non-nullable signature.
 * @extends PreTrainedTokenizer
 */
declare class LlamaTokenizer extends PreTrainedTokenizer {
    /**
     * Create a new LlamaTokenizer instance.
     * @param {*} tokenizerJSON The JSON of the tokenizer (untyped in this declaration).
     * @param {*} tokenizerConfig The config of the tokenizer (untyped in this declaration).
     */
    constructor(tokenizerJSON: any, tokenizerConfig: any);
    /** Legacy-behaviour setting; redeclared here as `any` (the base class declares it as `boolean`). */
    legacy: any;
    /**
     * Helper function to handle legacy encoding of SPM tokenizers.
     * Adapted from https://github.com/huggingface/transformers/blob/e6dcf8abd6f65bb4b6dfc1831b20d9ba49ce00e2/src/transformers/models/t5/tokenization_t5.py#L374-L387
     *
     * Unlike the base-class signature, neither the parameter nor the return type admits `null` here.
     *
     * @param {string} text The text to encode.
     * @returns {string[]} The encoded tokens.
     */
    _encode_text(text: string): string[];
}
/**
 * CodeLlama tokenizer.
 * Extends `PreTrainedTokenizer` directly (not `LlamaTokenizer`) and adds no members of its own in this declaration.
 * @extends PreTrainedTokenizer
 */
declare class CodeLlamaTokenizer extends PreTrainedTokenizer {
}
/**
 * XLMRoberta tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class XLMRobertaTokenizer extends PreTrainedTokenizer {
}
/**
 * MPNet tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class MPNetTokenizer extends PreTrainedTokenizer {
}
/**
 * Falcon tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class FalconTokenizer extends PreTrainedTokenizer {
}
/**
 * GPTNeoX tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class GPTNeoXTokenizer extends PreTrainedTokenizer {
}
/**
 * Esm tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class EsmTokenizer extends PreTrainedTokenizer {
}
/**
 * Qwen2 tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class Qwen2Tokenizer extends PreTrainedTokenizer {
}
/**
 * Gemma tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class GemmaTokenizer extends PreTrainedTokenizer {
}
/**
 * Grok1 tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class Grok1Tokenizer extends PreTrainedTokenizer {
}
/**
 * The NllbTokenizer class is used to tokenize text for NLLB ("No Language Left Behind") models.
 *
 * No Language Left Behind (NLLB) is a first-of-its-kind, AI breakthrough project
 * that open-sources models capable of delivering high-quality translations directly
 * between any pair of 200+ languages — including low-resource languages like Asturian,
 * Luganda, Urdu and more. It aims to help people communicate with anyone, anywhere,
 * regardless of their language preferences. For more information, check out their
 * [paper](https://arxiv.org/abs/2207.04672).
 *
 * For a list of supported languages (along with their language codes), see the link below.
 * @see {@link https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200}
 */
declare class NllbTokenizer extends PreTrainedTokenizer {
    /**
     * Create a new NllbTokenizer instance.
     * @param {*} tokenizerJSON The JSON of the tokenizer (untyped in this declaration).
     * @param {*} tokenizerConfig The config of the tokenizer (untyped in this declaration).
     */
    constructor(tokenizerJSON: any, tokenizerConfig: any);
    /** Regular expression; presumably matches this tokenizer's language-code tokens — TODO confirm. */
    languageRegex: RegExp;
    /** Language codes (element type is not specified by this declaration). */
    language_codes: any[];
    /** Function taking one value and returning one value; presumably maps a language code to its token — TODO confirm. */
    lang_to_token: (x: any) => any;
    /**
     * Helper function to build translation inputs for an `NllbTokenizer`.
     * @param {string|string[]} raw_inputs The text to tokenize.
     * @param {Object} tokenizer_options Options to be sent to the tokenizer
     * @param {Object} generate_kwargs Generation options.
     * @returns {Object} Object to be passed to the model.
     */
    _build_translation_inputs(raw_inputs: string | string[], tokenizer_options: Object, generate_kwargs: Object): Object;
}
/**
 * The M2M100Tokenizer class is used to tokenize text for M2M100 ("Many-to-Many") models.
 *
 * M2M100 is a multilingual encoder-decoder (seq-to-seq) model trained for Many-to-Many
 * multilingual translation. It was introduced in this [paper](https://arxiv.org/abs/2010.11125)
 * and first released in [this](https://github.com/pytorch/fairseq/tree/master/examples/m2m_100) repository.
 *
 * For a list of supported languages (along with their language codes), see the link below.
 * @see {@link https://huggingface.co/facebook/m2m100_418M#languages-covered}
 */
declare class M2M100Tokenizer extends PreTrainedTokenizer {
    /**
     * Create a new M2M100Tokenizer instance.
     * @param {*} tokenizerJSON The JSON of the tokenizer (untyped in this declaration).
     * @param {*} tokenizerConfig The config of the tokenizer (untyped in this declaration).
     */
    constructor(tokenizerJSON: any, tokenizerConfig: any);
    /** Regular expression; presumably matches this tokenizer's language-code tokens — TODO confirm. */
    languageRegex: RegExp;
    /** Language codes (element type is not specified by this declaration). */
    language_codes: any[];
    /** Function taking one value and returning a string; presumably maps a language code to its token — TODO confirm. */
    lang_to_token: (x: any) => string;
    /**
     * Helper function to build translation inputs for an `M2M100Tokenizer`.
     * @param {string|string[]} raw_inputs The text to tokenize.
     * @param {Object} tokenizer_options Options to be sent to the tokenizer
     * @param {Object} generate_kwargs Generation options.
     * @returns {Object} Object to be passed to the model.
     */
    _build_translation_inputs(raw_inputs: string | string[], tokenizer_options: Object, generate_kwargs: Object): Object;
}
/**
 * Whisper tokenizer.
 * Adds helpers for decoding automatic speech recognition (ASR) sequences, including
 * timestamp handling and grouping tokens into words.
 * @extends PreTrainedTokenizer
 */
declare class WhisperTokenizer extends PreTrainedTokenizer {
    /** Read-only numeric accessor; presumably the id of the first timestamp token — TODO confirm. */
    get timestamp_begin(): number;
    /**
     * Decodes automatic speech recognition (ASR) sequences.
     * @param {Array<{tokens: bigint[], token_timestamps?: number[], stride: number[]}>} sequences The sequences to decode.
     * @param {Record<string, any>} options The options to use for decoding. The fields read from it are
     * `return_timestamps`, `return_language`, `time_precision` and `force_full_sequences`.
     * @returns {Array<string|{chunks?: undefined|Array<{language: string|null, timestamp: Array<number|null>, text: string}>}>} The decoded sequences.
     */
    _decode_asr(sequences: Array<{
        tokens: bigint[];
        token_timestamps?: number[];
        stride: number[];
    }>, { return_timestamps, return_language, time_precision, force_full_sequences, }?: Record<string, any>): Array<string | {
        chunks?: undefined | Array<{
            language: string | null;
            timestamp: Array<number | null>;
            text: string;
        }>;
    }>;
    /**
     * Finds the longest common sequence among the provided sequences.
     * @param {number[][]} sequences An array of sequences of token ids to compare.
     * @returns {number[][]} The longest common sequence found.
     * @throws {Error} If there is a bug within the function.
     * @private
     */
    private findLongestCommonSequence;
    /**
     * Private helper; its signature is not exposed by this declaration.
     * @private
     */
    private collateWordTimestamps;
    /**
     * Groups tokens by word. Returns a tuple containing a list of strings with the words,
     * and a list of `token_id` sequences with the tokens making up each word.
     * @param {number[]} tokens
     * @param {string} [language]
     * @param {string} prepend_punctionations
     * @param {string} append_punctuations
     *
     * @private
     */
    private combineTokensIntoWords;
    /**
     * Decodes token ids with timestamp handling (the exact output format is not described by this declaration).
     * @param {number[]|bigint[]} token_ids List of token IDs to decode.
     * @param {Object} decode_args Optional arguments for decoding
     * @private
     */
    private decodeWithTimestamps;
    /**
     * Combine tokens into words by splitting at any position where the tokens are decoded as valid unicode points.
     * @param {number[]} tokens
     * @returns {*}
     * @private
     */
    private splitTokensOnUnicode;
    /**
     * Combine tokens into words by splitting at whitespace and punctuation tokens.
     * @param {number[]} tokens
     * @private
     */
    private splitTokensOnSpaces;
    /**
     * Merges punctuation tokens with neighboring words.
     * @param {string[]} words
     * @param {number[][]} tokens
     * @param {number[][]} indices
     * @param {string} prepended
     * @param {string} appended
     * @private
     */
    private mergePunctuations;
}
/**
 * CodeGen tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class CodeGenTokenizer extends PreTrainedTokenizer {
}
/**
 * CLIP tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class CLIPTokenizer extends PreTrainedTokenizer {
}
/**
 * Siglip tokenizer.
 * This declaration adds no members beyond those inherited from `PreTrainedTokenizer`.
 * @extends PreTrainedTokenizer
 */
declare class SiglipTokenizer extends PreTrainedTokenizer {
}
/**
 * Marian tokenizer.
 * Adds language-code fields and overrides `_encode_text`.
 *
 * @todo This model is not yet supported by Hugging Face's "fast" tokenizers library (https://github.com/huggingface/tokenizers).
 * Therefore, this implementation (which is based on fast tokenizers) may produce slightly inaccurate results.
 * @extends PreTrainedTokenizer
 */
declare class MarianTokenizer extends PreTrainedTokenizer {
    /** Regular expression; presumably matches a language-code prefix in the input text — TODO confirm. */
    languageRegex: RegExp;
    /** Language codes supported by this tokenizer, as strings. */
    supported_language_codes: string[];
    /**
     * Encodes a single text. Overriding this method is necessary since the language codes
     * must be removed before encoding with the sentencepiece model.
     * @see https://github.com/huggingface/transformers/blob/12d51db243a00726a548a43cc333390ebae731e3/src/transformers/models/marian/tokenization_marian.py#L204-L213
     *
     * @param {string|null} text The text to encode.
     * @returns {Array} The encoded tokens (element type is not specified by this declaration).
     */
    _encode_text(text: string | null): any[];
}
/**
 * Tokenizer class registered as `Wav2Vec2CTCTokenizer` in `AutoTokenizer.TOKENIZER_CLASS_MAPPING`.
 * Declares no members of its own here; its typed surface comes from `PreTrainedTokenizer`.
 */
declare class Wav2Vec2CTCTokenizer extends PreTrainedTokenizer {
}
/**
 * Tokenizer class registered as `BlenderbotTokenizer` in `AutoTokenizer.TOKENIZER_CLASS_MAPPING`.
 * Declares no members of its own here; its typed surface comes from `PreTrainedTokenizer`.
 */
declare class BlenderbotTokenizer extends PreTrainedTokenizer {
}
/**
 * Tokenizer class registered as `BlenderbotSmallTokenizer` in `AutoTokenizer.TOKENIZER_CLASS_MAPPING`.
 * Declares no members of its own here; its typed surface comes from `PreTrainedTokenizer`.
 */
declare class BlenderbotSmallTokenizer extends PreTrainedTokenizer {
}
/**
 * Tokenizer class registered as `SpeechT5Tokenizer` in `AutoTokenizer.TOKENIZER_CLASS_MAPPING`.
 * Declares no members of its own here; its typed surface comes from `PreTrainedTokenizer`.
 */
declare class SpeechT5Tokenizer extends PreTrainedTokenizer {
}
/**
 * Tokenizer class registered as `NougatTokenizer` in `AutoTokenizer.TOKENIZER_CLASS_MAPPING`.
 * Declares no members of its own here; its typed surface comes from `PreTrainedTokenizer`.
 */
declare class NougatTokenizer extends PreTrainedTokenizer {
}
/**
 * Tokenizer class registered as `VitsTokenizer` in `AutoTokenizer.TOKENIZER_CLASS_MAPPING`.
 * The only member declared here is its constructor.
 */
declare class VitsTokenizer extends PreTrainedTokenizer {
    /**
     * @param {any} tokenizerJSON Tokenizer definition object (untyped in this declaration).
     * @param {any} tokenizerConfig Tokenizer configuration object (untyped in this declaration).
     */
    constructor(tokenizerJSON: any, tokenizerConfig: any);
}
/**
 * Tokenizer class registered as `CohereTokenizer` in `AutoTokenizer.TOKENIZER_CLASS_MAPPING`.
 * Declares no members of its own here; its typed surface comes from `PreTrainedTokenizer`.
 */
declare class CohereTokenizer extends PreTrainedTokenizer {
}
/**
 * Tokenizer class registered as `MgpstrTokenizer` in `AutoTokenizer.TOKENIZER_CLASS_MAPPING`.
 * Declares no members of its own here; its typed surface comes from `PreTrainedTokenizer`.
 */
declare class MgpstrTokenizer extends PreTrainedTokenizer {
}
/**
 * Helper class which is used to instantiate pretrained tokenizers with the `from_pretrained` function.
 * The chosen tokenizer class is determined by the type specified in the tokenizer config.
 *
 * @example
 * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased');
 */
declare class AutoTokenizer {
    /**
     * Registry of tokenizer constructors keyed by tokenizer class name.
     * The last entry, `PreTrainedTokenizer`, is the generic base class.
     */
    static TOKENIZER_CLASS_MAPPING: {
        T5Tokenizer: typeof T5Tokenizer;
        DistilBertTokenizer: typeof DistilBertTokenizer;
        CamembertTokenizer: typeof CamembertTokenizer;
        DebertaTokenizer: typeof DebertaTokenizer;
        DebertaV2Tokenizer: typeof DebertaV2Tokenizer;
        BertTokenizer: typeof BertTokenizer;
        HerbertTokenizer: typeof HerbertTokenizer;
        ConvBertTokenizer: typeof ConvBertTokenizer;
        RoFormerTokenizer: typeof RoFormerTokenizer;
        XLMTokenizer: typeof XLMTokenizer;
        ElectraTokenizer: typeof ElectraTokenizer;
        MobileBertTokenizer: typeof MobileBertTokenizer;
        SqueezeBertTokenizer: typeof SqueezeBertTokenizer;
        AlbertTokenizer: typeof AlbertTokenizer;
        GPT2Tokenizer: typeof GPT2Tokenizer;
        BartTokenizer: typeof BartTokenizer;
        MBartTokenizer: typeof MBartTokenizer;
        MBart50Tokenizer: typeof MBart50Tokenizer;
        RobertaTokenizer: typeof RobertaTokenizer;
        WhisperTokenizer: typeof WhisperTokenizer;
        CodeGenTokenizer: typeof CodeGenTokenizer;
        CLIPTokenizer: typeof CLIPTokenizer;
        SiglipTokenizer: typeof SiglipTokenizer;
        MarianTokenizer: typeof MarianTokenizer;
        BloomTokenizer: typeof BloomTokenizer;
        NllbTokenizer: typeof NllbTokenizer;
        M2M100Tokenizer: typeof M2M100Tokenizer;
        LlamaTokenizer: typeof LlamaTokenizer;
        CodeLlamaTokenizer: typeof CodeLlamaTokenizer;
        XLMRobertaTokenizer: typeof XLMRobertaTokenizer;
        MPNetTokenizer: typeof MPNetTokenizer;
        FalconTokenizer: typeof FalconTokenizer;
        GPTNeoXTokenizer: typeof GPTNeoXTokenizer;
        EsmTokenizer: typeof EsmTokenizer;
        Wav2Vec2CTCTokenizer: typeof Wav2Vec2CTCTokenizer;
        BlenderbotTokenizer: typeof BlenderbotTokenizer;
        BlenderbotSmallTokenizer: typeof BlenderbotSmallTokenizer;
        SpeechT5Tokenizer: typeof SpeechT5Tokenizer;
        NougatTokenizer: typeof NougatTokenizer;
        VitsTokenizer: typeof VitsTokenizer;
        Qwen2Tokenizer: typeof Qwen2Tokenizer;
        GemmaTokenizer: typeof GemmaTokenizer;
        Grok1Tokenizer: typeof Grok1Tokenizer;
        CohereTokenizer: typeof CohereTokenizer;
        MgpstrTokenizer: typeof MgpstrTokenizer;
        PreTrainedTokenizer: typeof PreTrainedTokenizer;
    };
    /**
     * Instantiate one of the tokenizer classes of the library from a pretrained model.
     *
     * The tokenizer class to instantiate is selected based on the `tokenizer_class` property of the config object
     * (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
     *
     * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
     * - A string, the *model id* of a pretrained tokenizer hosted inside a model repo on huggingface.co.
     *   Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
     *   user or organization name, like `dbmdz/bert-base-german-cased`.
     * - A path to a *directory* containing tokenizer files, e.g., `./my_model_directory/`.
     * @param {PretrainedTokenizerOptions} options Additional options for loading the tokenizer.
     * Only `progress_callback`, `config`, `cache_dir`, `local_files_only`, `revision` and `legacy`
     * are destructured from it in this signature.
     *
     * @returns {Promise<PreTrainedTokenizer>} Resolves to the instantiated tokenizer, typed here as the base `PreTrainedTokenizer`.
     */
    static from_pretrained(pretrained_model_name_or_path: string, { progress_callback, config, cache_dir, local_files_only, revision, legacy, }?: PretrainedTokenizerOptions): Promise<PreTrainedTokenizer>;
}
/**
 * Additional tokenizer-specific properties.
 * Intersected with `PretrainedOptions` to form `PretrainedTokenizerOptions`.
 */
type TokenizerProperties = {
    /**
     * Whether or not the `legacy` behavior of the tokenizer should be used.
     */
    legacy?: boolean | undefined;
};
/**
 * Options accepted when loading a pretrained tokenizer: the general `PretrainedOptions`
 * plus the tokenizer-specific `TokenizerProperties`.
 */
type PretrainedTokenizerOptions = PretrainedOptions & TokenizerProperties;
/**
 * A node of a doubly linked list of tokens (linked through `prev` / `next`).
 * NOTE(review): judging by the name, used by the BPE merge procedure — the consumer is not visible here.
 */
type BPENode = {
    /**
     * The token associated with the node
     */
    token: string;
    /**
     * A positional bias for the node.
     */
    bias: number;
    /**
     * The score of the node.
     */
    score?: number | undefined;
    /**
     * The previous node in the linked list.
     */
    prev?: BPENode | undefined;
    /**
     * The next node in the linked list.
     */
    next?: BPENode | undefined;
};
/**
 * The set of modes naming what happens to a delimiter when text is split on it.
 * NOTE(review): the exact effect of each mode is defined by the implementation, which is not visible here.
 */
type SplitDelimiterBehavior =
    | 'removed'
    | 'isolated'
    | 'mergedWithPrevious'
    | 'mergedWithNext'
    | 'contiguous';
/**
 * Result of running a post-processor over a list of tokens.
 */
type PostProcessedOutput = {
    /**
     * List of tokens produced by the post-processor.
     */
    tokens: string[];
    /**
     * List of token type ids produced by the post-processor.
     */
    token_type_ids?: number[] | undefined;
};
/**
 * Encoding of a single input, ready to be fed to a model.
 */
type EncodingSingle = {
    /**
     * List of token ids to be fed to a model.
     */
    input_ids: number[];
    /**
     * List of indices specifying which tokens should be attended to by the model
     */
    attention_mask: number[];
    /**
     * List of token type ids to be fed to a model
     */
    token_type_ids?: number[] | undefined;
};
/**
 * A single message, consisting of the role of its author and its text content.
 */
type Message = {
    /**
     * The role of the message (e.g., "user" or "assistant" or "system").
     */
    role: string;
    /**
     * The content of the message.
     */
    content: string;
};
/**
 * Constructor type of the base that `Normalizer` extends: instances are themselves callable
 * and also expose a `_call` method with the same loose signature.
 */
declare const Normalizer_base: new () => {
    (...args: any[]): any;
    _call(...args: any[]): any;
};
/**
 * A base class for text normalization.
 * Instances are callable (see the base type); `_call` is documented as an alias for `normalize`.
 * @abstract
 */
declare class Normalizer extends Normalizer_base {
    /**
     * Factory method for creating normalizers from config objects.
     * @static
     * @param {Object} config The configuration object for the normalizer.
     * @returns {Normalizer|null} A Normalizer object, or `null` (the condition producing `null` is not visible in this declaration).
     * @throws {Error} If an unknown Normalizer type is specified in the config.
     */
    static fromConfig(config: Object): Normalizer | null;
    /**
     * @param {Object} config The configuration object for the normalizer.
     */
    constructor(config: Object);
    /** Configuration object of this normalizer (presumably the one passed to the constructor). */
    config: Object;
    /**
     * Normalize the input text.
     * @abstract
     * @param {string} text The text to normalize.
     * @returns {string} The normalized text.
     * @throws {Error} If this method is not implemented in a subclass.
     */
    normalize(text: string): string;
    /**
     * Alias for {@link Normalizer#normalize}.
     * @param {string} text The text to normalize.
     * @returns {string} The normalized text.
     */
    _call(text: string): string;
}
/**
 * Constructor type of the base that `PreTokenizer` extends: instances are themselves callable
 * and also expose a `_call` method with the same loose signature.
 */
declare const PreTokenizer_base: new () => {
    (...args: any[]): any;
    _call(...args: any[]): any;
};
/**
 * A callable class representing a pre-tokenizer used in tokenization. Subclasses
 * should implement the `pre_tokenize_text` method to define the specific pre-tokenization logic.
 * `pre_tokenize` is the public entry point; `_call` is documented as an alias for it.
 * @extends Callable
 */
declare class PreTokenizer extends PreTokenizer_base {
    /**
     * Factory method that returns an instance of a subclass of `PreTokenizer` based on the provided configuration.
     *
     * @static
     * @param {Object} config A configuration object for the pre-tokenizer.
     * @returns {PreTokenizer} An instance of a subclass of `PreTokenizer`.
     * @throws {Error} If the provided configuration object does not correspond to any known pre-tokenizer.
     */
    static fromConfig(config: Object): PreTokenizer;
    /**
     * Method that should be implemented by subclasses to define the specific pre-tokenization logic.
     * Operates on a single string, unlike `pre_tokenize`, which also accepts an array of strings.
     *
     * @abstract
     * @param {string} text The text to pre-tokenize.
     * @param {Object} [options] Additional options for the pre-tokenization logic.
     * @returns {string[]} The pre-tokenized text.
     * @throws {Error} If the method is not implemented in the subclass.
     */
    pre_tokenize_text(text: string, options?: Object | undefined): string[];
    /**
     * Tokenizes the given text into pre-tokens.
     * @param {string|string[]} text The text or array of texts to pre-tokenize.
     * @param {Object} [options] Additional options for the pre-tokenization logic.
     * @returns {string[]} An array of pre-tokens.
     */
    pre_tokenize(text: string | string[], options?: Object | undefined): string[];
    /**
     * Alias for {@link PreTokenizer#pre_tokenize}.
     * @param {string|string[]} text The text or array of texts to pre-tokenize.
     * @param {Object} [options] Additional options for the pre-tokenization logic.
     * @returns {string[]} An array of pre-tokens.
     */
    _call(text: string | string[], options?: Object | undefined): string[];
}
/**
 * Constructor type of the base that `PostProcessor` extends: instances are themselves callable
 * and also expose a `_call` method with the same loose signature.
 */
declare const PostProcessor_base: new () => {
    (...args: any[]): any;
    _call(...args: any[]): any;
};
/**
 * @typedef {Object} PostProcessedOutput
 * @property {string[]} tokens List of tokens produced by the post-processor.
 * @property {number[]} [token_type_ids] List of token type ids produced by the post-processor.
 */
/**
 * @typedef {Object} EncodingSingle
 * @property {number[]} input_ids List of token ids to be fed to a model.
 * @property {number[]} attention_mask List of indices specifying which tokens should be attended to by the model
 * @property {number[]} [token_type_ids] List of token type ids to be fed to a model
 */
/**
 * Base class for post-processors, which turn a list of tokens into a `PostProcessedOutput`.
 * Instances are callable; `_call` is documented as an alias for `post_process`.
 * @extends Callable
 */
declare class PostProcessor extends PostProcessor_base {
    /**
     * Factory method to create a PostProcessor object from a configuration object.
     *
     * @param {Object} config Configuration object representing a PostProcessor.
     * @returns {PostProcessor} A PostProcessor object created from the given configuration.
     * @throws {Error} If an unknown PostProcessor type is encountered.
     */
    static fromConfig(config: Object): PostProcessor;
    /**
     * @param {Object} config The configuration for the post-processor.
     */
    constructor(config: Object);
    /** Configuration object of this post-processor (presumably the one passed to the constructor). */
    config: Object;
    /**
     * Method to be implemented in subclass to apply post-processing on the given tokens.
     *
     * @param {Array} tokens The input tokens to be post-processed.
     * @param {...*} args Additional arguments required by the post-processing logic.
     * @returns {PostProcessedOutput} The post-processed tokens.
     * @throws {Error} If the method is not implemented in subclass.
     */
    post_process(tokens: any[], ...args: any[]): PostProcessedOutput;
    /**
     * Alias for {@link PostProcessor#post_process}.
     * @param {Array} tokens The input tokens to be post-processed.
     * @param {...*} args Additional arguments required by the post-processing logic.
     * @returns {PostProcessedOutput} The post-processed tokens.
     */
    _call(tokens: any[], ...args: any[]): PostProcessedOutput;
}
/**
 * Constructor type of the base that `Decoder` extends: instances are themselves callable
 * and also expose a `_call` method with the same loose signature.
 */
declare const Decoder_base: new () => {
    (...args: any[]): any;
    _call(...args: any[]): any;
};
/**
 * The base class for token decoders.
 * `decode` turns a token list into one string; `decode_chain` is the per-subclass step
 * that returns a list of strings.
 * @extends Callable
 */
declare class Decoder extends Decoder_base {
    /**
     * Creates a decoder instance based on the provided configuration.
     *
     * @param {Object} config The configuration object.
     * @returns {Decoder} A decoder instance.
     * @throws {Error} If an unknown decoder type is provided.
     */
    static fromConfig(config: Object): Decoder;
    /**
     * Creates an instance of `Decoder`.
     *
     * @param {Object} config The configuration object.
     */
    constructor(config: Object);
    /** Configuration object of this decoder (presumably the one passed to the constructor). */
    config: Object;
    /** @type {AddedToken[]} */
    added_tokens: AddedToken[];
    /** NOTE(review): typed `any`; presumably the end-of-word suffix taken from the config — confirm. */
    end_of_word_suffix: any;
    /** NOTE(review): typed `any`; presumably a flag read from the config — confirm its type and meaning. */
    trim_offsets: any;
    /**
     * Calls the `decode` method.
     *
     * @param {string[]} tokens The list of tokens.
     * @returns {string} The decoded string.
     */
    _call(tokens: string[]): string;
    /**
     * Decodes a list of tokens.
     * @param {string[]} tokens The list of tokens.
     * @returns {string} The decoded string.
     */
    decode(tokens: string[]): string;
    /**
     * Apply the decoder to a list of tokens.
     *
     * @param {string[]} tokens The list of tokens.
     * @returns {string[]} The decoded list of tokens.
     * @throws {Error} If the `decode_chain` method is not implemented in the subclass.
     */
    decode_chain(tokens: string[]): string[];
}
/**
 * Represent a token added by the user on top of the existing Model vocabulary.
 * AddedToken can be configured to specify the behavior they should have in various situations like:
 *   - Whether they should only match single words
 *   - Whether to include any whitespace on its left or right
 */
declare class AddedToken {
    /**
     * Creates a new instance of AddedToken.
     * @param {Object} config Added token configuration object.
     * @param {string} config.content The content of the added token.
     * @param {number} config.id The id of the added token.
     * @param {boolean} [config.single_word=false] Whether this token must be a single word or can break words.
     * @param {boolean} [config.lstrip=false] Whether this token should strip whitespaces on its left.
     * @param {boolean} [config.rstrip=false] Whether this token should strip whitespaces on its right.
     * @param {boolean} [config.normalized=false] Whether this token should be normalized.
     * @param {boolean} [config.special=false] Whether this token is special.
     */
    constructor(config: {
        content: string;
        id: number;
        single_word?: boolean | undefined;
        lstrip?: boolean | undefined;
        rstrip?: boolean | undefined;
        normalized?: boolean | undefined;
        special?: boolean | undefined;
    });
    /** The content of the added token. */
    content: string;
    /** The id of the added token. */
    id: number;
    /** Whether this token must be a single word or can break words. */
    single_word: boolean;
    /** Whether this token should strip whitespaces on its left. */
    lstrip: boolean;
    /** Whether this token should strip whitespaces on its right. */
    rstrip: boolean;
    /** Whether this token is special. */
    special: boolean;
    /**
     * Whether this token should be normalized.
     * NOTE(review): typed nullable here although the constructor documents a default of `false` — confirm when `null` can occur.
     */
    normalized: boolean | null;
}

// `tokenizers_*` aliases consumed by the export list of the `tokenizers` namespace.
// For classes there is a pair: a `type` alias (instance type) and a `declare const` (constructor value).
// Pure types (e.g. BPENode, EncodingSingle, Message) have only the `type` alias, and the
// function `is_chinese_char` has only the `declare const`.
type tokenizers_AlbertTokenizer = AlbertTokenizer;
declare const tokenizers_AlbertTokenizer: typeof AlbertTokenizer;
type tokenizers_AutoTokenizer = AutoTokenizer;
declare const tokenizers_AutoTokenizer: typeof AutoTokenizer;
type tokenizers_BPENode = BPENode;
type tokenizers_BartTokenizer = BartTokenizer;
declare const tokenizers_BartTokenizer: typeof BartTokenizer;
type tokenizers_BertTokenizer = BertTokenizer;
declare const tokenizers_BertTokenizer: typeof BertTokenizer;
type tokenizers_BlenderbotSmallTokenizer = BlenderbotSmallTokenizer;
declare const tokenizers_BlenderbotSmallTokenizer: typeof BlenderbotSmallTokenizer;
type tokenizers_BlenderbotTokenizer = BlenderbotTokenizer;
declare const tokenizers_BlenderbotTokenizer: typeof BlenderbotTokenizer;
type tokenizers_BloomTokenizer = BloomTokenizer;
declare const tokenizers_BloomTokenizer: typeof BloomTokenizer;
type tokenizers_CLIPTokenizer = CLIPTokenizer;
declare const tokenizers_CLIPTokenizer: typeof CLIPTokenizer;
type tokenizers_CamembertTokenizer = CamembertTokenizer;
declare const tokenizers_CamembertTokenizer: typeof CamembertTokenizer;
type tokenizers_CodeGenTokenizer = CodeGenTokenizer;
declare const tokenizers_CodeGenTokenizer: typeof CodeGenTokenizer;
type tokenizers_CodeLlamaTokenizer = CodeLlamaTokenizer;
declare const tokenizers_CodeLlamaTokenizer: typeof CodeLlamaTokenizer;
type tokenizers_CohereTokenizer = CohereTokenizer;
declare const tokenizers_CohereTokenizer: typeof CohereTokenizer;
type tokenizers_ConvBertTokenizer = ConvBertTokenizer;
declare const tokenizers_ConvBertTokenizer: typeof ConvBertTokenizer;
type tokenizers_DebertaTokenizer = DebertaTokenizer;
declare const tokenizers_DebertaTokenizer: typeof DebertaTokenizer;
type tokenizers_DebertaV2Tokenizer = DebertaV2Tokenizer;
declare const tokenizers_DebertaV2Tokenizer: typeof DebertaV2Tokenizer;
type tokenizers_DistilBertTokenizer = DistilBertTokenizer;
declare const tokenizers_DistilBertTokenizer: typeof DistilBertTokenizer;
type tokenizers_ElectraTokenizer = ElectraTokenizer;
declare const tokenizers_ElectraTokenizer: typeof ElectraTokenizer;
type tokenizers_EncodingSingle = EncodingSingle;
type tokenizers_EsmTokenizer = EsmTokenizer;
declare const tokenizers_EsmTokenizer: typeof EsmTokenizer;
type tokenizers_FalconTokenizer = FalconTokenizer;
declare const tokenizers_FalconTokenizer: typeof FalconTokenizer;
type tokenizers_GPT2Tokenizer = GPT2Tokenizer;
declare const tokenizers_GPT2Tokenizer: typeof GPT2Tokenizer;
type tokenizers_GPTNeoXTokenizer = GPTNeoXTokenizer;
declare const tokenizers_GPTNeoXTokenizer: typeof GPTNeoXTokenizer;
type tokenizers_GemmaTokenizer = GemmaTokenizer;
declare const tokenizers_GemmaTokenizer: typeof GemmaTokenizer;
type tokenizers_Grok1Tokenizer = Grok1Tokenizer;
declare const tokenizers_Grok1Tokenizer: typeof Grok1Tokenizer;
type tokenizers_HerbertTokenizer = HerbertTokenizer;
declare const tokenizers_HerbertTokenizer: typeof HerbertTokenizer;
type tokenizers_LlamaTokenizer = LlamaTokenizer;
declare const tokenizers_LlamaTokenizer: typeof LlamaTokenizer;
type tokenizers_M2M100Tokenizer = M2M100Tokenizer;
declare const tokenizers_M2M100Tokenizer: typeof M2M100Tokenizer;
type tokenizers_MBart50Tokenizer = MBart50Tokenizer;
declare const tokenizers_MBart50Tokenizer: typeof MBart50Tokenizer;
type tokenizers_MBartTokenizer = MBartTokenizer;
declare const tokenizers_MBartTokenizer: typeof MBartTokenizer;
type tokenizers_MPNetTokenizer = MPNetTokenizer;
declare const tokenizers_MPNetTokenizer: typeof MPNetTokenizer;
type tokenizers_MarianTokenizer = MarianTokenizer;
declare const tokenizers_MarianTokenizer: typeof MarianTokenizer;
type tokenizers_Message = Message;
type tokenizers_MgpstrTokenizer = MgpstrTokenizer;
declare const tokenizers_MgpstrTokenizer: typeof MgpstrTokenizer;
type tokenizers_MobileBertTokenizer = MobileBertTokenizer;
declare const tokenizers_MobileBertTokenizer: typeof MobileBertTokenizer;
type tokenizers_NllbTokenizer = NllbTokenizer;
declare const tokenizers_NllbTokenizer: typeof NllbTokenizer;
type tokenizers_NougatTokenizer = NougatTokenizer;
declare const tokenizers_NougatTokenizer: typeof NougatTokenizer;
type tokenizers_PostProcessedOutput = PostProcessedOutput;
type tokenizers_PreTrainedTokenizer = PreTrainedTokenizer;
declare const tokenizers_PreTrainedTokenizer: typeof PreTrainedTokenizer;
type tokenizers_PretrainedTokenizerOptions = PretrainedTokenizerOptions;
type tokenizers_Qwen2Tokenizer = Qwen2Tokenizer;
declare const tokenizers_Qwen2Tokenizer: typeof Qwen2Tokenizer;
type tokenizers_RoFormerTokenizer = RoFormerTokenizer;
declare const tokenizers_RoFormerTokenizer: typeof RoFormerTokenizer;
type tokenizers_RobertaTokenizer = RobertaTokenizer;
declare const tokenizers_RobertaTokenizer: typeof RobertaTokenizer;
type tokenizers_SiglipTokenizer = SiglipTokenizer;
declare const tokenizers_SiglipTokenizer: typeof SiglipTokenizer;
type tokenizers_SpeechT5Tokenizer = SpeechT5Tokenizer;
declare const tokenizers_SpeechT5Tokenizer: typeof SpeechT5Tokenizer;
type tokenizers_SplitDelimiterBehavior = SplitDelimiterBehavior;
type tokenizers_SqueezeBertTokenizer = SqueezeBertTokenizer;
declare const tokenizers_SqueezeBertTokenizer: typeof SqueezeBertTokenizer;
type tokenizers_T5Tokenizer = T5Tokenizer;
declare const tokenizers_T5Tokenizer: typeof T5Tokenizer;
type tokenizers_TokenizerModel = TokenizerModel;
declare const tokenizers_TokenizerModel: typeof TokenizerModel;
type tokenizers_TokenizerProperties = TokenizerProperties;
type tokenizers_VitsTokenizer = VitsTokenizer;
declare const tokenizers_VitsTokenizer: typeof VitsTokenizer;
type tokenizers_Wav2Vec2CTCTokenizer = Wav2Vec2CTCTokenizer;
declare const tokenizers_Wav2Vec2CTCTokenizer: typeof Wav2Vec2CTCTokenizer;
type tokenizers_WhisperTokenizer = WhisperTokenizer;
declare const tokenizers_WhisperTokenizer: typeof WhisperTokenizer;
type tokenizers_XLMRobertaTokenizer = XLMRobertaTokenizer;
declare const tokenizers_XLMRobertaTokenizer: typeof XLMRobertaTokenizer;
type tokenizers_XLMTokenizer = XLMTokenizer;
declare const tokenizers_XLMTokenizer: typeof XLMTokenizer;
declare const tokenizers_is_chinese_char: typeof is_chinese_char;
/**
 * Namespace bundling the tokenizer classes, helper types and `is_chinese_char`
 * under their public names. Entries marked `type` are type-only exports.
 */
declare namespace tokenizers {
  export {
    tokenizers_AlbertTokenizer as AlbertTokenizer,
    tokenizers_AutoTokenizer as AutoTokenizer,
    type tokenizers_BPENode as BPENode,
    tokenizers_BartTokenizer as BartTokenizer,
    tokenizers_BertTokenizer as BertTokenizer,
    tokenizers_BlenderbotSmallTokenizer as BlenderbotSmallTokenizer,
    tokenizers_BlenderbotTokenizer as BlenderbotTokenizer,
    tokenizers_BloomTokenizer as BloomTokenizer,
    tokenizers_CLIPTokenizer as CLIPTokenizer,
    tokenizers_CamembertTokenizer as CamembertTokenizer,
    tokenizers_CodeGenTokenizer as CodeGenTokenizer,
    tokenizers_CodeLlamaTokenizer as CodeLlamaTokenizer,
    tokenizers_CohereTokenizer as CohereTokenizer,
    tokenizers_ConvBertTokenizer as ConvBertTokenizer,
    tokenizers_DebertaTokenizer as DebertaTokenizer,
    tokenizers_DebertaV2Tokenizer as DebertaV2Tokenizer,
    tokenizers_DistilBertTokenizer as DistilBertTokenizer,
    tokenizers_ElectraTokenizer as ElectraTokenizer,
    type tokenizers_EncodingSingle as EncodingSingle,
    tokenizers_EsmTokenizer as EsmTokenizer,
    tokenizers_FalconTokenizer as FalconTokenizer,
    tokenizers_GPT2Tokenizer as GPT2Tokenizer,
    tokenizers_GPTNeoXTokenizer as GPTNeoXTokenizer,
    tokenizers_GemmaTokenizer as GemmaTokenizer,
    tokenizers_Grok1Tokenizer as Grok1Tokenizer,
    tokenizers_HerbertTokenizer as HerbertTokenizer,
    tokenizers_LlamaTokenizer as LlamaTokenizer,
    tokenizers_M2M100Tokenizer as M2M100Tokenizer,
    tokenizers_MBart50Tokenizer as MBart50Tokenizer,
    tokenizers_MBartTokenizer as MBartTokenizer,
    tokenizers_MPNetTokenizer as MPNetTokenizer,
    tokenizers_MarianTokenizer as MarianTokenizer,
    type tokenizers_Message as Message,
    tokenizers_MgpstrTokenizer as MgpstrTokenizer,
    tokenizers_MobileBertTokenizer as MobileBertTokenizer,
    tokenizers_NllbTokenizer as NllbTokenizer,
    tokenizers_NougatTokenizer as NougatTokenizer,
    type tokenizers_PostProcessedOutput as PostProcessedOutput,
    tokenizers_PreTrainedTokenizer as PreTrainedTokenizer,
    type tokenizers_PretrainedTokenizerOptions as PretrainedTokenizerOptions,
    tokenizers_Qwen2Tokenizer as Qwen2Tokenizer,
    tokenizers_RoFormerTokenizer as RoFormerTokenizer,
    tokenizers_RobertaTokenizer as RobertaTokenizer,
    tokenizers_SiglipTokenizer as SiglipTokenizer,
    tokenizers_SpeechT5Tokenizer as SpeechT5Tokenizer,
    type tokenizers_SplitDelimiterBehavior as SplitDelimiterBehavior,
    tokenizers_SqueezeBertTokenizer as SqueezeBertTokenizer,
    tokenizers_T5Tokenizer as T5Tokenizer,
    tokenizers_TokenizerModel as TokenizerModel,
    type tokenizers_TokenizerProperties as TokenizerProperties,
    tokenizers_VitsTokenizer as VitsTokenizer,
    tokenizers_Wav2Vec2CTCTokenizer as Wav2Vec2CTCTokenizer,
    tokenizers_WhisperTokenizer as WhisperTokenizer,
    tokenizers_XLMRobertaTokenizer as XLMRobertaTokenizer,
    tokenizers_XLMTokenizer as XLMTokenizer,
    tokenizers_is_chinese_char as is_chinese_char
  };
}

/** Type of the `AutoTokenizer.TOKENIZER_CLASS_MAPPING` registry (class name → tokenizer constructor). */
type TokenizerMapping = typeof AutoTokenizer.TOKENIZER_CLASS_MAPPING;
/** Union of the class-name keys of `TokenizerMapping`. */
type SupportedTokenizerClasses = keyof TokenizerMapping;
/**
 * Resolves a tokenizer class name `T` to the instance type of the matching `TokenizerMapping` entry,
 * falling back to `PreTrainedTokenizer` when `T` is not a supported class name.
 */
type TokenizerClassNameMapping<T extends string> = T extends SupportedTokenizerClasses ? InstanceType<TokenizerMapping[T]> : PreTrainedTokenizer;
/**
 * Resolves a config's `tokenizer_class` to the matching `TokenizerMapping` entry,
 * falling back to `PreTrainedTokenizer` when the class name is not supported.
 *
 * NOTE(review): for supported names this yields the constructor type (`typeof X`), whereas the
 * fallback branch and `TokenizerClassNameMapping` yield instance types — confirm whether
 * `InstanceType<TokenizerMapping[...]>` was intended.
 */
type TokenizerConfigMapping<Config extends {
    tokenizer_class: string;
}> = Config["tokenizer_class"] extends SupportedTokenizerClasses ? TokenizerMapping[Config["tokenizer_class"]] : PreTrainedTokenizer;
/** Union of the types of all property values of `T`. */
type ValueOf<T> = T[keyof T];
/**
 * Signature of a function that returns a tokenizer instance `M` (an instance of one of the
 * `TokenizerMapping` classes), given optional partial `tokenizerJSON` / `tokenizerConfig` data.
 */
type FromPreTrainedFn<M extends InstanceType<ValueOf<TokenizerMapping>>> = (params?: {
    tokenizerJSON?: Partial<NSTokenizerJSON.Root>;
    tokenizerConfig?: Partial<NSTokenizerConfig.Root>;
}) => M;
/**
 * Shapes describing a tokenizer configuration object; `Root` is the top-level shape.
 * `AutoMap`, `ChatTemplateElement` and `SPModelKwargs` are declared here but not referenced
 * by `Root`, whose corresponding fields are typed `any`.
 */
declare namespace NSTokenizerConfig {
    /** Top-level tokenizer configuration. */
    type Root = {
        add_prefix_space?: any;
        bos_token?: any;
        clean_up_tokenization_spaces: boolean;
        eos_token: any;
        model_max_length: number;
        /** Name of the tokenizer class to use. */
        tokenizer_class: string;
        unk_token: any;
        chat_template?: any;
        add_bos_token?: boolean;
        add_eos_token?: boolean;
        /** Added-token descriptors, keyed by string. */
        added_tokens_decoder?: Record<string, AddedTokensDecoder>;
        legacy?: boolean | null;
        merges_file?: null;
        pad_token?: any;
        sp_model_kwargs?: any;
        spaces_between_special_tokens?: boolean;
        use_default_system_prompt?: boolean;
        vocab_file?: null;
        auto_map?: any;
        do_lower_case?: boolean;
        padding_side?: string;
        remove_space?: boolean;
        additional_special_tokens?: string[];
        errors?: string;
        split_special_tokens?: boolean;
    };
    /** One entry of `Root.added_tokens_decoder`. */
    type AddedTokensDecoder = {
        content: string;
        lstrip: boolean;
        normalized: boolean;
        rstrip: boolean;
        single_word: boolean;
        special: boolean;
    };
    type AutoMap = {
        AutoTokenizer: (string | null)[];
    };
    /** A named chat template. */
    type ChatTemplateElement = {
        name: string;
        template: string;
    };
    type SPModelKwargs = {};
}
/**
 * Shapes describing a tokenizer JSON definition; `Root` is the top-level shape.
 * `Root` types its nested sections as `any` / `any[]`; the more specific shapes below
 * are declared alongside it but are not referenced by `Root`.
 */
declare namespace NSTokenizerJSON {
    /** Top-level tokenizer definition. */
    type Root = {
        version: string;
        truncation: null;
        padding: null;
        added_tokens: any[];
        normalizer: any;
        pre_tokenizer: any;
        post_processor: any;
        decoder: any;
        model: any;
    };
    /** Descriptor of a single added token. */
    type AddedToken = {
        id: number;
        content: string;
        single_word: boolean;
        lstrip: boolean;
        rstrip: boolean;
        normalized: boolean;
        special: boolean;
    };
    type PretokenizerElement = {
        type: string;
        decoders?: DecoderDecoder[];
        add_prefix_space?: boolean;
        trim_offsets?: boolean;
        use_regex?: boolean;
        individual_digits?: boolean;
    };
    type DecoderDecoder = {
        type: string;
        pattern?: Pattern;
        content?: string;
        start?: number;
        stop?: number;
    };
    type Pattern = {
        String: string;
    };
    type Model = {
        type: string;
        dropout: null;
        unk_token: any;
        continuing_subword_prefix: null;
        end_of_word_suffix: null;
        fuse_unk: boolean;
        byte_fallback: boolean;
        /** Numeric value per string key. */
        vocab: Record<string, number>;
        merges: string[];
    };
    type TopLevelNormalizer = {
        type: string;
        normalizers?: NormalizerElement[];
    };
    type NormalizerElement = {
        type: string;
        prepend?: string;
        pattern?: Pattern;
        content?: string;
    };
    type PostProcessor = {
        type: string;
        single: Pair[];
        pair: Pair[];
        /** Special-token descriptors, keyed by string. */
        special_tokens: Record<string, SpecialToken>;
        add_prefix_space?: boolean;
        trim_offsets?: boolean;
        use_regex?: boolean;
    };
    /** Template item holding either a `SpecialToken` or a `Sequence` entry (both optional). */
    type Pair = {
        SpecialToken?: Sequence;
        Sequence?: Sequence;
    };
    type Sequence = {
        id: string;
        type_id: number;
    };
    type SpecialToken = {
        id: string;
        ids: number[];
        tokens: string[];
    };
    type PreTokenizer = {
        type: string;
        pretokenizers?: PretokenizerElement[];
    };
}

/**
 * Already-loaded tokenizer data: the tokenizer JSON definition and the tokenizer configuration.
 * Both are `Partial`, so any individual field may be absent.
 */
interface ITokenizerModelJsonData {
    /** Tokenizer JSON definition (partial `NSTokenizerJSON.Root`). */
    tokenizerJSON: Partial<NSTokenizerJSON.Root>;
    /** Tokenizer configuration (partial `NSTokenizerConfig.Root`). */
    tokenizerConfig: Partial<NSTokenizerConfig.Root>;
}
/**
 * URLs from which the tokenizer JSON definition and the tokenizer configuration are fetched.
 */
interface ITokenizerModelUrls {
    /** URL of the tokenizer JSON definition. */
    tokenizerJSON: string;
    /** URL of the tokenizer configuration. */
    tokenizerConfig: string;
}
/**
 * Static helpers that build a `PreTrainedTokenizer` either from in-memory tokenizer data
 * (`fromPreTrained`, synchronous) or from URLs (`fromPreTrainedUrls`, asynchronous).
 */
declare class TokenizerLoader {
    /**
     * Creates a pre-trained tokenizer from the provided model data.
     *
     * @param {ITokenizerModelJsonData} model - The model data containing the tokenizer JSON and configuration.
     * @return {PreTrainedTokenizer} pre-trained tokenizer.
     * @throws {Error} If the tokenizer JSON or configuration is missing.
     */
    static fromPreTrained(model: ITokenizerModelJsonData): PreTrainedTokenizer;
    /**
     * Creates a pre-trained tokenizer from the provided model URLs.
     *
     * @param {ITokenizerModelUrls} model - The model URLs containing the tokenizer JSON and configuration.
     * @param {Object} [options] - Optional parameters.
     * @param {any} [options.fetch] - The fetch function to use for making HTTP requests. Defaults to global.fetch.
     * @param {Partial<NSTokenizerJSON.Root>} [options.tokenizerJSON] - Additional tokenizer JSON data to merge with the fetched data.
     * @param {Partial<NSTokenizerConfig.Root>} [options.tokenizerConfig] - Additional tokenizer configuration data to merge with the fetched data.
     * @return {Promise<PreTrainedTokenizer>} A promise that resolves to the pre-trained tokenizer.
     */
    static fromPreTrainedUrls(model: ITokenizerModelUrls, options?: {
        fetch?: any;
    } & Partial<ITokenizerModelJsonData>): Promise<PreTrainedTokenizer>;
}

export { type FromPreTrainedFn, NSTokenizerConfig, NSTokenizerJSON, type SupportedTokenizerClasses, type TokenizerClassNameMapping, type TokenizerConfigMapping, TokenizerLoader, type TokenizerMapping, tokenizers };
