import { PreTrainedTokenizer } from "@huggingface/transformers";
/** Local alias for `PreTrainedTokenizer` as imported from `@huggingface/transformers`. */
type TransformersJsTokenizer = PreTrainedTokenizer;
/** Signature of a function that takes a text and returns its token count. */
type CallableTokenizerCounter = (text: string) => number;
/** Signature of a function that takes a text and returns a sequence of token IDs. */
type CallableTokenizerEncoder = (text: string) => number[];
/** Signature of a function that takes a sequence of token IDs and returns text. */
type CallableTokenizerDecoder = (tokens: number[]) => string;
/** Tokenizer object types accepted as ready-made instances: a `BaseTokenizer` or a Transformers.js tokenizer. */
type SupportedTokenizerInstance = BaseTokenizer | TransformersJsTokenizer;
/**
 * A custom tokenizer supplied as a plain object of functions.
 *
 * Only `countTokens` is required; `encode` and `decode` may be omitted.
 * NOTE(review): presumably `Tokenizer.encode` / `Tokenizer.decode` throw when the
 * corresponding function is missing — confirm against the implementation.
 */
export type CallableTokenizer = {
    /** Optional: converts a text into a sequence of token IDs. */
    encode?: CallableTokenizerEncoder;
    /** Optional: converts a sequence of token IDs back into text. */
    decode?: CallableTokenizerDecoder;
    /** Required: returns the number of tokens in a text. */
    countTokens: CallableTokenizerCounter;
};
/**
 * Base class for tokenizers, providing a foundational structure for text tokenization.
 *
 * It manages a vocabulary of tokens and their corresponding IDs, and is intended to be
 * extended by specific tokenizer implementations (e.g., CharacterTokenizer, WordTokenizer).
 * Subclasses must implement `encode`, `decode` and `countTokens`; the batch variants are
 * declared here as concrete methods.
 *
 * NOTE(review): `vocab` and `token2id` are declared public and mutable, while
 * `getVocab()` / `getToken2id()` expose read-only types — confirm which access path
 * callers are expected to use.
 *
 * @abstract
 */
export declare abstract class BaseTokenizer {
    /** The tokens currently in the vocabulary. */
    vocab: string[];
    /** Mapping from each token (string) to its ID (number). */
    token2id: Map<string, number>;
    /**
     * Initializes the BaseTokenizer.
     * Sets up an empty vocabulary (`this.vocab`) and an empty token-to-ID map (`this.token2id`).
     * It also adds a space character (" ") as the initial token to the vocabulary.
     */
    constructor();
    /**
     * Adds a token to the vocabulary if it's not already present.
     * Only accessible to this class and its subclasses.
     * @param token The token to add.
     * @returns The ID of the token.
     */
    protected addTokenToVocab(token: string): number;
    /**
     * Returns a string representation of the tokenizer.
     * @returns String representation.
     */
    toString(): string;
    /**
     * Returns the vocabulary.
     * The return type is read-only, so callers cannot mutate it through this reference.
     * @returns The vocabulary.
     */
    getVocab(): readonly string[];
    /**
     * Returns the token-to-ID mapping.
     * The return type is read-only, so callers cannot mutate it through this reference.
     * @returns The token-to-ID map.
     */
    getToken2id(): ReadonlyMap<string, number>;
    /**
     * Encodes the given text into tokens. Must be implemented by subclasses.
     * @param text The text to encode.
     * @returns Encoded sequence of token IDs.
     */
    abstract encode(text: string): number[];
    /**
     * Decodes the given tokens back into text. Must be implemented by subclasses.
     * @param tokens The tokens to decode.
     * @returns Decoded text.
     */
    abstract decode(tokens: number[]): string;
    /**
     * Counts the number of tokens in the given text. Must be implemented by subclasses.
     * @param text The text to count tokens in.
     * @returns Number of tokens.
     */
    abstract countTokens(text: string): number;
    /**
     * Batch-encodes a list of texts into tokens.
     * @param texts The texts to encode.
     * @returns List of encoded sequences.
     */
    encodeBatch(texts: string[]): number[][];
    /**
     * Batch-decodes a list of token sequences back into text.
     * @param tokenSequences The token sequences to decode.
     * @returns List of decoded texts.
     */
    decodeBatch(tokenSequences: number[][]): string[];
    /**
     * Counts the number of tokens in each text of a batch.
     * @param texts The texts to count tokens in.
     * @returns List of token counts.
     */
    countTokensBatch(texts: string[]): number[];
}
/**
 * Character-based tokenizer.
 * Concrete `BaseTokenizer` that supplies `encode`, `decode` and `countTokens`
 * at character granularity.
 */
export declare class CharacterTokenizer extends BaseTokenizer {
    /**
     * Encodes the given text into tokens.
     * @param text The text to encode.
     * @returns Encoded sequence of character IDs.
     */
    encode(text: string): number[];
    /**
     * Decodes the given tokens back into text.
     * @param tokens The character IDs to decode.
     * @returns Decoded text.
     */
    decode(tokens: number[]): string;
    /**
     * Counts the number of tokens in the given text.
     * For CharacterTokenizer, this is the length of the text.
     * NOTE(review): "length" presumably means `text.length` (UTF-16 code units, not
     * code points) — confirm against the implementation.
     * @param text The text to count tokens in.
     * @returns Number of characters (tokens).
     */
    countTokens(text: string): number;
}
/**
 * Word-based tokenizer.
 * Concrete `BaseTokenizer` that supplies `encode`, `decode` and `countTokens`
 * at word granularity, plus a public `tokenize` helper.
 */
export declare class WordTokenizer extends BaseTokenizer {
    /**
     * Tokenizes the given text into words.
     * Splits the text by spaces.
     * @param text The text to tokenize.
     * @returns List of word tokens.
     */
    tokenize(text: string): string[];
    /**
     * Encodes the given text into tokens.
     * @param text The text to encode.
     * @returns Encoded sequence of word IDs.
     */
    encode(text: string): number[];
    /**
     * Decodes token IDs back to text.
     * Joins tokens with spaces.
     * NOTE(review): whether `decode(encode(text))` reproduces consecutive or
     * leading/trailing spaces exactly is not visible from this declaration — confirm.
     * @param tokens The word IDs to decode.
     * @returns Decoded text.
     */
    decode(tokens: number[]): string;
    /**
     * Counts the number of tokens in the given text.
     * For WordTokenizer, this is the number of words after splitting by space.
     * @param text The text to count tokens in.
     * @returns Number of words (tokens).
     */
    countTokens(text: string): number;
}
/**
 * Unified tokenizer interface for Chonkie.
 * This class provides a consistent API for various tokenization backends.
 *
 * Instances are obtained through the async factory `Tokenizer.create()`; the
 * constructor is private. Every encode/decode/count method returns a Promise.
 */
export declare class Tokenizer {
    /** The wrapped tokenizer (type elided in this declaration because the member is private). */
    private tokenizerInstance;
    /** Name of the backend in use; exposed read-only through the `backend` getter. */
    private _backend;
    /**
     * Private constructor. Use `Tokenizer.create()` to instantiate.
     * The parameters below are not shown in this declaration because the
     * constructor is private.
     * @param tokenizerInstance The underlying tokenizer instance.
     * @param backend The name of the backend being used.
     */
    private constructor();
    /**
     * Creates and initializes a Tokenizer instance.
     * @param tokenizer Tokenizer identifier (e.g., "google-bert/bert-base-uncased", "character", "word"),
     *                  a pre-initialized tokenizer instance, or a custom callable tokenizer.
     *                  Defaults to "google-bert/bert-base-uncased".
     * @returns A promise that resolves to a Tokenizer instance.
     * @throws Error if the specified tokenizer cannot be loaded or is unsupported.
     */
    static create(tokenizer?: string | SupportedTokenizerInstance | CallableTokenizer): Promise<Tokenizer>;
    /**
     * Loads the tokenizer based on the identifier string.
     * Tries loading from 'tokenizers', then 'transformers'.
     * Also supports 'character' and 'word' for basic tokenizers.
     * NOTE(review): the instance types accepted by `create()` cover only `BaseTokenizer`
     * and the Transformers.js tokenizer, so the 'tokenizers' step described above may be
     * stale — confirm against the implementation.
     * @param tokenizerName The name or path of the tokenizer to load.
     * @returns A promise that resolves to a supported tokenizer instance.
     * @throws Error if the tokenizer cannot be found or loaded.
     */
    private static _loadTokenizer;
    /**
     * Determines the backend name from a tokenizer instance.
     * @param instance The tokenizer instance.
     * @returns The backend name (e.g., "chonkie", "tokenizers", "transformers", "callable").
     * @throws Error if the instance type is unsupported.
     */
    private static _getBackendFromInstance;
    /**
     * Gets the name of the backend currently used by this tokenizer instance.
     * Declared as a getter only, so it is read-only to callers.
     * @returns The backend name.
     */
    get backend(): string;
    /**
     * Encodes the text into tokens.
     * @param text The text to encode.
     * @returns A promise that resolves to an array of token IDs.
     * @throws Error if encoding is not supported by the backend or fails.
     */
    encode(text: string): Promise<number[]>;
    /**
     * Decodes the tokens back into text.
     * @param tokens An array of token IDs.
     * @returns A promise that resolves to the decoded string.
     * @throws Error if decoding is not supported by the backend or fails.
     */
    decode(tokens: number[]): Promise<string>;
    /**
     * Counts the number of tokens in the text.
     * @param text The text to count tokens in.
     * @returns A promise that resolves to the number of tokens.
     * @throws Error if token counting is not supported by the backend or fails.
     */
    countTokens(text: string): Promise<number>;
    /**
     * Batch-encodes a list of texts into tokens.
     * @param texts An array of strings to encode.
     * @returns A promise that resolves to a list of encoded token ID sequences.
     * @throws Error if batch encoding is not supported by the backend or fails.
     */
    encodeBatch(texts: string[]): Promise<number[][]>;
    /**
     * Batch-decodes a list of token sequences back into text.
     * @param tokenSequences An array of token ID sequences.
     * @returns A promise that resolves to a list of decoded strings.
     * @throws Error if batch decoding is not supported by the backend or fails.
     */
    decodeBatch(tokenSequences: number[][]): Promise<string[]>;
    /**
     * Counts the number of tokens in each text of a batch.
     * @param texts An array of strings to count tokens in.
     * @returns A promise that resolves to a list of token counts.
     * @throws Error if batch token counting is not supported by the backend or fails.
     */
    countTokensBatch(texts: string[]): Promise<number[]>;
}
export {};
