import { SentenceSplitter } from "../misc/Misc";
import { DTYPE } from "./types/qantization-types";
import { GenerateOptions, MessageStatus, Message } from "./types/types";
/**
 * High-level client wrapper for the LLM web worker.
 *
 * Features:
 * - Initialization with progress handled by the worker.
 * - Text generation with streaming and per-request callbacks.
 * - Status tracking: each request ID is recorded as `pending | streaming | completed | error | cancelled`.
 * - Auto-cancel previous in-flight request before starting a new one.
 * - Streams **only complete sentences** to onDelta.
 */
export declare class LlmClient {
    // NOTE(review): the private members below are emitted without types in this
    // declaration file; the notes on them are inferred from names and from the
    // public signatures and should be confirmed against the implementation.

    /** Presumably the client used when the "mediapipe" pipeline is selected — TODO confirm. */
    private _mediapipeClient;
    /** Presumably the pipeline chosen by the most recent `initialize` call — TODO confirm. */
    private _currentPipeline;
    /** Presumably the web worker this client wraps — TODO confirm. */
    private _worker;
    /** Presumably related to the constructor's `sync` flag — TODO confirm. */
    private _syncWorker;
    /** Presumably the splitter passed to (or defaulted by) the constructor — TODO confirm. */
    private _sentenceSplitter;
    /** Presumably request IDs whose cancellation should not be reported to the caller — TODO confirm. */
    private _silencedCancels;
    /** Presumably request IDs that have been cancelled — TODO confirm. */
    private _cancelledIds;
    /**
     * In-flight request state.
     * - streamBuffer: raw text accumulated from worker deltas (or final text on 'response')
     * - emittedCount: number of complete sentences already forwarded to onDelta
     */
    private _pending;
    /**
     * Status records, each holding an `id`, its current `status`, and an optional
     * `error` message.
     *
     * NOTE(review): presumably keyed by request ID, with `error` set only when the
     * status is an error — confirm against the implementation.
     */
    messageStatus: Record<string, {
        id: string;
        status: MessageStatus;
        error?: string;
    }>;
    /** Presumably the ID of the request currently in flight, if any — TODO confirm. */
    private _activeId;
    /**
     * Predefined model presets. Each entry bundles the `model` identifier (a model
     * name or, for the mediapipe entry, a path to a `.litertlm` file), the `dtype`
     * quantization level, and the `pipeline` to use for that model.
     */
    static readonly builtinModels: {
        readonly "gemma3-1b": {
            readonly model: "gemma3-1b";
            readonly dtype: "int8";
            readonly pipeline: "text";
        };
        readonly "gemma3-270M": {
            readonly model: "gemma-3-270m-it-ONNX";
            readonly dtype: "fp16";
            readonly pipeline: "text";
        };
        readonly "Qwen2.5-0.5B-Instruct": {
            readonly model: "Qwen2.5-0.5B-Instruct";
            readonly dtype: "q4";
            readonly pipeline: "text";
        };
        readonly "mediapipe-gemma3n-E4B-it": {
            readonly model: "/aic-runtime-deps/llm-deps/dist-mediapipe-genai/gemma-3n-E4B-it-int4-Web.litertlm";
            readonly dtype: "int4";
            readonly pipeline: "mediapipe";
        };
    };
    /**
     * Default model preset. Its `model`, `dtype` and `pipeline` values match the
     * "gemma3-270M" entry of `builtinModels`.
     */
    static readonly defaultModelProvider: {
        readonly model: "gemma-3-270m-it-ONNX";
        readonly dtype: "fp16";
        readonly pipeline: "text";
    };
    /**
     * Create a client. All parameters are optional; their default values are not
     * visible in this declaration.
     *
     * @param workerPath - presumably the URL/path of the worker script to load — TODO confirm.
     * @param sync - presumably selects a synchronous worker variant — TODO confirm.
     * @param sentenceSplitter - splitter implementation; presumably used to cut streamed
     *  text into complete sentences — TODO confirm.
     */
    constructor(workerPath?: string, sync?: boolean, sentenceSplitter?: SentenceSplitter);
    /**
     * Try to emit any newly-completed sentences from entry.streamBuffer.
     * Updates entry.emittedCount and calls onDelta with the new text (joined).
     */
    private _emitCompletedIfAny;
    /** Presumably the handler for messages posted back by the worker — TODO confirm. */
    private _handleMessage;
    /** Initialize a given model.
     *
     * @param modelPath - one of the predefined models or a Hugging Face id of an LLM,
     *  like onnx-community/gemma-3-270m-it-ONNX. Some HF ones work, some don't.
     * @param dtype - The quantization level like int8 or q4f16.
     * @param pipeline - different models need different pipelines. For example, and in theory, the
     * gemma3n models require the AutoProcessor and the ImageTextToText transformersjs onnx engine,
     * which I called "auto" here. The normal Gemma 3 onnx models use the "text" pipeline. The Gemma3n
     * onnx models don't work with transformersjs onnx backend yet (if ever, it's a constant struggle with that thing),
     * so use the mediapipe litertlm one instead.
     * @returns A promise resolving to an object whose `type` is either "response" or
     *  "error" and whose `data` is a string (presumably a status or error message — TODO confirm).
     */
    initialize(modelPath: (typeof LlmClient)["builtinModels"][keyof (typeof LlmClient)["builtinModels"]]["model"] | string, dtype?: DTYPE, pipeline?: "text" | "auto" | "mediapipe"): Promise<{
        type: "response" | "error";
        data: string;
    }>;
    /**
     * Cancel a request.
     *
     * @param requestId - ID of the request to cancel.
     * @returns A promise that resolves with no value (presumably once the
     *  cancellation has been processed — TODO confirm).
     */
    cancel(requestId: string): Promise<void>;
    /**
     * Generate a response for the given conversation.
     *
     * @param messages - the messages to generate a response for.
     * @param maxTokens - optional token limit; the default is not visible in this declaration.
     * @param options - optional generation options (see `GenerateOptions`).
     * @returns A promise resolving to a string (presumably the full generated text — TODO confirm).
     */
    generateResponse(messages: Message[], maxTokens?: number, options?: GenerateOptions): Promise<string>;
    /** Presumably shuts down the underlying worker; the client's state afterwards is not visible here — TODO confirm. */
    terminate(): void;
    /**
     * Look up the status for an ID.
     *
     * @param id - the ID to look up (presumably a request ID — TODO confirm).
     * @returns The status, or `undefined` (presumably when the ID is unknown — TODO confirm).
     */
    getStatus(id: string): MessageStatus | undefined;
}
//# sourceMappingURL=llm-client.d.ts.map