/**
 * Simple eval runner for AI Functions
 *
 * Runs evals across multiple models and collects results.
 * Does not depend on evalite - uses our own infrastructure.
 */
import { generateObject, generateText } from '../generate.js';
import { schema } from '../schema.js';
import { type EvalModel, type ModelTier } from './models.js';
/**
 * Output function type for eval progress reporting.
 *
 * The callback receives a single message string per call; its return value
 * is ignored (`void`). This is the type of the optional `output` hook on
 * `RunEvalOptions`.
 */
export type EvalOutputFn = (message: string) => void;
/**
 * A single named case in an eval suite: an input plus an optional expected value.
 *
 * @typeParam TInput - Type of the value stored in `input` (defaults to `unknown`).
 * @typeParam TExpected - Type of the optional reference value in `expected` (defaults to `unknown`).
 */
export interface EvalCase<TInput = unknown, TExpected = unknown> {
    /** Name identifying this case. */
    name: string;
    /** Input value for this case. */
    input: TInput;
    /** Optional expected (reference) value; may be omitted. */
    expected?: TExpected;
}
/**
 * A named numeric score, as stored in `EvalResult.scores`.
 */
export interface EvalScore {
    /** Name of the score. NOTE(review): presumably the name of the scorer that produced it — confirm against the implementation. */
    name: string;
    /** Numeric score value. The valid range is not specified by this declaration. */
    score: number;
    /** Optional human-readable description. */
    description?: string;
    /** Optional extra data of any shape; typed `unknown`, so consumers must narrow it before use. */
    metadata?: unknown;
}
/**
 * Outcome recorded for one model paired with one eval case.
 *
 * @typeParam TOutput - Type of the task output stored in `output` (defaults to `unknown`).
 */
export interface EvalResult<TOutput = unknown> {
    /** The model this result was recorded for. */
    model: EvalModel;
    /** The eval case this result was recorded for. */
    case: EvalCase;
    /** The output from the task. Will be null if an error occurred. */
    output: TOutput | null;
    /** Scores recorded for this result. */
    scores: EvalScore[];
    /** Latency of the run. NOTE(review): the name suggests milliseconds — confirm which interval is measured. */
    latencyMs: number;
    /** Cost of the run. NOTE(review): unit/currency is not specified by this declaration — confirm. */
    cost: number;
    /** Optional error message. NOTE(review): presumably set when the task failed (see `output`) — confirm. */
    error?: string;
}
/**
 * Aggregate summary of an eval run; this is the type `runEval` resolves to.
 */
export interface EvalSummary {
    /** Name of the eval. NOTE(review): presumably copied from `RunEvalOptions.name` — confirm. */
    name: string;
    /** Individual results included in this summary. */
    results: EvalResult[];
    /** Overall average score. NOTE(review): exact averaging (per score vs. per result, treatment of errored runs) is not visible here — confirm. */
    avgScore: number;
    /** Per-model breakdown keyed by a string. NOTE(review): the key format (model id vs. display name) is not visible here — confirm. */
    byModel: Record<string, {
        /** Average score for this model. */
        avgScore: number;
        /** Count for this model. NOTE(review): presumably the number of results — confirm. */
        count: number;
    }>;
    /** Total cost. NOTE(review): presumably the sum of `EvalResult.cost` — confirm. */
    totalCost: number;
    /** Total time. NOTE(review): unit, and whether this is wall-clock time or summed latency, is not visible here — confirm. */
    totalTime: number;
}
/**
 * Options accepted by `runEval`.
 *
 * @typeParam TInput - Input type shared by the cases, the task and the scorers.
 * @typeParam TOutput - Type the task resolves to and the scorers receive as `output`.
 * @typeParam TExpected - Type of the optional expected value on cases and scorer arguments.
 */
export interface RunEvalOptions<TInput, TOutput, TExpected> {
    /** Name of the eval suite. */
    name: string;
    /** The eval cases to run. */
    cases: EvalCase<TInput, TExpected>[];
    /** Async task under evaluation: takes a case input and a model, resolves to the output. */
    task: (input: TInput, model: EvalModel) => Promise<TOutput>;
    /** Scorers applied to task outputs; each may return its number synchronously or as a promise. */
    scorers: Array<{
        /** Name of the scorer. */
        name: string;
        /** Optional human-readable description of the scorer. */
        description?: string;
        /** Scoring function: receives the input, the task output and the optional expected value, and yields a number. */
        scorer: (args: {
            input: TInput;
            output: TOutput;
            expected?: TExpected;
        }) => number | Promise<number>;
    }>;
    /** Optional explicit list of models. NOTE(review): how this interacts with `tiers`/`providers` is not visible here — confirm. */
    models?: EvalModel[];
    /** Optional list of model tiers. NOTE(review): presumably used to select models by tier — confirm. */
    tiers?: ModelTier[];
    /** Optional list of provider names. NOTE(review): presumably used to select models by provider — confirm. */
    providers?: string[];
    /** Optional concurrency setting. NOTE(review): the default and what is parallelised are not visible here — confirm. */
    concurrency?: number;
    /** Custom output function for progress reporting (defaults to logger.info) */
    output?: EvalOutputFn;
    /** Whether to suppress progress output (defaults to false) */
    quiet?: boolean;
}
/**
 * Run an eval suite across models.
 *
 * This is an ambient declaration only; the implementation is not part of this file.
 *
 * @typeParam TInput - Input type of the eval cases and the task.
 * @typeParam TOutput - Output type the task resolves to.
 * @typeParam TExpected - Type of the optional expected value on each case.
 * @param options - Suite configuration: name, cases, task, scorers and optional model-selection and reporting settings.
 * @returns A promise that resolves to an `EvalSummary`.
 */
export declare function runEval<TInput, TOutput, TExpected>(options: RunEvalOptions<TInput, TOutput, TExpected>): Promise<EvalSummary>;
/** Re-export of the names imported from `../generate.js` and `../schema.js`, making them available from this module. */
export { generateObject, generateText, schema };
//# sourceMappingURL=runner.d.ts.map