import type { Data, DataStructure, DataValue } from "../models/dataset";
import type { CombinedLocalEvaluatorType, HumanEvaluationConfig, LocalEvaluationResult, LocalEvaluatorType, PassFailCriteriaType } from "./evaluator";
/**
 * Logging hooks for a test run.
 *
 * @typeParam T - Data structure of the dataset entries; it determines the
 * type of `datasetEntry` handed to `processed`.
 */
export interface TestRunLogger<T extends DataStructure | undefined = undefined> {
    /** Receives an informational message. */
    info: (message: string) => void;
    /** Receives an error message. */
    error: (message: string) => void;
    /**
     * Receives a message together with the dataset entry it refers to and,
     * when provided, the output and the local evaluation results.
     * NOTE(review): presumably called once per processed entry — confirm
     * against the code that invokes the logger.
     */
    processed: (message: string, data: {
        datasetEntry: Data<T>;
        output?: YieldedOutput;
        evaluationResults?: LocalEvaluationResult[];
    }) => void;
}
/**
 * Output yielded for one dataset entry, plus optional metadata describing
 * usage and cost.
 */
export type YieldedOutput = {
    /** The output text. */
    data: string;
    /** Retrieved context to evaluate: a single string or a list of strings. */
    retrievedContextToEvaluate?: string | Array<string>;
    /** Optional usage and cost figures attached to this output. */
    meta?: {
        /**
         * Either a latency on its own, or token counts with an optional latency.
         * NOTE(review): latency units are not stated in this file — confirm.
         */
        usage?:
            | { latency: number }
            | {
                promptTokens: number;
                completionTokens: number;
                totalTokens: number;
                latency?: number;
            };
        /** Cost broken down into input, output and total (units not specified here). */
        cost?: { input: number; output: number; total: number };
    };
};
/**
 * Result of a test run: a link plus a list of named result summaries.
 */
export type TestRunResult = {
    /** Link associated with the test run. NOTE(review): presumably a URL to view the run — confirm. */
    link: string;
    /** Result summaries; each carries per-key scores and optional aggregate figures. */
    result: Array<{
        name: string;
        /**
         * Score entries keyed by string (presumably the evaluator name — confirm).
         * Each entry has an optional `pass` flag and either a numeric score with
         * an optional `outOf`, or a boolean/string score.
         */
        individualEvaluatorMeanScore: Record<
            string,
            { pass?: boolean } & (
                | { score: number; outOf?: number }
                | { score: boolean | string }
            )
        >;
        /** Usage totals (NOTE(review): presumably token counts — confirm). */
        usage?: { total: number; input: number; completion: number };
        /** Cost totals (units not specified here). */
        cost?: { total: number; input: number; completion: number };
        /** Latency statistics (units not specified here). */
        latency?: {
            min: number;
            max: number;
            p50: number;
            p90: number;
            p95: number;
            p99: number;
            mean: number;
            standardDeviation: number;
            total: number;
        };
    }>;
};
/**
 * Full configuration of a test run.
 *
 * @typeParam T - Data structure of the dataset; it types `dataStructure`,
 * `data`, the evaluators, the output function's argument and the logger.
 *
 * NOTE(review): `outputFunction`, `promptVersion`, `promptChainVersion` and
 * `workflow` are all optional and this type does not enforce how they may be
 * combined — confirm the expected combination against the runner.
 */
export type TestRunConfig<T extends DataStructure | undefined = undefined> = {
    baseUrl: string;
    apiKey: string;
    workspaceId: string;
    /** Name of the test run. */
    name: string;
    testConfigId?: string;
    dataStructure?: T;
    data?: DataValue<T>;
    /** Evaluators: local evaluators, combined local evaluators, or plain strings. */
    evaluators: Array<
        | LocalEvaluatorType<T>
        | CombinedLocalEvaluatorType<T, Record<string, PassFailCriteriaType>>
        | string
    >;
    humanEvaluationConfig?: HumanEvaluationConfig;
    /** Produces the output for one dataset entry, synchronously or as a promise. */
    outputFunction?: (data: Data<T>) => YieldedOutput | Promise<YieldedOutput>;
    promptVersion?: { id: string; contextToEvaluate?: string };
    promptChainVersion?: { id: string; contextToEvaluate?: string };
    workflow?: { id: string; contextToEvaluate?: string };
    logger?: TestRunLogger<T>;
    /** NOTE(review): presumably the number of entries processed in parallel — confirm. */
    concurrency?: number;
};
/**
 * Chainable builder for a test run. Every `with*` / `yieldsOutput` member
 * returns a `TestRunBuilder`, so calls can be chained before `run`.
 * NOTE(review): whether each call returns the same builder instance or a new
 * one is not visible from this type — confirm in the implementation.
 *
 * @typeParam T - Data structure of the dataset; changed by `withDataStructure`.
 */
export type TestRunBuilder<T extends DataStructure | undefined = undefined> = {
    /** Accepts a data structure and returns a builder typed for that structure. */
    withDataStructure: <U extends DataStructure>(dataStructure: U) => TestRunBuilder<U>;
    /** Accepts the `data` value of the configuration. */
    withData: (data: TestRunConfig<T>["data"]) => TestRunBuilder<T>;
    /** Accepts the evaluators as individual (variadic) arguments. */
    withEvaluators: (...evaluators: TestRunConfig<T>["evaluators"]) => TestRunBuilder<T>;
    /** Accepts the human evaluation configuration. */
    withHumanEvaluationConfig: (humanEvaluationConfig: HumanEvaluationConfig) => TestRunBuilder<T>;
    /** Accepts the function that produces the output for a dataset entry. */
    yieldsOutput: (outputFunction: TestRunConfig<T>["outputFunction"]) => TestRunBuilder<T>;
    /** Accepts a prompt version id and, optionally, a context to evaluate. */
    withPromptVersionId: (id: string, contextToEvaluate?: string) => TestRunBuilder<T>;
    /** Accepts a prompt chain version id and, optionally, a context to evaluate. */
    withPromptChainVersionId: (id: string, contextToEvaluate?: string) => TestRunBuilder<T>;
    /** Accepts a workflow id and, optionally, a context to evaluate. */
    withWorkflowId: (id: string, contextToEvaluate?: string) => TestRunBuilder<T>;
    /** Accepts the logger of the configuration. */
    withLogger: (logger: TestRunConfig<T>["logger"]) => TestRunBuilder<T>;
    /** Accepts the concurrency value of the configuration. */
    withConcurrency: (concurrency: TestRunConfig<T>["concurrency"]) => TestRunBuilder<T>;
    /** Returns the configuration as a `TestRunConfig<T>`. */
    getConfig: () => TestRunConfig<T>;
    /**
     * Runs the test run; the promise resolves with the test run result and a
     * list of failed entry indices.
     * NOTE(review): `timeoutInMinutes` presumably bounds how long the run is
     * awaited, and its default is not visible here — confirm.
     */
    run: (timeoutInMinutes?: number) => Promise<{
        testRunResult: TestRunResult;
        failedEntryIndices: number[];
    }>;
};
/**
 * Response shape for creating a test run (per the type name): either a
 * `data` object describing the test run or an `error` with a message.
 */
export type MaximAPICreateTestRunResponse =
    | {
        data: {
            id: string;
            workspaceId: string;
            humanEvaluationConfig?: {
                emails: Array<string>;
                instructions: string;
                requester: string;
            };
            /** Opaque here; typed as `unknown`, so it must be narrowed before use. */
            evalConfig: unknown;
            parentTestRunId?: string;
        };
    }
    | { error: { message: string } };
/**
 * Payload for pushing one test run entry (per the type name): the test run
 * it belongs to, optional usage/cost figures, and the entry itself.
 *
 * @typeParam T - Not referenced in the body of this type.
 */
export type MaximAPITestRunEntryPushPayload<T extends DataStructure | undefined = undefined> = {
    /** Identifies the test run and, optionally, the dataset and dataset entry. */
    testRun: {
        id: string;
        datasetEntryId?: string;
        datasetId?: string;
        workspaceId: string;
        humanEvaluationConfig?: {
            emails: Array<string>;
            instructions: string;
            requester: string;
        };
        evalConfig: unknown;
        parentTestRunId?: string;
    };
    /** Optional usage and cost figures; note the snake_case token field names. */
    runConfig?: {
        usage?:
            | {
                prompt_tokens: number;
                completion_tokens: number;
                total_tokens: number;
                latency?: number;
            }
            | { latency?: number };
        cost?: { input: number; output: number; total: number };
    };
    entry: MaximAPITestRunEntry;
};
/**
 * One test run entry: optional input, expected output, context and output,
 * the data entry as a string-keyed record, and optional local evaluation
 * results that each carry an `id`.
 */
export type MaximAPITestRunEntry = {
    input?: string;
    expectedOutput?: string;
    /** Context to evaluate: a single string or a list of strings. */
    contextToEvaluate?: string | Array<string>;
    output?: string;
    /** Values may be a string, a list of strings, `null` or `undefined`. */
    dataEntry: Record<string, string | Array<string> | null | undefined>;
    localEvaluationResults?: Array<LocalEvaluationResult & { id: string }>;
};
/**
 * Response shape for a test run status query (per the type name): either
 * per-entry counters plus an overall status, or an `error` with a message.
 */
export type MaximAPITestRunStatusResponse =
    | {
        data: {
            /** Entry counters, one per state, plus the total. */
            entryStatus: {
                total: number;
                running: number;
                completed: number;
                failed: number;
                queued: number;
                stopped: number;
            };
            /** Overall status of the test run. */
            testRunStatus:
                | "QUEUED"
                | "RUNNING"
                | "FAILED"
                | "COMPLETE"
                | "STOPPED";
        };
    }
    | { error: { message: string } };
/**
 * Response shape for fetching a test run result (per the type name): either
 * the `TestRunResult` under `data`, or an `error` with a message.
 */
export type MaximAPITestRunResultResponse =
    | { data: TestRunResult }
    | { error: { message: string } };
/**
 * Payload for executing a workflow against one data entry (per the type name).
 */
export type MaximAPITestRunEntryExecuteWorkflowForDataPayload = {
    /** Id of the workflow. */
    workflowId: string;
    /** Values may be a string, a list of strings, `null` or `undefined`. */
    dataEntry: Record<string, string | Array<string> | null | undefined>;
    contextToEvaluate?: string;
};
/**
 * Response shape for executing a workflow against one data entry (per the
 * type name): either the execution data or an `error` with a message.
 */
export type MaximAPITestRunEntryExecuteWorkflowForDataResponse =
    | {
        data: {
            output?: string;
            contextToEvaluate?: string;
            /** Required on success. Units are not stated in this file. */
            latency: number;
        };
    }
    | { error: { message: string } };
/**
 * Payload for executing a prompt version against one input (per the type name).
 */
export type MaximAPITestRunEntryExecutePromptForDataPayload = {
    /** Id of the prompt version. */
    promptVersionId: string;
    input: string;
    /** Optional; values may be a string, a list of strings, `null` or `undefined`. */
    dataEntry?: Record<string, string | Array<string> | null | undefined>;
    contextToEvaluate?: string;
};
/**
 * Response shape for executing a prompt version (per the type name): either
 * the execution data with optional usage and cost, or an `error` with a message.
 */
export type MaximAPITestRunEntryExecutePromptForDataResponse =
    | {
        data: {
            output?: string;
            contextToEvaluate?: string;
            /** Token counts with an optional latency (units not stated here). */
            usage?: {
                promptTokens: number;
                completionTokens: number;
                totalTokens: number;
                latency?: number;
            };
            /** Cost split into input, output and total (units not specified here). */
            cost?: { input: number; output: number; total: number };
        };
    }
    | { error: { message: string } };
/**
 * Payload for executing a prompt chain version against one input (per the
 * type name).
 */
export type MaximAPITestRunEntryExecutePromptChainForDataPayload = {
    /** Id of the prompt chain version. */
    promptChainVersionId: string;
    input: string;
    /** Optional; values may be a string, a list of strings, `null` or `undefined`. */
    dataEntry?: Record<string, string | Array<string> | null | undefined>;
    contextToEvaluate?: string;
};
/**
 * Response shape for executing a prompt chain version (per the type name):
 * either the execution data with optional usage and cost, or an `error`
 * with a message.
 */
export type MaximAPITestRunEntryExecutePromptChainForDataResponse =
    | {
        data: {
            output?: string;
            contextToEvaluate?: string;
            /** Token counts with an optional latency (units not stated here). */
            usage?: {
                promptTokens: number;
                completionTokens: number;
                totalTokens: number;
                latency?: number;
            };
            /** Cost split into input, output and total (units not specified here). */
            cost?: { input: number; output: number; total: number };
        };
    }
    | { error: { message: string } };
