/**
 * GAIA Agent — ADR-133-PR3 / ADR-135 (planning interval)
 *
 * Multi-turn Anthropic Messages API loop that drives Claude through the
 * GAIA benchmark questions using a tool-use agent pattern.
 *
 * Loop algorithm:
 *   1. Build initial message with the question and a system prompt that
 *      instructs Claude to output `FINAL_ANSWER: <value>` when done.
 *   2. Call Anthropic Messages API with the registered tool definitions.
 *   3. On `stop_reason === 'tool_use'`: execute all tool_use blocks in
 *      parallel, append results as a `user` turn, and repeat.
 *      Every PLANNING_INTERVAL turns, inject a planning-checkpoint text
 *      alongside the tool results to force strategy re-evaluation.
 *   4. On `stop_reason === 'end_turn'`: scan content for the final answer
 *      pattern and return the result.
 *   5. On timeout (maxTurns exceeded): return `{ timedOut: true }`.
 *
 * API key resolution order (mirrors resolveHfToken from gaia-loader.ts):
 *   1. `options.apiKey` (caller-supplied)
 *   2. `ANTHROPIC_API_KEY` env var
 *   3. `gcloud secrets versions access latest --secret=ANTHROPIC_API_KEY`
 *
 * Cost discipline: smoke runs use `claude-haiku-4-5` only.  The smoke
 * runner at the bottom of this file enforces that model.
 *
 * Planning interval (iter 30 finding #3):
 *   smolagents CodeAgent uses planning_interval=4 — replans every 4 steps
 *   to prevent tunnel-vision on bad strategies. Adds ~80 tokens per
 *   replan event (~$0.0001 each), negligible cost.
 *
 * Iter 53a T2 narrowing:
 *   Three precise changes from iter 52 T2 (which netted -1 question overall: +6 recoveries, -7 regressions):
 *   1. extractFinalAnswer uses Stage 1 only (no Stage 2/3 prose fallback).
 *      Stage 2/3 fired too aggressively: overwriting correct Stage 1 answers and
 *      extracting wrong prose fragments. Now Stage 1 is the only extraction path.
 *   2. System prompt removes surrender instruction ("FINAL_ANSWER: unknown / I don't know").
 *      That instruction caused the agent to give up on questions it would have figured out.
 *      Replaced with: "When you reach a final answer, output FINAL_ANSWER: <value>."
 *   3. Reversed-text preprocessor is preserved (iter 52 T2 finding: 2d83110e has reversed text).
 *
 * Refs: ADR-133, ADR-135, iter 30, iter 52, iter 53a, #2156
 */
import { GaiaQuestion } from './gaia-loader.js';
import { GaiaToolCatalogue, ContentBlock } from './gaia-tools/index.js';
/**
 * Default number of tool_use turns between planning checkpoints.
 *
 * Every PLANNING_INTERVAL tool_use turns, a planning-checkpoint message is
 * injected to force the agent to reassess its strategy. Callers can override
 * the interval per run via `GaiaAgentOptions.planningInterval` (0 disables
 * checkpoints).
 *
 * Based on iter 30 research: smolagents CodeAgent uses planning_interval=4.
 * HAL reliability analysis showed agents fail when they exhaust step
 * budgets without recalibrating.
 */
export declare const PLANNING_INTERVAL = 4;
/**
 * Build the planning-checkpoint text injected every PLANNING_INTERVAL turns.
 * Exported so tests can snapshot the exact wording.
 *
 * @param turn - Turn number the checkpoint is built for (presumably the
 *   current loop turn; TODO confirm whether it is 0- or 1-based).
 * @param maxTurns - Turn budget for the run (presumably the same value as
 *   `GaiaAgentOptions.maxTurns`; verify against the caller).
 * @returns The checkpoint text.
 */
export declare function buildPlanningCheckpoint(turn: number, maxTurns: number): string;
/**
 * Outcome of a single `runGaiaAgent` call: the final answer (if any) plus
 * turn, token, and per-tool usage statistics.
 */
export interface GaiaAgentResult {
    /** Identifier of the question this result belongs to (presumably the GaiaQuestion's id; confirm against the loader). */
    questionId: string;
    /** Final answer produced by the run, or null when none was obtained (e.g. the run timed out). */
    finalAnswer: string | null;
    /** Number of agent turns used by the run. */
    turns: number;
    /** Call count per tool, keyed by tool name. */
    toolCallsByName: Record<string, number>;
    /** Total input tokens for the run (presumably summed from each response's `usage.input_tokens`). */
    totalInputTokens: number;
    /** Total output tokens for the run (presumably summed from each response's `usage.output_tokens`). */
    totalOutputTokens: number;
    /** Elapsed time of the run in milliseconds (presumably wall-clock; TODO confirm). */
    wallMs: number;
    /** Number of planning-checkpoint injections during this run (0 when planning is disabled). */
    replanCount?: number;
    /** Set to true on timeout, i.e. when maxTurns is exceeded. */
    timedOut?: boolean;
    /** Set when the convergence layer fired and committed the final answer. */
    convergenceTrigger?: string;
    /** True when the convergence layer recovered the answer from prior message history. */
    convergenceUsedFallback?: boolean;
    /** Error description for a failed run. NOTE(review): exact contents are not visible from this declaration. */
    error?: string;
}
/**
 * Optional settings for `runGaiaAgent`. Every field may be omitted; the
 * documented default then applies.
 */
export interface GaiaAgentOptions {
    /** Model to use (default: 'claude-haiku-4-5'). */
    model?: string;
    /** Maximum number of agent turns before giving up (default: 8). */
    maxTurns?: number;
    /** Maximum tokens per Anthropic API call (default: 2048). */
    maxTokensPerTurn?: number;
    /** Per-turn HTTP timeout in milliseconds (default: 60 000). */
    perTurnTimeoutMs?: number;
    /**
     * Inject a planning-checkpoint every N tool_use turns (default: PLANNING_INTERVAL = 4).
     * Set to 0 to disable planning checkpoints.
     */
    planningInterval?: number;
    /**
     * Anthropic API key.  Resolved automatically via env var + gcloud fallback
     * if omitted.
     */
    apiKey?: string;
    /**
     * Pre-built tool catalogue.  Defaults to `createDefaultToolCatalogue()`.
     * Exposed so callers can inject mocks for testing.
     */
    catalogue?: GaiaToolCatalogue;
    /**
     * Enable the convergence layer (default: true).
     *
     * When enabled, the convergence layer monitors for three failure modes:
     *   1. max_turns hit without FINAL_ANSWER
     *   2. Loop (same tool+args 3× in a 5-turn window)
     *   3. Token overflow (>120k input tokens)
     *
     * On detection, a forced-commit phase is run: one API call with a
     * directive prompt, no tools, then a fallback scan of prior messages.
     * Set to false to disable (e.g. for ablation testing).
     */
    enableConvergence?: boolean;
}
/**
 * Resolve the Anthropic API key.
 *
 * Resolution order:
 *   1. Caller-supplied `apiKey`
 *   2. `ANTHROPIC_API_KEY` env var
 *   3. `gcloud secrets versions access latest --secret=ANTHROPIC_API_KEY`
 *
 * @param apiKey - Caller-supplied key; when provided it takes precedence over
 *   the env var and the gcloud fallback.
 * @returns The resolved API key.
 * @throws With a clear message if none of the above is available.
 */
export declare function resolveAnthropicApiKey(apiKey?: string): string;
/**
 * If the question text appears to be reversed English, prepend a de-reversed
 * version so the agent sees both the original and the decoded form.
 *
 * Iter 52 T2 — gate 1 finding: task 2d83110e has a reversed sentence.
 * Kept in iter 53a (this is not the source of the iter 52 regressions).
 *
 * Not exported under its own name; tests reach it through the
 * `_buildUserMessageForTest` alias.
 *
 * @param question - Raw question text.
 * @returns The user-message text for the agent. NOTE(review): how "appears
 *   to be reversed" is detected is not visible from this declaration.
 */
declare function buildUserMessage(question: string): string;
/** Anthropic image content block for vision API. */
interface ImageContentBlock {
    /** Block-type discriminator; always the literal 'image'. */
    type: 'image';
    /** Inline image payload. */
    source: {
        /** Payload encoding; only the literal 'base64' is modelled here. */
        type: 'base64';
        /** Media (MIME) type of the image, e.g. "image/png". */
        media_type: string;
        /** Image data (presumably the base64-encoded bytes, given `type: 'base64'`). */
        data: string;
    };
}
/**
 * Parse an IMAGE_BASE64 marker returned by file_read's extractImage().
 *
 * Marker format: [IMAGE_BASE64:{"mediaType":"image/png","base64":"...","path":"..."}]
 *
 * @param marker - Marker string in the format shown above.
 * @returns An Anthropic image content block, or null if the marker is invalid.
 */
export declare function parseImageMarker(marker: string): ImageContentBlock | null;
/** Minimal types for the Anthropic Messages API response. */
interface AnthropicResponse {
    /** Response identifier. */
    id: string;
    /** Model name reported in the response. */
    model: string;
    /**
     * Why the model stopped. Per the loop algorithm in the file header,
     * 'tool_use' leads to tool execution and another turn, while 'end_turn'
     * leads to the final-answer scan.
     * NOTE(review): the trailing `| string` widens this union to plain
     * `string`, so the literals are documentation only and are not enforced
     * by the type checker.
     */
    stop_reason: 'end_turn' | 'tool_use' | 'max_tokens' | string;
    /** Content blocks of the response (text and tool_use blocks, per the loop description in the file header). */
    content: ContentBlock[];
    /** Token usage reported for this response. */
    usage: {
        /** Input tokens reported for this response. */
        input_tokens: number;
        /** Output tokens reported for this response. */
        output_tokens: number;
    };
}
/**
 * Extract the final answer from a model response.
 *
 * Per the file header, the agent is instructed to emit
 * `FINAL_ANSWER: <value>` and extraction uses Stage 1 only (no Stage 2/3
 * prose fallback). NOTE(review): the exact pattern matched is not visible
 * from this declaration.
 *
 * Not exported under its own name; tests reach it through the
 * `_extractFinalAnswerForTest` alias.
 *
 * @param resp - Messages API response to scan.
 * @returns The extracted answer, or null (presumably when no final-answer
 *   pattern is found; confirm against the implementation).
 */
declare function extractFinalAnswer(resp: AnthropicResponse): string | null;
/**
 * Run a GAIA question through Claude with tool use.
 *
 * @param question - The GAIA question to answer.
 * @param options - Optional run settings (model, turn and token limits,
 *   planning interval, API key, tool catalogue, convergence layer); see
 *   `GaiaAgentOptions` for the documented defaults.
 * @returns GaiaAgentResult with the final answer (or null if timed out),
 * turn count, token totals, and per-tool call counts.
 */
export declare function runGaiaAgent(question: GaiaQuestion, options?: GaiaAgentOptions): Promise<GaiaAgentResult>;
/**
 * Check whether a model answer matches the expected ground-truth answer.
 *
 * Matching rules (mirrors GAIA evaluation):
 * - Normalise: trim whitespace, lowercase.
 * - Substring match: expected is contained in model answer (handles "Paris" vs "Paris, France").
 * - Direct equality after normalisation.
 * - Numeric: parse as floats and compare with ±1% tolerance.
 *
 * @param modelAnswer - Answer produced by the model.
 * @param expected - Ground-truth answer to compare against.
 * @returns True when the model answer matches the expected answer under the
 *   rules above.
 */
export declare function isAnswerCorrect(modelAnswer: string, expected: string): boolean;
/**
 * Run all 5 SMOKE_FIXTURE questions and report results to stdout.
 *
 * Pass criteria: ≥3/5 correct (60% pass rate).
 *
 * Cost estimate is printed at the end using Haiku pricing.
 *
 * This function is exported so tests can call it directly and capture output;
 * it also runs when this file is executed directly via `node gaia-agent.js --smoke`.
 *
 * @param opts - Optional settings: `verbose` (presumably enables more
 *   detailed stdout output; TODO confirm) and `apiKey` (Anthropic API key;
 *   presumably resolved like `GaiaAgentOptions.apiKey` when omitted).
 * @returns Summary counts: `passed` correct answers out of `total` questions,
 *   plus `passRate`. NOTE(review): whether `passRate` is a 0–1 fraction or a
 *   percentage is not visible from this declaration.
 */
export declare function runSmokeTest(opts?: {
    verbose?: boolean;
    apiKey?: string;
}): Promise<{
    passRate: number;
    passed: number;
    total: number;
}>;
// Test-only aliases: the two module-private helpers are exposed under
// underscore-prefixed `...ForTest` names and are not exported under their
// own names.
export { extractFinalAnswer as _extractFinalAnswerForTest, buildUserMessage as _buildUserMessageForTest, };
//# sourceMappingURL=gaia-agent.d.ts.map