/**
 * GAIA CodeAgent — ADR-138 iter 54 (FINAL design)
 *
 * smolagents-style CodeAgent harness for the GAIA L1 benchmark.
 * NOT the smolagents library — this is the PATTERN implemented natively in ruflo TS.
 *
 * Architecture (HAL replication):
 *   Instead of Anthropic native tool_use blocks (JSON → execute → repeat),
 *   the agent writes Python code that calls tools as functions.  The code is
 *   parsed from markdown code blocks, executed in gaia-codeagent-runner.py,
 *   and the stdout is fed back as the next user turn.  The agent commits its
 *   answer by calling final_answer("value") in Python.
 *
 * Why this beats ToolCallingAgent on GAIA (from HAL-DEEP-STUDY.md):
 *   - 30% fewer steps for the same task (Python is more expressive than JSON)
 *   - Variables persist across steps in the agent's mental model
 *   - Complex control flow (loops, try/except) is native
 *   - final_answer() is deterministic — no regex extraction fragility
 *
 * Loop algorithm:
 *   1. Build system prompt with tool signatures and GAIA instruction template.
 *   2. Call Anthropic Messages API (text-in / text-out — NO tools array).
 *   3. Parse the response for a ```python ... ``` code block.
 *   4. If code block found: run gaia-codeagent-runner.py subprocess.
 *      - Runner pre-defines tool functions (web_search, visit_webpage, etc.)
 *      - If final_answer("X") is called in the code: extract X, return result.
 *      - Otherwise: feed stdout back as user turn, continue.
 *   5. If no code block: prompt agent to produce one (max 3 retries).
 *   6. If maxTurns exceeded: return timedOut=true.
 *   7. Every planningInterval turns: inject a planning checkpoint.
 *
 * Tool routing (via gaia-codeagent-runner.py):
 *   web_search(query)      → claude -p with WebSearch (best web coverage)
 *   visit_webpage(url)     → requests + bs4 HTML extraction
 *   grounded_query(query)  → Gemini with Google Search grounding (ruflo unique)
 *   read_file(path)        → Python direct (text/csv/json/xlsx) or subprocess
 *   describe_image(path)   → claude -p with vision
 *   final_answer(x)        → writes sentinel JSON and exits runner
 *
 * Key parameters:
 *   model:            claude-sonnet-4-6 (default; ADR-138 targets Sonnet 4.5+)
 *   maxTurns:         20 (HAL uses 200; 20 is cost-controlled for L1)
 *   planningInterval: 4 (match HAL's planning_interval=4)
 *   maxTokensPerTurn: 4096 (code generation needs more space than ToolCalling)
 *
 * Refs: ADR-138, HAL-DEEP-STUDY.md, smolagents CodeAgent, #2156, iter 54
 */
import { GaiaQuestion } from './gaia-loader.js';
import { isAnswerCorrect } from './gaia-agent.js';
export { isAnswerCorrect };
/**
 * Outcome of running one question through the CodeAgent harness; this is the
 * shape `runGaiaCodeAgent` resolves to.
 *
 * NOTE(review): only declarations are visible in this file. Field notes
 * marked "presumably" are inferred from names and the file header — confirm
 * against the implementation.
 */
export interface CodeAgentResult {
    /** Identifier of the question this result belongs to (presumably copied from the GaiaQuestion — confirm). */
    questionId: string;
    /** Answer committed via final_answer(...) in agent code; presumably null when no answer was committed. */
    finalAnswer: string | null;
    /** Presumably the number of agent loop turns consumed — confirm. */
    turns: number;
    /** Presumably a per-tool invocation count keyed by tool name (e.g. web_search) — confirm. */
    toolCallsByName: Record<string, number>;
    /** Presumably input tokens summed over all model calls in the run — confirm. */
    totalInputTokens: number;
    /** Presumably output tokens summed over all model calls in the run — confirm. */
    totalOutputTokens: number;
    /** Presumably wall-clock duration of the run in milliseconds — confirm. */
    wallMs: number;
    /** Presumably the number of planning checkpoints injected (see planningInterval) — confirm. */
    replanCount: number;
    /** Set to true when maxTurns was exceeded (loop step 6 in the file header). */
    timedOut?: boolean;
    /** Presumably a failure description when the run ended with an error — confirm. */
    error?: string;
    /** Steps log for debugging — each entry is one turn's code + output. */
    steps?: Array<{
        code: string;
        output: string;
    }>;
}
/**
 * Optional tuning knobs for `runGaiaCodeAgent`. Every field may be omitted.
 *
 * Defaults quoted below come from the "Key parameters" list in the file
 * header; the code that applies them is not visible in this declaration file.
 */
export interface CodeAgentOptions {
    /** Model identifier; the file header lists claude-sonnet-4-6 as the default. */
    model?: string;
    /** Upper bound on loop turns; the file header lists 20. Exceeding it yields timedOut=true. */
    maxTurns?: number;
    /** Per-turn generation token limit; the file header lists 4096. */
    maxTokensPerTurn?: number;
    /** NOTE(review): presumably the timeout in ms for each model API call (distinct from perStepTimeoutMs) — confirm. */
    perTurnTimeoutMs?: number;
    /** Timeout for each Python step execution (default: 30s). */
    perStepTimeoutMs?: number;
    /** A planning checkpoint is injected every this-many turns; the file header lists 4. */
    planningInterval?: number;
    /** NOTE(review): presumably the Anthropic API key used for the Messages API calls — confirm. */
    apiKey?: string;
    /** If true, include step-by-step code/output log in the result. */
    verbose?: boolean;
    /**
     * Optional tool catalogue override — used by unit tests to inject mock tools.
     * In production (Python runner mode) this is ignored; the runner defines its own tools.
     */
    catalogue?: unknown[];
}
/**
 * Extract the first ```python ... ``` code block from assistant output.
 *
 * Exported for use in unit tests (gaia-codeagent.smoke.ts T1).
 *
 * @param text  Raw assistant response text to scan.
 * @returns The extracted code, or null if no code block is found.
 */
export declare function extractCodeBlock(text: string): string | null;
/**
 * Execute a single Python code step via the gaia-codeagent-runner.py subprocess.
 *
 * This is a thin public wrapper around `executeAgentCodeStep` that:
 * - Exposes a clean typed signature for unit tests (gaia-codeagent.smoke.ts T2-T4)
 * - Renames `output` → `observation` to match the smolagents naming convention
 *
 * The declared return type is a plain object rather than a Promise, so the
 * call is synchronous from the caller's point of view.
 *
 * @param code            Python code to execute
 * @param attachmentPath  Path to attached file, or null
 * @param timeoutMs       Subprocess timeout in ms (default: 30s)
 * @param apiKey          Anthropic API key (passed to runner for claude -p tool calls)
 * @returns `observation` — the step's output under its smolagents name;
 *          `finalAnswer` — the value passed to final_answer(...) in the code,
 *          presumably null when the step did not call it (confirm against the
 *          implementation).
 */
export declare function runCodeAgentStep(code: string, attachmentPath: string | null, timeoutMs?: number, apiKey?: string): {
    observation: string;
    finalAnswer: string | null;
};
/**
 * Run a GAIA question through the smolagents-style CodeAgent harness.
 *
 * The agent writes Python code, we execute it with tool stubs, and feed
 * the output back.  The loop continues until final_answer() is called
 * or maxTurns is exhausted.
 *
 * @param question  The GAIA question to solve.
 * @param options   Optional overrides (model, turn and token limits, timeouts,
 *                  planning interval, verbosity); see CodeAgentOptions.
 * @returns A promise resolving to the CodeAgentResult for this question. Per
 *          the loop description in the file header, exhausting maxTurns is
 *          reported as `timedOut: true` on the result.
 */
export declare function runGaiaCodeAgent(question: GaiaQuestion, options?: CodeAgentOptions): Promise<CodeAgentResult>;
/**
 * Run all 5 SMOKE_FIXTURE questions through the CodeAgent harness.
 * Pass criteria: ≥3/5 correct (60%).
 *
 * @param opts  Optional settings: `verbose`, `apiKey` and `model`, presumably
 *              forwarded to each per-question run — confirm against the
 *              implementation.
 * @returns A promise resolving to the tally: `passed` questions out of `total`,
 *          plus `passRate`. NOTE(review): presumably passed / total as a
 *          fraction — confirm whether it is 0–1 or a percentage.
 */
export declare function runCodeAgentSmokeTest(opts?: {
    verbose?: boolean;
    apiKey?: string;
    model?: string;
}): Promise<{
    passRate: number;
    passed: number;
    total: number;
}>;
//# sourceMappingURL=gaia-codeagent.d.ts.map