/**
 * GAIA Claude-p Wrapper — iter 54 (#2156)
 *
 * Delegates each GAIA question to `claude -p` (Claude Code headless mode),
 * which gives us WebSearch, WebFetch, Read (multimodal incl. PDF/DOCX/images),
 * and Bash (Python execution) for free — the same tools HAL uses.
 *
 * Why this approach over a native TS CodeAgent:
 *   - HAL gaps vs ruflo were: visit_webpage, file reading (PDF/DOCX/XLSX/images),
 *     Python execution.  Claude Code's built-in tools solve ALL of these.
 *   - No wheel-reinvention: battle-tested tool infra, native multimodal, proper
 *     tool-budget management, Anthropic WebSearch API.
 *   - Baseline: 24/53 (45.3%).  Target: ≥45/53 (84.9%) to surpass HAL's 82.07%.
 *
 * SECURITY NOTE on --dangerously-skip-permissions:
 *   This flag is ONLY used inside the GAIA harness context, which is a sandboxed
 *   benchmark evaluation environment.  GAIA questions have no real-world
 *   side effects — they are read-only research questions.  The flag lets Claude Code
 *   use its tools (WebSearch, WebFetch, Read, Bash) without per-tool permission
 *   prompts, which is required for unattended benchmark execution.  It MUST NOT
 *   be used in production workflows where Claude Code could affect real systems.
 *
 * JSON output format from `claude -p --output-format json`:
 *   {
 *     type: "result",
 *     subtype: "success" | "error_max_budget_usd" | ...,
 *     is_error: boolean,
 *     result: string,          // final assistant message text
 *     total_cost_usd: number,
 *     duration_ms: number,
 *     num_turns: number,
 *     ...
 *   }
 *
 * Refs: ADR-138 (reference, NOT implemented), iter 54, #2156
 */
import type { GaiaQuestion } from './gaia-loader.js';
/**
 * Default model ID for claude -p GAIA runs. Sonnet for quality parity with HAL.
 *
 * Documented as the default for `ClaudePOptions.model`.
 */
export declare const CLAUDE_P_DEFAULT_MODEL = "claude-sonnet-4-6";
/**
 * Per-question budget cap (USD). HAL uses Sonnet 4.5 so $0.30 headroom is safe.
 *
 * Documented as the default for `ClaudePOptions.budgetUsd`.
 */
export declare const CLAUDE_P_PER_QUESTION_BUDGET_USD = 0.3;
/**
 * Subprocess timeout in milliseconds: 5 minutes per question.
 *
 * Documented as the default for `ClaudePOptions.timeoutMs`.
 *
 * NOTE(review): this declaration is typed as plain `number`, so the literal
 * value is not visible here; 5 minutes would be 300000 ms — confirm against
 * the implementation.
 */
export declare const CLAUDE_P_TIMEOUT_MS: number;
/**
 * Outcome of running one GAIA question through `claude -p`.
 *
 * This is the resolved value of `runGaiaQuestionViaClaudeP`, and the element
 * type of the array resolved by `runGaiaQuestionsBatchViaClaudeP`.
 *
 * NOTE(review): the fields presumably mirror the `claude -p --output-format json`
 * payload described in the file header (`result`, `is_error`, `total_cost_usd`,
 * `num_turns`, ...) — the exact mapping lives in the implementation; confirm there.
 */
export interface ClaudePResult {
    /** The extracted answer, or null if extraction failed. */
    finalAnswer: string | null;
    /** Raw result text from claude -p (the text the answer is extracted from). */
    rawResult: string;
    /** Whether claude -p exited with an error. */
    isError: boolean;
    /** claude -p's reported error message (if any); absent when none was reported. */
    errorMessage?: string;
    /** Actual cost reported by claude -p, in USD. */
    costUsd: number;
    /** Wall-clock time in ms. */
    wallMs: number;
    /** Number of turns claude -p used. */
    numTurns: number;
    /** claude -p stop reason; optional, so callers must handle `undefined`. */
    stopReason?: string;
}
/**
 * Optional per-run overrides for a `claude -p` invocation.
 *
 * Every field is optional; the documented default applies when a field is
 * omitted. `ClaudePBatchOptions` extends this interface with batch-only settings.
 */
export interface ClaudePOptions {
    /** Model ID (default: CLAUDE_P_DEFAULT_MODEL). */
    model?: string;
    /** Per-question budget cap in USD (default: CLAUDE_P_PER_QUESTION_BUDGET_USD). */
    budgetUsd?: number;
    /** Subprocess timeout in ms (default: CLAUDE_P_TIMEOUT_MS). */
    timeoutMs?: number;
    /** Absolute path to the claude binary (default: resolved from $PATH). */
    claudeBin?: string;
}
/**
 * Build the prompt sent to claude -p for a GAIA question.
 *
 * Includes the question text, optional attachment path, and precise instructions
 * for using available tools and producing FINAL_ANSWER: in the expected format.
 *
 * @param question - The GAIA question to render into a prompt.
 * @returns The complete prompt text to pass to `claude -p`.
 */
export declare function buildClaudePPrompt(question: GaiaQuestion): string;
/**
 * Run a single GAIA question via `claude -p` headless mode.
 *
 * Spawns a subprocess, captures JSON output, extracts the final answer.
 *
 * @param question - The GAIA question to run.
 * @param options - Optional overrides (model, budget, timeout, binary path);
 *   see `ClaudePOptions` for the documented defaults.
 * @returns A promise resolving to the run's `ClaudePResult`.
 *
 * NOTE(review): `ClaudePResult` carries `isError` / `errorMessage`, which
 * suggests failures are reported in the resolved value rather than by
 * rejection — not visible from this declaration; confirm against the
 * implementation before relying on it.
 */
export declare function runGaiaQuestionViaClaudeP(question: GaiaQuestion, options?: ClaudePOptions): Promise<ClaudePResult>;
/**
 * Extract the FINAL_ANSWER value from claude -p's result text.
 *
 * Primary: regex match on `FINAL_ANSWER: <value>`
 * Fallback: last non-empty line if no FINAL_ANSWER marker found.
 *
 * @param text - Result text produced by claude -p.
 * @returns The extracted answer, or `null` when nothing could be extracted.
 *
 * NOTE(review): the exact condition for `null` is not visible here; given the
 * fallback above it is presumably only reached when `text` has no non-empty
 * line — confirm against the implementation.
 */
export declare function extractFinalAnswer(text: string): string | null;
/**
 * Options for batch runs: everything in `ClaudePOptions` plus batch-level
 * concurrency and progress reporting.
 */
export interface ClaudePBatchOptions extends ClaudePOptions {
    /** Max parallel questions (default: 2 — claude -p uses significant local resources). */
    concurrency?: number;
    /**
     * Callback for per-question progress logging.
     *
     * Arguments, in order: question index, total question count, question ID,
     * the extracted answer (`null` if extraction failed), and the reported
     * cost in USD.
     *
     * NOTE(review): whether `idx` is 0- or 1-based, and whether it reflects
     * input position or completion order, is not visible here — confirm
     * against the implementation.
     */
    onProgress?: (idx: number, total: number, questionId: string, answer: string | null, costUsd: number) => void;
}
/**
 * Run a batch of GAIA questions through the claude -p wrapper.
 *
 * Concurrency is limited (default 2) because each claude -p subprocess
 * is heavyweight — it starts a full Claude Code session with LSP etc.
 *
 * @param questions - The GAIA questions to run.
 * @param options - Optional per-run overrides plus batch settings
 *   (`concurrency`, `onProgress`); see `ClaudePBatchOptions`.
 * @returns A promise resolving to an array of `ClaudePResult`.
 *
 * NOTE(review): presumably one result per input question, in input order —
 * not established by this declaration; confirm against the implementation.
 */
export declare function runGaiaQuestionsBatchViaClaudeP(questions: GaiaQuestion[], options?: ClaudePBatchOptions): Promise<ClaudePResult[]>;
//# sourceMappingURL=gaia-claude-p.d.ts.map