#!/usr/bin/env python3
"""
gonext_agent_chat.py — streaming agent chat for the gonext local worker.

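Reads on stdin:
  {
    "messages": [{"role": "system"|"user"|"assistant", "content": str}, ...],
    "agentBaseURL": str,
    "agentApiKey": str,
    "agentModelId": str,
    "codingBaseURL": str,        # optional: dedicated coding/reasoning model for the
    "codingModelId": str,        #   CodeAgent's tool-use loop; empty = reuse agentModelId
    "apiBaseURL": str,           # optional: API base used to presign the PDF upload
    "workerKey": str,            # optional: worker key sent as X-Worker-Key when presigning
    "tools": ["http_request"],   # v1: only http_request
    "maxSteps": int              # documented default 10; run_agent_chat sets max_steps = 1
  }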

Emits NDJSON lines on stdout:
  {"type": "log",   "text": "..."}   — worker logs to console, not shown in chat
  {"type": "step",  "text": "..."}   — shown in <think> area
  {"type": "final", "text": "..."}   — assistant answer
"""
import contextlib
import json
import re
import sys
import traceback
import urllib.request
import urllib.error

# Capture stdout before anything can redirect it.  _emit() must always write
# to the real fd-1 so the Node worker's readline loop sees NDJSON even while
# contextlib.redirect_stdout(sys.stderr) is active inside agent.run().
# This binds the sys.stdout object as it exists at import time, so rebinding
# sys.stdout later does not change what this name refers to.
_REAL_STDOUT = sys.stdout


def _ssl_context():
    """Return a certificate-verifying SSL context for outbound HTTPS calls.

    Prefers certifi's CA bundle when that package is importable and falls back
    to the interpreter's default trust store otherwise.

    NOTE: this module defines ``_ssl_context`` a second time further down; that
    later ``def`` rebinds the name at import time, so it is the one callers
    actually get. This earlier copy used to switch off hostname checking and
    certificate verification, which would have silently exposed every request
    to interception if the later definition were ever removed. It now verifies
    certificates the same way the effective definition does.
    """
    import ssl
    try:
        import certifi
        return ssl.create_default_context(cafile=certifi.where())
    except Exception:  # noqa: BLE001 — certifi missing/unusable: system defaults
        return ssl.create_default_context()


def _http_request_impl(method, url, headers=None, body=None, timeout=25):
    """Perform one HTTP request and describe the outcome as plain text.

    Args:
        method: HTTP verb; upper-cased before sending.
        url: URL to request.
        headers: Optional mapping merged on top of the default
            User-Agent / Accept headers.
        body: Optional request body. A non-empty ``str`` is encoded with the
            default codec; any other truthy value is passed through as-is;
            falsy values mean "no body".
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        A string. For a normal response: ``HTTP <status>``, a newline, then up
        to 2000 characters decoded from the first 4096 body bytes. For an
        ``HTTPError``: ``HTTP <code> <reason>``, a newline, then the first 512
        body bytes decoded. For any other failure: ``Error: <detail>``.
    """
    try:
        # Everything that can fail on bad input lives inside the try:
        # Request() raises ValueError for a URL without a scheme, and
        # method.upper() / dict.update() fail on wrongly-typed arguments.
        # These must come back as "Error: …" text rather than escape as
        # exceptions, like every other failure of this function.
        merged = {"User-Agent": "gonext-agent/1.0", "Accept": "*/*"}
        if headers:
            merged.update(headers)
        req = urllib.request.Request(url, method=method.upper(), headers=merged)
        data = body.encode() if isinstance(body, str) and body else (body or None)
        ctx = _ssl_context()
        with urllib.request.urlopen(req, data=data, timeout=timeout, context=ctx) as resp:
            status = resp.status
            raw = resp.read(4096)
            snippet = raw.decode("utf-8", errors="replace")[:2000]
            return f"HTTP {status}\n{snippet}"
    except urllib.error.HTTPError as e:
        # Non-2xx responses still carry a body worth showing (short snippet only).
        raw = e.read(512)
        snippet = raw.decode("utf-8", errors="replace")
        return f"HTTP {e.code} {e.reason}\n{snippet}"
    except Exception as e:  # noqa: BLE001
        return f"Error: {e}"


def _get_json(url, timeout=15):
    """Fetch *url* with GET and decode the response body as JSON.

    Serves web_search's calls to the key-less DuckDuckGo and Wikipedia APIs.
    A descriptive User-Agent is sent because Wikipedia requires one.

    Returns the parsed value (dict/list), or None when the request or the
    parse fails; the failure is logged.
    """
    request_headers = {
        "User-Agent": "gonext-agent/1.0 (local API testing assistant)",
        "Accept": "application/json",
    }
    request = urllib.request.Request(url, method="GET", headers=request_headers)
    try:
        with urllib.request.urlopen(
            request, timeout=timeout, context=_ssl_context()
        ) as response:
            body_text = response.read().decode("utf-8", errors="replace")
            return json.loads(body_text)
    except Exception as exc:  # noqa: BLE001
        _log(f"web_search fetch failed {url}: {exc}")
        return None


def _web_search_impl(query):
    """Answer a factual query from free, key-less JSON APIs.

    Order of attempts:
      1. DuckDuckGo Instant Answer — the abstract, else the first related
         topic that carries text.
      2. Wikipedia — search for the best-matching title, then fetch that
         page's REST summary extract.

    Returns a text snippet (at most 1500 characters) followed by a
    ``Source: <url>`` line when a URL is known, ``web_search: empty query.``
    for a blank query, or a "no results" message telling the caller not to
    invent an answer.
    """
    from urllib.parse import quote

    def _with_source(snippet, link):
        # Cite the source only when there is a URL to cite.
        return f"{snippet}\nSource: {link}" if link else snippet

    term = (query or "").strip()
    if not term:
        return "web_search: empty query."

    # 1) DuckDuckGo Instant Answer API.
    instant = _get_json(
        f"https://api.duckduckgo.com/?q={quote(term)}&format=json&no_html=1&skip_disambig=1"
    )
    if isinstance(instant, dict):
        abstract = (instant.get("AbstractText") or "").strip()
        if abstract:
            link = (instant.get("AbstractURL") or "").strip()
            return _with_source(abstract[:1500], link)
        # No abstract — fall back to the first related topic that has text.
        for entry in instant.get("RelatedTopics") or []:
            if not isinstance(entry, dict) or not entry.get("Text"):
                continue
            link = (entry.get("FirstURL") or "").strip()
            snippet = entry["Text"][:1500]
            return _with_source(snippet, link)

    # 2) Wikipedia: best-matching title first, then its summary extract.
    lookup = _get_json(
        "https://en.wikipedia.org/w/api.php?action=query&list=search"
        f"&srsearch={quote(term)}&format=json&srlimit=1"
    )
    try:
        page_title = lookup["query"]["search"][0]["title"]
    except Exception:  # noqa: BLE001
        page_title = ""
    if page_title:
        slug = quote(page_title.replace(" ", "_"))
        page = _get_json("https://en.wikipedia.org/api/rest_v1/page/summary/" + slug)
        extract = (page.get("extract") or "").strip() if isinstance(page, dict) else ""
        if extract:
            urls = page.get("content_urls") or {}
            page_url = (
                urls.get("desktop", {}).get("page", "")
                or f"https://en.wikipedia.org/wiki/{slug}"
            )
            return f"{extract[:1500]}\nSource: {page_url}"

    return (
        f"No results found for '{term}'. Tell the user you couldn't find this — "
        "do NOT invent an answer or a URL."
    )


def _detect_model_id(base_url, api_key=""):
    """Ask an OpenAI-compatible server which model it serves.

    Queries GET {base_url}/models and returns the first reported model id.
    `base_url` already ends with /v1. Used when the user supplies a
    coding-model URL but no model name.

    Args:
        base_url: Server base URL; a trailing slash is tolerated.
        api_key: Optional key. Sent as a Bearer token unless it is empty or
            the placeholder "local".

    Returns:
        The stripped id of the first entry in the response's ``data`` list, or
        "" on any failure (bad URL, network error, non-JSON body, unexpected
        payload shape) so callers can fall back.
    """
    url = base_url.rstrip("/") + "/models"
    headers = {"Accept": "application/json"}
    if api_key and api_key != "local":
        headers["Authorization"] = f"Bearer {api_key}"
    try:
        # Request() raises ValueError for a URL without a scheme, so it has to
        # be inside the try for the "returns '' on any failure" contract to
        # hold for a mistyped user-supplied base URL.
        req = urllib.request.Request(url, method="GET", headers=headers)
        with urllib.request.urlopen(req, timeout=10, context=_ssl_context()) as resp:
            payload = json.loads(resp.read().decode("utf-8", errors="replace"))
    except Exception as e:  # noqa: BLE001
        _log(f"model detect failed {url}: {e}")
        return ""
    data = payload.get("data") if isinstance(payload, dict) else None
    if isinstance(data, list) and data:
        first = data[0]
        if isinstance(first, dict) and isinstance(first.get("id"), str):
            return first["id"].strip()
    return ""


def _summarise_step(step_log):
    """Return a short human-readable description of an agent step.

    Reads ``tool_calls``, ``observations`` and ``error`` from *step_log* with
    getattr, so objects lacking any of them are accepted.

    For each tool call:
      * ``python_interpreter`` — the code is taken from ``arguments["code"]``
        or, when ``arguments`` is itself a string, from that string. If the
        code contains an ``http_request(...)`` call, it is shown as
        ``HTTP <METHOD> <url>``; otherwise the first non-blank, non-comment
        line is shown (clipped to 120 characters).
      * any other tool with dict arguments holding ``method`` and ``url`` —
        shown as ``HTTP <METHOD> <url>``; otherwise ``<name>()``.
      * anything else — the tool name, or "tool call" if it has none.

    The first meaningful observation line and any error are appended, and the
    pieces are joined with " | ". Returns "thinking…" when there is nothing
    to report.
    """
    tool_calls = getattr(step_log, "tool_calls", None) or []
    observations = getattr(step_log, "observations", None)
    error = getattr(step_log, "error", None)

    parts = []
    for tc in tool_calls:
        name = getattr(tc, "name", "")
        args = getattr(tc, "arguments", None)

        # Work out whether this call carries Python source. The code may be
        # wrapped in a dict or passed as the arguments string itself; both
        # shapes must be summarised the same way.
        code = None
        if name == "python_interpreter":
            if isinstance(args, dict):
                code = args.get("code") or ""  # tolerate an explicit None
            elif isinstance(args, str):
                code = args

        if code is not None:
            if not isinstance(code, str):
                code = str(code)
            # Show the http_request call if present, else first meaningful line
            m = re.search(r'http_request\s*\(\s*(?:method\s*=\s*)?[\'"]?(\w+)[\'"]?\s*,\s*(?:url\s*=\s*)?[\'"]([^\'"]+)', code)
            if m:
                parts.append(f"HTTP {m.group(1).upper()} {m.group(2)}")
            else:
                first = next(
                    (ln.strip() for ln in code.splitlines()
                     if ln.strip() and not ln.strip().startswith("#")),
                    code[:80],
                )
                # Never emit an empty fragment: fall back to the call name.
                parts.append(first[:120] or f"{name}()")
        elif isinstance(args, dict):
            method = args.get("method", "")
            url = args.get("url", "")
            if method and url:
                parts.append(f"HTTP {method.upper()} {url}")
            else:
                parts.append(f"{name}()")
        else:
            parts.append(name or "tool call")

    if observations:
        obs = str(observations).strip()
        # smolagents prefixes output with "Execution logs:" — skip that header line
        # and show the first line of actual content.
        lines = [ln for ln in obs.splitlines() if ln.strip() and ln.strip() != "Execution logs:"]
        if lines:
            parts.append(f"→ {lines[0][:200]}")

    if error:
        err = str(error)
        if "Import of" in err and "not allowed" in err:
            parts.append("→ (import blocked — using http_request tool instead)")
        else:
            parts.append(f"→ Error: {err[:120]}")

    # No numeric "Step N:" prefix — show only the semantic action.
    return " | ".join(parts) if parts else "thinking…"


# Keywords that strongly indicate the user wants to make an HTTP/network request,
# regardless of what the final output is (time, text, data, etc.).
# The whole alternation is wrapped in \b…\b and compiled with IGNORECASE, so the
# upper-case verbs (GET, POST, …) also match in lower case, and everyday words
# such as "find", "time" or "current" count as a match too.
# NOTE(review): the bare `pdf` alternative already matches any text that the
# `\.pdf` and `(create|generate|make)\s+a\s+pdf` entries match, so those entries
# look redundant — confirm before pruning.
_AGENT_KEYWORDS = re.compile(
    r"\b("
    # HTTP verbs and request vocabulary.
    r"request|fetch|call|hit|ping|curl|wget|GET|POST|PUT|DELETE|PATCH"
    r"|api|endpoint|url|http|https"
    r"|external\s+source|external\s+api|external\s+service"
    r"|web\s+service|rest\s+api|rest\s+call"
    r"|download|scrape|crawl"
    # Lookup / freshness vocabulary.
    r"|search|find|look\s*up|lookup|weather|news|latest|current|today|tonight"
    r"|date|time|what\s+day|what\s+time"
    # PDF / document creation vocabulary.
    r"|pdf|\.pdf|create\s+a\s+pdf|generate\s+a\s+pdf|make\s+a\s+pdf|export.*pdf|make\s+a\s+document"
    r")\b",
    re.IGNORECASE,
)


def _route(task_text: str, base_url: str, api_key: str, model_id: str) -> bool:
    """Choose between the tool-using agent (True) and a plain chat reply (False).

    A keyword hit in *task_text* short-circuits to the agent. Otherwise the
    chat model is asked for a YES/NO verdict, and any answer starting with "Y"
    selects the agent. If the classifier call fails, the agent is chosen.
    Each stage is reported as a "step" event and a log line.
    """
    def _announce(use_agent):
        # Tell the Thinking panel which path was picked.
        label = "→ Agent mode (needs tools)" if use_agent else "→ Chat reply"
        _emit({"type": "step", "text": label})

    # Show the routing stage in the web Thinking panel.
    _emit({"type": "step", "text": "Routing your request…"})

    # Fast-path: explicit HTTP/network intent overrides the model classifier.
    if _AGENT_KEYWORDS.search(task_text):
        _log("router → YES (keyword match)")
        _announce(True)
        return True

    try:
        from openai import OpenAI
        classifier = OpenAI(base_url=base_url, api_key=api_key or "local",
                            max_retries=0, timeout=20)
        system_prompt = (
            "You are a task classifier. Reply YES or NO only, no punctuation.\n"
            "Answer YES if the task requires fetching data from an external network source "
            "(URL, API, website, remote server), a web search / factual lookup, or the "
            "current date or time.\n"
            "Answer NO only if it is pure conversation, opinion, or simple text the "
            "assistant can answer directly without looking anything up."
        )
        user_prompt = (
            "Does this task require fetching data from an external network source?\n\n"
            f"Task: {task_text}\n\nYES or NO:"
        )
        completion = classifier.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=3,
            temperature=0,
        )
        verdict = (completion.choices[0].message.content or "").strip().upper()
        _log(f"router → {verdict!r} (model)")
        use_agent = verdict.startswith("Y")
        _announce(use_agent)
        return use_agent
    except Exception as exc:  # noqa: BLE001
        _log(f"router error: {exc} — defaulting to agent")
        _announce(True)
        return True


def _summarize_result(task_text: str, agent_output: str,
                       base_url: str, api_key: str, model_id: str) -> str:
    """Ask the chat model to rewrite raw agent output as a short user reply.

    Only the first 2000 characters of *agent_output* are sent to the model.
    Returns the model's text, or *agent_output* unchanged when the model
    returns nothing or the call raises.
    """
    _log(f"summarizing agent output ({len(agent_output)} chars)")
    try:
        from openai import OpenAI
        llm = OpenAI(base_url=base_url, api_key=api_key or "local",
                     max_retries=0, timeout=30)
        system_prompt = (
            "You are a helpful assistant. An agent ran HTTP tools to answer the user's "
            "request. Write a clear, concise reply (1-3 sentences) explaining what was "
            "found. Do not include raw code, tool names, or error traces."
        )
        user_prompt = (
            f"User asked: {task_text}\n\n"
            f"Agent result: {agent_output[:2000]}\n\n"
            "Reply to the user:"
        )
        completion = llm.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=200,
            temperature=0.3,
        )
        reply = (completion.choices[0].message.content or "").strip()
        _log(f"summary: {reply[:120]}")
        if reply:
            return reply
        return agent_output
    except Exception as exc:  # noqa: BLE001
        _log(f"summarize error: {exc}")
        return agent_output


def _plain_reply(messages: list, base_url: str, api_key: str, model_id: str) -> str:
    """Plain chat completion using the full conversation history.

    Builds the request from the user/assistant turns in *messages* (other
    roles are dropped and a fixed system prompt is prepended). Assistant turns
    have their ``<think>…</think>`` spans removed and are skipped if nothing
    remains.

    Args:
        messages: Conversation turns as dicts with ``role`` and ``content``.
            Entries that are not dicts are ignored; a missing or ``None``
            content is treated as an empty string.
        base_url: OpenAI-compatible server base URL.
        api_key: API key; an empty value is replaced by "local".
        model_id: Model name passed to the completion call.

    Returns:
        The stripped reply text, or ``[Error: <detail>]`` if the call fails.
    """
    think_re = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
    chat_messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for m in messages or []:
        if not isinstance(m, dict):
            continue
        role = m.get("role", "")
        if role not in ("user", "assistant"):
            continue
        content = m.get("content")
        if content is None:
            # A turn may carry an explicit null content; re.sub() would raise
            # TypeError on it, and that happens before the try below.
            content = ""
        if role == "assistant":
            if isinstance(content, str):
                content = think_re.sub("", content).strip()
            if not content:
                continue
        chat_messages.append({"role": role, "content": content})
    try:
        from openai import OpenAI
        client = OpenAI(base_url=base_url, api_key=api_key or "local",
                        max_retries=0, timeout=60)
        resp = client.chat.completions.create(
            model=model_id,
            messages=chat_messages,
            temperature=0.7,
            max_tokens=512,
        )
        return (resp.choices[0].message.content or "").strip()
    except Exception as e:  # noqa: BLE001
        return f"[Error: {e}]"


def _strip_tool_tags(text: str) -> str:
    """Drop internal hint lines from tool output before it reaches the user.

    A line is removed when, ignoring surrounding whitespace, it starts with
    '[SUCCESS', '[NOTE:' or 'Note: This URL failed'. The remaining lines are
    re-joined and the result is stripped. ``None`` is treated as "".
    """
    hint_prefixes = ("[SUCCESS", "[NOTE:", "Note: This URL failed")
    kept = [
        row for row in (text or "").splitlines()
        if not row.strip().startswith(hint_prefixes)
    ]
    return "\n".join(kept).strip()


# Text of the RuntimeError raised by _render_pdf_bytes when the `markdown` or
# `weasyprint` Python package cannot be imported at all.
_PDF_INSTALL_HINT = (
    "PDF engine not installed on the worker. On macOS:\n"
    "    brew install pango libffi\n"
    "    python3 -m pip install weasyprint markdown\n"
    "(optional, for color emoji/flags: brew install --cask font-noto-color-emoji)\n"
    "then restart the worker. (See the Instructions page in the web app.)"
)

# WeasyPrint IS installed, but its system libraries (pango/cairo/gobject) couldn't be
# dlopen'd — almost always because the worker process predates the build that injects
# the Homebrew library path. A worker restart fixes it.
# Raised by _render_pdf_bytes when importing weasyprint fails with anything other
# than ModuleNotFoundError.
_PDF_LIBS_HINT = (
    "PDF engine is installed but its system libraries could not be loaded. "
    "Please RESTART the local worker — it automatically points WeasyPrint at the "
    "Homebrew libraries (/opt/homebrew/lib on Apple Silicon, /usr/local/lib on Intel). "
    "If it still fails, confirm `brew install pango libffi` succeeded."
)

# Font stack with color-emoji fallbacks last, so WeasyPrint resolves emoji/flag
# codepoints (🏆 🇿🇦) to a color font per-glyph while text uses a clean sans-serif.
# Interpolated into the `body { font-family: … }` rule of the PDF stylesheet.
_PDF_FONT_STACK = (
    "'Helvetica Neue', Helvetica, Arial, 'DejaVu Sans', "
    "'Noto Color Emoji', 'Apple Color Emoji', sans-serif"
)


def _render_pdf_bytes(markdown_text: str, title: str = "") -> bytes:
    """Render Markdown text to PDF bytes using WeasyPrint (renders color emoji/flags).

    The Markdown is converted to HTML with the ``markdown`` package, wrapped
    in a small A4 stylesheet, and handed to WeasyPrint. The model is expected
    to pass already-formatted Markdown.

    Args:
        markdown_text: Markdown source; ``None``/empty renders an empty body.
        title: Optional document title. When given it is HTML-escaped and
            emitted as the page's ``<title>`` element (it is not added to the
            visible body).

    Returns:
        The rendered PDF as bytes.

    Raises:
        RuntimeError: with an install hint when ``markdown``/``weasyprint``
            are not importable, with a restart hint when WeasyPrint's system
            libraries fail to load, or with a short description when
            rendering itself fails. The original exception is chained as the
            cause.
    """
    from html import escape as _escape

    try:
        import markdown as _md
    except Exception as exc:  # noqa: BLE001 — markdown pip pkg missing
        raise RuntimeError(_PDF_INSTALL_HINT) from exc
    try:
        from weasyprint import HTML as _HTML  # pulls pango/cairo via cffi at import
    except ModuleNotFoundError as exc:
        # WeasyPrint itself isn't pip-installed.
        raise RuntimeError(_PDF_INSTALL_HINT) from exc
    except Exception as e:  # noqa: BLE001 — installed but cffi can't dlopen the system libs
        _log(f"weasyprint lib load failed: {type(e).__name__}: {str(e)[:200]}")
        raise RuntimeError(_PDF_LIBS_HINT) from e

    body_html = _md.markdown(
        markdown_text or "",
        extensions=["extra", "sane_lists", "tables", "nl2br"],
    )
    # The title used to be accepted and silently dropped; put it in <title>.
    # Escaped because it is derived from user text.
    title_tag = f"<title>{_escape(str(title))}</title>" if title else ""
    html = (
        "<html><head><meta charset='utf-8'>"
        f"{title_tag}"
        "<style>"
        "@page { size: A4; margin: 2cm; }"
        f"body {{ font-family: {_PDF_FONT_STACK}; font-size: 11pt; line-height: 1.5; color: #18181b; }}"
        "h1 { font-size: 20pt; margin: 0 0 12pt; }"
        "h2 { font-size: 15pt; margin: 16pt 0 8pt; }"
        "h3 { font-size: 12pt; margin: 12pt 0 6pt; }"
        "code, pre { font-family: 'DejaVu Sans Mono', Courier, monospace; background: #f4f4f5; }"
        "pre { padding: 8pt; white-space: pre-wrap; }"
        "table { border-collapse: collapse; width: 100%; }"
        "th, td { border: 1px solid #d4d4d8; padding: 4pt 6pt; text-align: left; }"
        "</style></head><body>"
        f"{body_html}"
        "</body></html>"
    )
    try:
        return _HTML(string=html).write_pdf()
    except Exception as e:  # noqa: BLE001 — surface a clean render failure
        raise RuntimeError(
            f"PDF rendering failed (WeasyPrint: {type(e).__name__}: {str(e)[:160]})."
        ) from e


def _ssl_context():
    """Build a verifying SSL context, preferring certifi's CA bundle.

    macOS Python (python.org / Homebrew) doesn't use the system keychain, so
    urllib's default verification fails with CERTIFICATE_VERIFY_FAILED.
    certifi ships with the openai/httpx stack the worker already depends on.
    If certifi cannot be used, the interpreter's default context is returned.
    """
    import ssl
    try:
        import certifi
        bundle_path = certifi.where()
        context = ssl.create_default_context(cafile=bundle_path)
    except Exception:  # noqa: BLE001 — fall back to system defaults
        context = ssl.create_default_context()
    return context


def _pdf_upload_via_api(api_base: str, worker_key: str, file_name: str,
                        pdf_bytes: bytes) -> str:
    """Upload a PDF through an API-issued presigned URL and return its download URL.

    Step 1 POSTs ``{"fileName": ...}`` to ``{api_base}/api/worker/pdf-upload-url``
    with the worker key in ``X-Worker-Key`` and reads ``putUrl`` / ``getUrl``
    from the JSON reply. Step 2 PUTs the bytes to ``putUrl``. AWS credentials
    stay on the API/Lambda — the worker only holds its worker key.

    Raises:
        RuntimeError: when the base URL or key is missing, the presign call
            fails or returns an incomplete reply, or the upload fails.
    """
    import urllib.error as _ue
    import urllib.request as _u

    ssl_ctx = _ssl_context()
    root = (api_base or "").rstrip("/")
    if not (root and worker_key):
        raise RuntimeError("PDF upload is not available: worker API base/key missing.")

    # 1) Presign.
    presign_req = _u.Request(
        f"{root}/api/worker/pdf-upload-url",
        data=json.dumps({"fileName": file_name}).encode("utf-8"),
        headers={
            "Content-Type": "application/json",
            "X-Worker-Key": worker_key,
        },
        method="POST",
    )
    try:
        with _u.urlopen(presign_req, timeout=30, context=ssl_ctx) as reply:
            presigned = json.loads(reply.read().decode("utf-8"))
    except _ue.HTTPError as err:  # noqa: PERF203
        detail = err.read().decode("utf-8", "replace")[:300]
        raise RuntimeError(f"Could not get a PDF upload URL (HTTP {err.code}): {detail}")
    except _ue.URLError as err:
        raise RuntimeError(
            f"Could not reach the API at {root} to presign the upload "
            f"({getattr(err, 'reason', err)}). Is the worker API deployed/online?"
        )
    put_url = presigned.get("putUrl")
    get_url = presigned.get("getUrl")
    if not (put_url and get_url):
        raise RuntimeError("PDF upload URL response was incomplete.")

    # 2) Upload the bytes to the presigned PUT URL.
    upload_req = _u.Request(
        put_url,
        data=pdf_bytes,
        headers={"Content-Type": "application/pdf"},
        method="PUT",
    )
    try:
        with _u.urlopen(upload_req, timeout=60, context=ssl_ctx) as uploaded:
            if uploaded.status not in (200, 201, 204):
                raise RuntimeError(f"S3 upload failed (HTTP {uploaded.status}).")
    except _ue.HTTPError as err:  # noqa: PERF203
        detail = err.read().decode("utf-8", "replace")[:200]
        raise RuntimeError(f"S3 upload failed (HTTP {err.code}): {detail}")
    except _ue.URLError as err:
        raise RuntimeError(f"S3 upload could not connect ({getattr(err, 'reason', err)}).")
    return get_url


def _format_text_for_pdf(text: str, title: str, base_url: str, api_key: str,
                         model_id: str) -> str:
    """Have the model restructure raw text into Markdown suitable for the PDF.

    The fallback — the original text, prefixed with a ``# title`` heading when
    a title is given and the text does not already start with ``#`` — is
    returned if the model call raises or yields nothing, so a PDF can still
    be produced. A leading/trailing code fence in the model output is removed.
    """
    fallback = text or ""
    already_headed = fallback.lstrip().startswith("#")
    if title and not already_headed:
        fallback = f"# {title}\n\n{fallback}"
    try:
        from openai import OpenAI
        formatter = OpenAI(base_url=base_url, api_key=api_key or "local",
                           max_retries=0, timeout=60)
        system_prompt = (
            "You format raw text/data into clean Markdown for a PDF document. "
            "Add a single top-level '# Title', sensible headings, bullet lists, "
            "and tables where appropriate. Do NOT invent facts, do NOT add "
            "commentary, and preserve all numbers and wording. Output ONLY the "
            "Markdown — no code fences, no explanations."
        )
        title_line = f"Title: {title}\n\n" if title else ""
        user_prompt = title_line + f"Content to format:\n{text}"
        completion = formatter.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=1500,
            temperature=0.2,
        )
        formatted = (completion.choices[0].message.content or "").strip()
        # Strip accidental ```markdown fences.
        if formatted.startswith("```"):
            formatted = re.sub(r"^```[a-zA-Z]*\n?|\n?```$", "", formatted).strip()
        if formatted:
            return formatted
        return fallback
    except Exception as exc:  # noqa: BLE001
        _log(f"pdf format error: {exc} — using raw text")
        return fallback


# A clear "make a PDF from this" request. Matched against the raw user message so we
# can run the PDF pipeline deterministically — a small model cannot reliably echo a
# long document back into a Python string literal for the create_pdf() tool call.
# Two shapes are accepted, case-insensitively:
#   - a creation verb followed, within 40 characters, by the word "pdf" (the verb has
#     no leading word boundary, so e.g. "recreate" also qualifies), or
#   - the word "pdf" followed, within 40 characters, by from/for/of/with/out of.
_PDF_INTENT = re.compile(
    r"(create|make|generate|export|build|produce|convert|turn)\b[\s\S]{0,40}\bpdf\b"
    r"|\bpdf\b[\s\S]{0,40}\b(from|for|of|with|out of)\b",
    re.IGNORECASE,
)


def _extract_pdf_doc_text(user_text: str) -> str:
    """Pull the document body out of a 'create a PDF from this text "…"' message.

    Strategy, in order:
      1. The longest quoted span (straight double, smart double, or single
         quotes). Single quotes only count when they are not attached to a
         word on their outer side, so apostrophes in contractions ("don't",
         "what's") are not mistaken for quotation marks.
      2. Everything after the first 'text' / 'following' / 'below' /
         'content' / 'data' lead-in word, if anything follows it.
      3. The message with a leading "please create a pdf from this text:"
         style verb phrase removed.

    Returns the extracted text stripped of surrounding whitespace; falls back
    to the whole stripped message. ``None`` is treated as "".
    """
    t = user_text or ""
    # 1) Largest quoted span (straight, smart, or single quotes).
    quote_patterns = (
        r'"([\s\S]+)"',
        r"“([\s\S]+)”",
        # Without the look-arounds, "I don't know what's up" would yield the
        # nonsense span "t know what" between the two apostrophes.
        r"(?<!\w)'([\s\S]+)'(?!\w)",
    )
    best = ""
    for pat in quote_patterns:
        m = re.search(pat, t)
        if m and len(m.group(1).strip()) > len(best):
            best = m.group(1).strip()
    if best:
        return best
    # 2) Everything after a 'text:'/'following:'/'content:' lead-in.
    m = re.search(r"\b(texts?|following|below|content|data)\b\s*[:\-]?\s*\n?", t, re.IGNORECASE)
    if m and t[m.end():].strip():
        return t[m.end():].strip()
    # 3) Drop a leading 'please create a pdf (file) from this text:' verb phrase.
    stripped = re.sub(
        r"^\s*(please\s+)?(create|make|generate|export|build|produce|convert|turn)\s+"
        r"(a\s+|an\s+|this\s+|the\s+)?(pdf|document)\s*(file|doc)?\s*"
        r"(from|for|of|with|using|out of)?\s*(this|the|following)?\s*"
        r"(text|texts|data|content)?\s*[:\-]?\s*",
        "",
        t,
        flags=re.IGNORECASE,
    )
    return stripped.strip() or t.strip()


def _derive_pdf_title(doc_text: str) -> str:
    """Pick a document title from the first non-empty line of *doc_text*.

    Leading '#' markers are removed from the line, then any leading run of
    symbols/emoji (characters that are neither word characters nor in the
    À–ỹ range). If that leaves nothing, the line is used as-is. The title is
    clipped to 60 characters; "Document" is returned when no line qualifies.
    """
    leading_symbols = re.compile(r"^[^\wÀ-ỹ]+")
    candidates = (
        row.strip().lstrip("#").strip() for row in (doc_text or "").splitlines()
    )
    heading = next((c for c in candidates if c), None)
    if heading is None:
        return "Document"
    cleaned = leading_symbols.sub("", heading).strip()
    return (cleaned or heading)[:60]


def run_agent_chat(cfg):
    try:
        from smolagents import CodeAgent, OpenAIServerModel, tool
    except Exception as e:  # noqa: BLE001
        _emit({"type": "final", "text": f"[smolagents not installed: {e}]"})
        return

    messages = cfg.get("messages") or []
    agent_base_url = cfg.get("agentBaseURL") or ""
    agent_api_key = cfg.get("agentApiKey") or "local"
    agent_model_id = cfg.get("agentModelId") or ""
    # For the create_pdf tool: API base + worker key to request a presigned S3 upload.
    pdf_api_base = (cfg.get("apiBaseURL") or "").strip()
    pdf_worker_key = (cfg.get("workerKey") or "").strip()
    # Optional dedicated coding/reasoning model for the CodeAgent's tool-use loop.
    # Routing, plain replies and summarization stay on the chat model (better at
    # natural language); the code model only drives http_request reasoning.
    raw_coding_base = (cfg.get("codingBaseURL") or "").strip()
    raw_coding_model = (cfg.get("codingModelId") or "").strip()
    if raw_coding_base:
        # A dedicated coding server is configured. If no model name was given,
        # ask the server which model it serves (mlx_lm.server otherwise tries to
        # download a mismatched name from HF and 404s).
        detected = raw_coding_model or _detect_model_id(raw_coding_base, agent_api_key)
        if detected:
            coding_base_url = raw_coding_base
            coding_model_id = detected
        else:
            _log(
                f"coding model id unresolved for {raw_coding_base!r}; "
                "falling back to chat model"
            )
            coding_base_url = agent_base_url
            coding_model_id = agent_model_id
    else:
        coding_base_url = agent_base_url
        coding_model_id = agent_model_id
    # Strict single-shot: exactly ONE agent model call per message. The single code
    # block must call a tool AND final_answer together — no multi-step ReAct loop.
    # If the model fails to call final_answer, the max-steps fallback below returns
    # the last tool observation deterministically (no extra model call).
    max_steps = 1

    _log(
        f"start model={agent_model_id!r} base={agent_base_url!r} "
        f"codeModel={coding_model_id!r} codeBase={coding_base_url!r} maxSteps={max_steps}"
    )

    # Build the task from the conversation history. We include the FULL conversation
    # (both user AND assistant turns) so the agent remembers what it already did —
    # e.g. data it fetched on a previous turn. Assistant turns are condensed (drop
    # <think> reasoning; clip long raw HTTP dumps), and we keep the most recent turns
    # within a character budget so we never overflow the model's context window.
    # ~8000 chars ≈ 2k tokens, tiny against Qwen2.5-Coder-7B's 32k context, leaving
    # ample room for smolagents' own system prompt + step memory (HTTP observations).
    HISTORY_CHAR_BUDGET = 8000
    think_re = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)

    def _condense(role, content):
        text = (content or "").strip()
        if role == "assistant":
            text = think_re.sub("", text).strip()
            # Raw HTTP dumps add little conversational value — keep only a snippet.
            if text.startswith("HTTP "):
                text = text[:500]
        return text

    # The latest user message is the current task; everything before it is history.
    last_user_idx = -1
    for i, m in enumerate(messages):
        if m.get("role") == "user":
            last_user_idx = i
    if last_user_idx < 0:
        _emit({"type": "final", "text": "[No user message found in history]"})
        return
    task_text = (messages[last_user_idx].get("content") or "").strip()
    # Routing must look at the CURRENT message alone — not the history-laden blob
    # below. Otherwise the keyword router matches URLs/"api"/"GET" from prior turns
    # and fires the agent on trivial replies like "thanks" or "good".
    latest_user_text = task_text

    # Walk prior turns newest-first, keeping condensed lines until the budget is
    # spent, then restore chronological (oldest→newest) order.
    history_lines = []
    used = 0
    for m in reversed(messages[:last_user_idx]):
        role = m.get("role", "")
        if role not in ("user", "assistant"):
            continue
        text = _condense(role, m.get("content", ""))
        if not text:
            continue
        line = f"{'User' if role == 'user' else 'Assistant'}: {text}"
        if used + len(line) > HISTORY_CHAR_BUDGET:
            break
        history_lines.append(line)
        used += len(line)
    history_lines.reverse()

    if history_lines:
        convo = "\n".join(history_lines)
        task_text = (
            "Conversation so far (oldest to newest):\n"
            f"{convo}\n\nCurrent task: {task_text}"
        )

    _log(
        f"history: {len(history_lines)} prior turn(s), {used} chars "
        f"(budget {HISTORY_CHAR_BUDGET}) — exact turns sent to the agent below:"
    )
    for j, ln in enumerate(history_lines):
        _log(f"  history[{j}]: {ln[:240]}")
    _log(f"current task (latest user message): {task_text.rsplit('Current task: ', 1)[-1][:240]!r}")

    # Route: ask the model if this task needs HTTP tool use.
    needs_agent = _route(latest_user_text, agent_base_url, agent_api_key, agent_model_id)

    if not needs_agent:
        _log("router: plain chat (no HTTP needed)")
        _emit({"type": "step", "text": "Composing a reply…"})
        answer = _plain_reply(messages, agent_base_url, agent_api_key, agent_model_id)
        _log(f"plain reply: {len(answer)} chars")
        _emit({"type": "final", "text": answer})
        return

    # Agent path — from here all step events go into <think>.
    _log("router: agent (HTTP tool use needed)")
    _emit({"type": "step", "text": "Choosing a tool…"})

    # ---- Deterministic PDF fast-path -------------------------------------------------
    # A small model cannot reliably re-emit a long, emoji/quote-heavy document as a
    # Python string literal for create_pdf(text="…"), so the CodeAgent call fails before
    # the tool ever runs. When the user clearly wants a PDF, extract their real text and
    # run format → render → upload directly. No model string-echoing, and 1 fewer call.
    # (pdf_api_base / pdf_worker_key are read once near the top of run_agent_chat.)
    if _PDF_INTENT.search(latest_user_text or ""):
        doc_text = _extract_pdf_doc_text(latest_user_text)
        doc_title = _derive_pdf_title(doc_text)
        _log(f"PDF fast-path: title={doc_title!r} doc_chars={len(doc_text)}")

        _emit({"type": "step", "text": "Formatting document…"})
        markdown_text = _format_text_for_pdf(
            doc_text, doc_title, coding_base_url, agent_api_key, coding_model_id
        )

        _emit({"type": "step", "text": "Rendering PDF…"})
        try:
            pdf_bytes = _render_pdf_bytes(markdown_text, doc_title)
        except RuntimeError as e:
            msg = str(e)
            _log(f"PDF fast-path render error: {msg[:200]}")
            _emit({"type": "final", "text": msg})
            return
        except Exception as e:  # noqa: BLE001
            _log(f"PDF fast-path render crash: {type(e).__name__}: {e!r}\n{traceback.format_exc()}")
            _emit({"type": "final", "text": (
                f"Sorry — I couldn't render that into a PDF ({type(e).__name__}: {str(e)[:160]})."
            )})
            return

        _emit({"type": "step", "text": "Uploading PDF…"})
        try:
            download_url = _pdf_upload_via_api(
                pdf_api_base, pdf_worker_key, f"{doc_title}.pdf", pdf_bytes
            )
        except Exception as e:  # noqa: BLE001
            _log(f"PDF fast-path upload error: {type(e).__name__}: {e!r}\n{traceback.format_exc()}")
            _emit({"type": "final", "text": (
                f"PDF was created but could not be uploaded: {str(e)[:200]}"
            )})
            return

        out = (
            f"✅ Your PDF \"{doc_title}\" is ready.\n"
            f"Download it here (link valid for a limited time):\n{download_url}"
        )
        _log(f"PDF fast-path ok title={doc_title!r} bytes={len(pdf_bytes)}")
        _emit({"type": "final", "text": out})
        return
    # ---------------------------------------------------------------------------------

    # Prepend explicit tool instructions so small models pick the right tool, never
    # fabricate URLs/responses, and always terminate with final_answer().
    from datetime import datetime as _dt_now
    now_str = _dt_now.now().astimezone().strftime("%A, %d %B %Y, %H:%M %Z")
    tool_hint = (
        "YOU HAVE EXACTLY ONE TURN. Read the TASK above. In a single code block, call "
        "the ONE tool that fits THAT task, then pass its result to final_answer(). "
        "Do not plan multiple steps.\n\n"
        "You have FOUR tools:\n"
        "  1. http_request(method, url, headers='', body='', username='', password='') — "
        "call a SPECIFIC known API/URL.\n"
        "  2. web_search(query) — look up facts about a person, place, thing, or topic "
        "when you do NOT already have a real URL. Returns a summary + source.\n"
        f"  3. get_current_datetime(timezone='') — current date/time ONLY (now: {now_str}). "
        "Use this ONLY when the task explicitly asks for the date or time.\n"
        "  4. create_pdf(text, title='') — make a PDF document from text/data and return a "
        "download link. Use ONLY when the task asks to create/make/generate/export a PDF.\n"
        "\n"
        "http_request RETURN FORMAT: 'HTTP 200\\n{body}' — first line is 'HTTP <code>', body follows.\n"
        "\n"
        "BASIC AUTH — ALWAYS use username= and password=, NEVER construct headers manually:\n"
        "  response = http_request('GET', 'https://api.example.com/data',\n"
        "                          username='alice@example.com', password='secret123')\n"
        "  final_answer(response)\n"
        "The function handles base64 encoding automatically. NEVER write 'Basic ' + anything.\n"
        "\n"
        "BEARER TOKEN — use headers:\n"
        "  response = http_request('GET', url, headers='{\"Authorization\": \"Bearer TOKEN\"}')\n"
        "  final_answer(response)\n"
        "\n"
        "CHOOSING A TOOL (match the TASK, not these examples):\n"
        "- ONLY a date/time question (e.g. 'what is the date today') -> get_current_datetime().\n"
        "- 'who is' / 'what is' / 'tell me about' / a person / place / topic / general "
        "knowledge -> web_search(query).\n"
        "- 'create/make/generate/export a PDF' of some text/data -> create_pdf(text, title).\n"
        "- A specific known API/URL was given -> http_request().\n"
        "\n"
        "RULES:\n"
        "- NEVER invent or guess a URL. If you have no real URL, use web_search() instead. "
        "If nothing works, call final_answer explaining what you need — do NOT make up an answer.\n"
        "- Only report what a tool ACTUALLY returned. Never fabricate a response, body, or status code.\n"
        "- Pass an http_request response DIRECTLY to final_answer — do NOT split, parse, or index it.\n"
        "- If a response starts with 'HTTP 2' it SUCCEEDED — call final_answer immediately.\n"
        "- If a tool returns 'Error:' or HTTP 4xx/5xx, try a DIFFERENT approach, not the same URL.\n"
        "- Do NOT put final_answer outside the code block.\n\n"
    )
    # Lead with the TASK so the weak model anchors on what's actually being asked —
    # not on the tool reference below. (Previously the hint led with the date, and the
    # 3B model treated every message as a date question.)
    task_with_hint = (
        "TASK (answer THIS, choose the tool that fits it):\n"
        f"{task_text}\n\n"
        "----- TOOL REFERENCE -----\n"
        + tool_hint
    )

    # Track URLs that have already failed so we don't retry dead endpoints across steps.
    _failed_urls: set = set()

    # Remember the last tool output so the single-shot fallback + the deterministic
    # final formatting can report exactly what a tool returned (no extra model call).
    _last_obs: dict = {"text": ""}

    @tool
    def http_request(method: str, url: str, headers: str = "", body: str = "",
                     username: str = "", password: str = "") -> str:
        """Perform an HTTP request and return the status code and body preview.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE, etc.)
            url: Full URL to request
            headers: Optional JSON object string of request headers
            body: Optional request body string
            username: Username for Basic Authentication (avoids manual base64 encoding)
            password: Password for Basic Authentication
        """
        # NOTE(review): the docstring above is presumably parsed by the @tool decorator
        # into the tool description the model sees — treat it as prompt text and confirm
        # before rewording it.
        import base64 as _b64
        parsed_headers = {}
        # Basic Auth: encode credentials automatically if provided.
        if username or password:
            creds = _b64.b64encode(f"{username}:{password}".encode()).decode()
            parsed_headers["Authorization"] = f"Basic {creds}"
        # Caller-supplied headers are merged last, so they override the generated
        # Authorization header when both are given.
        if headers:
            try:
                parsed_headers.update(json.loads(headers))
            except Exception as e:  # noqa: BLE001
                # Best-effort: the request is still sent, but without these headers.
                # Log why, so a later 401/403 can be traced back to malformed header
                # JSON. Only the exception is logged — the header text may hold secrets.
                _log(
                    "http_request: ignoring unparseable headers "
                    f"({type(e).__name__}: {e})"
                )
        # Endpoints are tracked per METHOD:url so a failed GET does not block a POST.
        url_key = f"{method.upper()}:{url}"
        if url_key in _failed_urls:
            # Refuse without touching the network; _last_obs is left as it was.
            msg = f"Error: {url} already failed — try a different URL or use Python stdlib."
            _emit({"type": "step", "text": f"HTTP {method.upper()} {url} → (skipped, already failed)"})
            _log(f"http_request skipped (already failed): {url_key}")
            return msg
        result = _http_request_impl(method, url, parsed_headers, body or None)
        if result.startswith("Error:"):
            # Retry once for flaky connections (e.g. gorok tunnels).
            _log(f"http_request retry {method.upper()} {url}")
            result = _http_request_impl(method, url, parsed_headers, body or None)
        if result.startswith("Error:"):
            # Both attempts failed — mark URL as dead so model tries something else.
            _failed_urls.add(url_key)
            result = (
                f"{result}\n"
                "Note: This URL failed twice. Do NOT retry it. "
                "Try a DIFFERENT URL or use Python's datetime/math/etc. module instead."
            )
        # First line only (capped) — this is what the chat's step event shows.
        status_line = result.split("\n")[0][:150] if result else "no response"
        # Detect HTML pages so the model stops trying to json.loads() a web page.
        body_part = result.split("\n", 1)[1].lstrip().lower() if "\n" in result else ""
        is_html = body_part.startswith("<!doctype html") or body_part.startswith("<html")
        if is_html:
            result = result + (
                "\n[NOTE: This is an HTML web page, not JSON. Do NOT json.loads() it. "
                "Use web_search() for facts, or request a JSON API endpoint instead.]"
            )
        # Append a success tag to 2xx JSON responses so the model stops and calls final_answer.
        elif result and result.startswith("HTTP 2"):
            result = result + "\n[SUCCESS — call final_answer(response) now, do not parse or retry]"
        _emit({"type": "step", "text": f"HTTP {method.upper()} {url} → {status_line}"})
        _log(f"http_request {method.upper()} {url} → {result[:80]}")
        # Keep the full (tagged) result as the latest tool observation.
        _last_obs["text"] = result
        return result

    @tool
    def get_current_datetime(timezone: str = "") -> str:
        """Return the current date and time. Use for any date/time question — no HTTP needed.

        Args:
            timezone: Optional IANA timezone name (e.g. 'Asia/Bangkok', 'UTC'). Empty = server local time.
        """
        # NOTE(review): the docstring above is presumably parsed by the @tool decorator
        # into the tool description the model sees — confirm before rewording it.
        from datetime import datetime as _dtl
        try:
            if timezone:
                from zoneinfo import ZoneInfo
                now = _dtl.now(ZoneInfo(timezone))
            else:
                now = _dtl.now().astimezone()
        except Exception as e:  # noqa: BLE001
            # The requested zone could not be used (e.g. not a valid IANA key). Keep the
            # best-effort fallback to server local time, but log it so a "wrong
            # timezone" answer can be diagnosed instead of passing silently.
            _log(
                f"get_current_datetime: timezone {timezone!r} unusable "
                f"({type(e).__name__}: {e}) — falling back to server local time"
            )
            now = _dtl.now().astimezone()
        out = now.strftime("%A, %d %B %Y, %H:%M:%S %Z")
        _emit({"type": "step", "text": f"Current date/time → {out}"})
        _log(f"get_current_datetime({timezone!r}) → {out}")
        # Record the formatted timestamp as the latest tool observation.
        _last_obs["text"] = out
        return out

    @tool
    def web_search(query: str) -> str:
        """Search for factual or encyclopedic information using free no-key sources.

        Use this INSTEAD of guessing a URL when the user asks to 'find' something or asks a
        general-knowledge question. Returns a short summary and a source URL.

        Args:
            query: What to look up, e.g. 'capital of France' or 'productivity day-to-day method'.
        """
        # NOTE(review): the docstring above is presumably parsed by the @tool decorator
        # into the tool description the model sees — confirm before rewording it.

        # Announce the lookup in the chat's step stream before it runs.
        step_event = {"type": "step", "text": f"Searching the web → {query[:80]}"}
        _emit(step_event)

        answer = _web_search_impl(query)
        _log(f"web_search {query[:60]!r} → {answer[:80]}")

        # Record the outcome as the latest tool observation, then hand it back.
        _last_obs["text"] = answer
        return answer

    @tool
    def create_pdf(text: str, title: str = "") -> str:
        """Create a PDF document from text/data and return a download link.

        Use this ONLY when the user explicitly asks to make/create/generate/export a PDF.
        The text is first cleaned into well-formed Markdown, then rendered to a PDF on the
        worker and uploaded to cloud storage; the returned message contains the download URL.

        Args:
            text: The content to put in the PDF (raw text, notes, or data).
            title: Optional document title shown at the top and used in the file name.
        """
        # NOTE(review): the docstring above is presumably parsed by the @tool decorator
        # into the tool description the model sees — confirm before rewording it.

        def _remember(message: str) -> str:
            # Store the outcome as the latest tool observation and pass it through,
            # so every exit path below records exactly what it returns.
            _last_obs["text"] = message
            return message

        doc_title = (title or "").strip() or "Document"

        # Stage 1 — turn the raw text into Markdown (a deliberate extra model call).
        _emit({"type": "step", "text": "Formatting document…"})
        markdown_text = _format_text_for_pdf(
            text or "", doc_title, coding_base_url, agent_api_key, coding_model_id
        )

        # Stage 2 — render the Markdown to PDF bytes. Every exception is converted
        # into a returned message so the tool always yields a string.
        _emit({"type": "step", "text": "Rendering PDF…"})
        try:
            pdf_bytes = _render_pdf_bytes(markdown_text, doc_title)
        except RuntimeError as render_err:
            # RuntimeError text is surfaced to the caller unchanged.
            failure = str(render_err)
            _log(f"create_pdf render error: {failure[:200]}")
            return _remember(failure)
        except Exception as crash:  # noqa: BLE001 — unexpected render crash
            crash_kind = type(crash).__name__
            _log(f"create_pdf render crash: {crash_kind}: {crash!r}\n{traceback.format_exc()}")
            return _remember(
                "Sorry — I couldn't render that text into a PDF "
                f"({crash_kind}: {str(crash)[:160]}). "
                "Try simpler text without unusual symbols/emoji."
            )

        # Stage 3 — upload the bytes through the API and obtain the download URL.
        _emit({"type": "step", "text": "Uploading PDF…"})
        try:
            download_url = _pdf_upload_via_api(
                pdf_api_base, pdf_worker_key, f"{doc_title}.pdf", pdf_bytes
            )
        except Exception as upload_err:  # noqa: BLE001 — network / API errors must not crash the tool
            _log(
                f"create_pdf upload error: {type(upload_err).__name__}: "
                f"{upload_err!r}\n{traceback.format_exc()}"
            )
            return _remember(
                f"PDF was created but could not be uploaded: {str(upload_err)[:200]}"
            )

        ready_message = (
            f'✅ Your PDF "{doc_title}" is ready.\n'
            f"Download it here (link valid for a limited time):\n{download_url}"
        )
        _emit({"type": "step", "text": f"PDF ready → {doc_title}.pdf"})
        _log(f"create_pdf ok title={doc_title!r} bytes={len(pdf_bytes)}")
        return _remember(ready_message)

    def step_callback(step_log):
        """Log one agent step and forward its summary to the chat as a step event.

        Registered with the agent as a step callback. It logs the tail of the
        model input and the model output for the step, then emits the text
        produced by ``_summarise_step`` unless that text is empty or carries
        nothing beyond the tool name.

        Args:
            step_log: The step record handed over by the agent. Only the
                attributes ``step_number``, ``model_input_messages`` and
                ``model_output`` are read here, each via ``getattr`` with a
                default, so a record lacking them is tolerated.
        """
        step_num = getattr(step_log, "step_number", "?")

        # Log what was sent to the model (last message in the conversation).
        # This is diagnostics only, so it is guarded: an unexpected message shape
        # must never abort the agent run from inside a callback.
        try:
            model_input = getattr(step_log, "model_input_messages", None)
            if model_input:
                last = model_input[-1]
                # Accept both dict-shaped messages and objects with a .content
                # attribute, the same two shapes _LoggingModel handles.
                if isinstance(last, dict):
                    raw = last.get("content", "")
                else:
                    raw = getattr(last, "content", "")
                if isinstance(raw, list):
                    # A part whose "text" is None would make join() raise; coerce to "".
                    raw = " ".join(
                        str(p.get("text") or "") for p in raw if isinstance(p, dict)
                    )
                _log(f"step {step_num} → model input (tail): {str(raw)[:400]}")
        except Exception as e:  # noqa: BLE001
            _log(f"step {step_num} model input log error: {e}")

        # Log what the model generated (the Python code block).
        model_output = getattr(step_log, "model_output", None)
        if model_output:
            _log(f"step {step_num} ← model output: {str(model_output)[:400]}")

        try:
            text = _summarise_step(step_log)
        except Exception as e:  # noqa: BLE001
            text = f"Step: {e}"
        # Skip emitting if there's nothing beyond the tool name — the tool already
        # emitted its own step event with the actual response above.
        if not text or text.rstrip().endswith("| →"):
            _log(f"step {step_num} (empty obs, skipped)")
            return
        _log(f"step {step_num}: {text[:200]}")
        _emit({"type": "step", "text": text})

    # Wrap the model so we can see EXACTLY what smolagents posts to the model
    # server on every step — including its own system prompt, the task we passed,
    # and any step memory it accumulates. completion_kwargs["messages"] here is the
    # literal messages array sent to /v1/chat/completions.
    class _LoggingModel(OpenAIServerModel):
        # Model wrapper that logs every message in the prepared completion kwargs
        # before handing them back unchanged. Logging failures are caught and
        # reported via _log so they cannot affect the request itself.
        def _prepare_completion_kwargs(self, *args, **kwargs):
            prepared = super()._prepare_completion_kwargs(*args, **kwargs)
            try:
                outgoing = prepared.get("messages", []) or []
                _log(f"=== MODEL REQUEST: {len(outgoing)} message(s) sent to the model ===")
                for idx, message in enumerate(outgoing):
                    # Messages may be plain dicts or objects exposing role/content.
                    if isinstance(message, dict):
                        role = message.get("role")
                        content = message.get("content")
                    else:
                        role = getattr(message, "role", "?")
                        content = getattr(message, "content", "")

                    # Multi-part content is flattened to its text pieces.
                    if isinstance(content, list):
                        pieces = [
                            part.get("text", "") if isinstance(part, dict) else str(part)
                            for part in content
                        ]
                        flat = " ".join(pieces)
                    else:
                        flat = str(content)

                    # One log line per message: newlines collapsed, preview capped.
                    flat = flat.replace("\n", " ")
                    _log(f"  [{idx}] {role} ({len(flat)} chars): {flat[:600]}")
                _log("=== END MODEL REQUEST ===")
            except Exception as log_err:  # noqa: BLE001
                _log(f"MODEL REQUEST log error: {log_err}")
            return prepared

    # Single-shot agent: if the one model call doesn't end in final_answer(),
    # smolagents would normally make an EXTRA model call (provide_final_answer) to
    # synthesize one. We override that to return the last tool observation
    # deterministically — keeping the agent to EXACTLY ONE model call, and never
    # corrupting exact tool output (dates/numbers) the way a weak model would.
    class _SingleShotAgent(CodeAgent):
        # Agent variant whose provide_final_answer() makes no model call: it returns
        # the last recorded tool observation, or a fixed apology when none exists.
        def provide_final_answer(self, task, *args, **kwargs):
            from smolagents.models import ChatMessage, MessageRole

            apology = ("I couldn't complete that in one step. Please rephrase, or give "
                       "a specific URL/API to call.")
            # An empty or whitespace-only observation counts as "nothing to report".
            answer = (_last_obs.get("text") or "").strip() or apology
            _log(f"single-shot fallback (no model call) → {answer[:80]}")
            return ChatMessage(role=MessageRole.ASSISTANT, content=answer)

    try:
        model = _LoggingModel(
            model_id=coding_model_id,
            api_base=coding_base_url,
            api_key=agent_api_key,
        )
        agent = _SingleShotAgent(
            tools=[http_request, web_search, get_current_datetime, create_pdf],
            model=model,
            max_steps=max_steps,
            step_callbacks=[step_callback],
            executor_kwargs={"timeout_seconds": 60},
            additional_authorized_imports=["json", "base64", "urllib", "urllib.request", "urllib.error"],
        )
        with contextlib.redirect_stdout(sys.stderr):
            result = agent.run(task_with_hint)
        # Deterministic final formatting — NO summarizer model call. The agent's
        # final_answer (or the single-shot fallback above) already holds exact tool
        # output; we just strip the internal hint tags we appended to tool results so
        # they don't leak to the user. This permanently fixes the date-corruption a
        # weak summarizer model used to introduce.
        _emit({"type": "step", "text": "Composing answer…"})
        final_text = _strip_tool_tags(str(result).strip()) or "[No result]"
        _log(f"done (deterministic, no summarizer call): {len(final_text)} chars")
        _emit({"type": "final", "text": final_text})
    except Exception as e:  # noqa: BLE001
        _log(f"agent error: {e}")
        _emit({"type": "final", "text": f"[Agent error: {e}]"})


def _log(text: str):
    """Send *text* as a ``log`` event (printed by the worker, never shown in chat)."""
    event = {"type": "log", "text": text}
    _emit(event)


def _emit(obj):
    """Serialise *obj* as one NDJSON line on the real stdout and flush right away."""
    line = f"{json.dumps(obj)}\n"
    # Always target the stream captured at import time, not the current sys.stdout,
    # which may be redirected while the agent runs.
    stream = _REAL_STDOUT
    stream.write(line)
    stream.flush()


def main():
    """Read the JSON config from stdin and run the agent chat.

    When stdin does not hold valid JSON, or holds JSON that is not an object,
    a ``final`` event describing the problem is emitted and the chat is not
    started.
    """
    try:
        cfg = json.load(sys.stdin)
    except Exception as e:  # noqa: BLE001
        _emit({"type": "final", "text": f"[Invalid input: {e}]"})
        return
    # Valid JSON is not enough: the config must be an object (see the module
    # docstring). Reject lists, strings, null, etc. with the same kind of final
    # event instead of passing them on to run_agent_chat.
    if not isinstance(cfg, dict):
        _emit({
            "type": "final",
            "text": f"[Invalid input: expected a JSON object, got {type(cfg).__name__}]",
        })
        return
    run_agent_chat(cfg)


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
