#!/usr/bin/env python3 """ gonext_agent_chat.py — streaming agent chat for the gonext local worker. Reads on stdin: { "messages": [{"role": "system"|"user"|"assistant", "content": str}, ...], "agentBaseURL": str, "agentApiKey": str, "agentModelId": str, "codingBaseURL": str, # optional: dedicated coding/reasoning model for the "codingModelId": str, # CodeAgent's tool-use loop; empty = reuse agentModelId "tools": ["http_request"], # v1: only http_request "maxSteps": int # default 10 } Emits NDJSON lines on stdout: {"type": "log", "text": "..."} — worker logs to console, not shown in chat {"type": "step", "text": "..."} — shown in area {"type": "final", "text": "..."} — assistant answer """ import contextlib import json import re import sys import traceback import urllib.request import urllib.error # Capture stdout before anything can redirect it. _emit() must always write # to the real fd-1 so the Node worker's readline loop sees NDJSON even while # contextlib.redirect_stdout(sys.stderr) is active inside agent.run(). _REAL_STDOUT = sys.stdout def _ssl_context(): import ssl # Disable cert verification — this agent runs locally against dev tunnels # (gorok, ngrok) whose certs may not chain correctly in Python's SSL store. ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE return ctx def _http_request_impl(method, url, headers=None, body=None, timeout=25): # Merge caller headers on top of sensible defaults. 
merged = {"User-Agent": "gonext-agent/1.0", "Accept": "*/*"} if headers: merged.update(headers) req = urllib.request.Request(url, method=method.upper(), headers=merged) data = body.encode() if isinstance(body, str) and body else (body or None) try: ctx = _ssl_context() with urllib.request.urlopen(req, data=data, timeout=timeout, context=ctx) as resp: status = resp.status raw = resp.read(4096) snippet = raw.decode("utf-8", errors="replace")[:2000] return f"HTTP {status}\n{snippet}" except urllib.error.HTTPError as e: raw = e.read(512) snippet = raw.decode("utf-8", errors="replace") return f"HTTP {e.code} {e.reason}\n{snippet}" except Exception as e: # noqa: BLE001 return f"Error: {e}" def _get_json(url, timeout=15): """GET a URL and parse the JSON body. Returns dict/list, or None on failure. Used by web_search against free no-key APIs (DuckDuckGo, Wikipedia). Wikipedia requires a descriptive User-Agent, so we send one. """ req = urllib.request.Request(url, method="GET", headers={ "User-Agent": "gonext-agent/1.0 (local API testing assistant)", "Accept": "application/json", }) try: with urllib.request.urlopen(req, timeout=timeout, context=_ssl_context()) as resp: return json.loads(resp.read().decode("utf-8", errors="replace")) except Exception as e: # noqa: BLE001 _log(f"web_search fetch failed {url}: {e}") return None def _web_search_impl(query): """Look up factual info via free no-key JSON APIs (DuckDuckGo + Wikipedia). Returns a short text summary with a source URL, or a 'no results' message. Tries DuckDuckGo Instant Answer first, then falls back to a Wikipedia search + REST summary. Never fabricates — callers should surface 'no results' honestly. """ from urllib.parse import quote q = (query or "").strip() if not q: return "web_search: empty query." # 1) DuckDuckGo Instant Answer API. 
ddg = _get_json( f"https://api.duckduckgo.com/?q={quote(q)}&format=json&no_html=1&skip_disambig=1" ) if isinstance(ddg, dict): abstract = (ddg.get("AbstractText") or "").strip() if abstract: src = (ddg.get("AbstractURL") or "").strip() return f"{abstract[:1500]}\nSource: {src}" if src else abstract[:1500] # No abstract — use the first related topic that has text. for topic in ddg.get("RelatedTopics") or []: if isinstance(topic, dict) and topic.get("Text"): src = (topic.get("FirstURL") or "").strip() text = topic["Text"][:1500] return f"{text}\nSource: {src}" if src else text # 2) Wikipedia: find the best-matching title, then fetch its summary extract. search = _get_json( "https://en.wikipedia.org/w/api.php?action=query&list=search" f"&srsearch={quote(q)}&format=json&srlimit=1" ) title = "" try: title = search["query"]["search"][0]["title"] except Exception: # noqa: BLE001 title = "" if title: slug = quote(title.replace(" ", "_")) summary = _get_json("https://en.wikipedia.org/api/rest_v1/page/summary/" + slug) if isinstance(summary, dict): extract = (summary.get("extract") or "").strip() if extract: src = ( (summary.get("content_urls") or {}).get("desktop", {}).get("page", "") or f"https://en.wikipedia.org/wiki/{slug}" ) return f"{extract[:1500]}\nSource: {src}" return ( f"No results found for '{q}'. Tell the user you couldn't find this — " "do NOT invent an answer or a URL." ) def _detect_model_id(base_url, api_key=""): """Ask an OpenAI-compatible server which model it serves. Queries GET {base_url}/models and returns the first reported model id. `base_url` already ends with /v1. Returns "" on any failure so callers can fall back. Used when the user supplies a coding-model URL but no model name. 
""" url = base_url.rstrip("/") + "/models" headers = {"Accept": "application/json"} if api_key and api_key != "local": headers["Authorization"] = f"Bearer {api_key}" req = urllib.request.Request(url, method="GET", headers=headers) try: with urllib.request.urlopen(req, timeout=10, context=_ssl_context()) as resp: payload = json.loads(resp.read().decode("utf-8", errors="replace")) except Exception as e: # noqa: BLE001 _log(f"model detect failed {url}: {e}") return "" data = payload.get("data") if isinstance(payload, dict) else None if isinstance(data, list) and data: first = data[0] if isinstance(first, dict) and isinstance(first.get("id"), str): return first["id"].strip() return "" def _summarise_step(step_log): """Return a short human-readable description of an agent step.""" tool_calls = getattr(step_log, "tool_calls", None) or [] observations = getattr(step_log, "observations", None) error = getattr(step_log, "error", None) step_num = getattr(step_log, "step_number", None) parts = [] for tc in tool_calls: name = getattr(tc, "name", "") args = getattr(tc, "arguments", None) if name == "python_interpreter" and isinstance(args, dict): code = args.get("code", "") # Show the http_request call if present, else first meaningful line m = re.search(r'http_request\s*\(\s*(?:method\s*=\s*)?[\'"]?(\w+)[\'"]?\s*,\s*(?:url\s*=\s*)?[\'"]([^\'"]+)', code) if m: parts.append(f"HTTP {m.group(1).upper()} {m.group(2)}") else: first = next( (l.strip() for l in code.splitlines() if l.strip() and not l.strip().startswith("#")), code[:80], ) parts.append(first[:120]) else: if isinstance(args, dict): method = args.get("method", "") url = args.get("url", "") if method and url: parts.append(f"HTTP {method.upper()} {url}") else: parts.append(f"{name}()") else: parts.append(name or "tool call") if observations: obs = str(observations).strip() # smolagents prefixes output with "Execution logs:" — skip that header line # and show the first line of actual content. 
lines = [l for l in obs.splitlines() if l.strip() and l.strip() != "Execution logs:"] if lines: parts.append(f"→ {lines[0][:200]}") if error: err = str(error) if "Import of" in err and "not allowed" in err: parts.append("→ (import blocked — using http_request tool instead)") else: parts.append(f"→ Error: {err[:120]}") # No numeric "Step N:" prefix — show only the semantic action. return (" | ".join(parts) if parts else "thinking…") # Keywords that strongly indicate the user wants to make an HTTP/network request, # regardless of what the final output is (time, text, data, etc.). _AGENT_KEYWORDS = re.compile( r"\b(" r"request|fetch|call|hit|ping|curl|wget|GET|POST|PUT|DELETE|PATCH" r"|api|endpoint|url|http|https" r"|external\s+source|external\s+api|external\s+service" r"|web\s+service|rest\s+api|rest\s+call" r"|download|scrape|crawl" r"|search|find|look\s*up|lookup|weather|news|latest|current|today|tonight" r"|date|time|what\s+day|what\s+time" r"|pdf|\.pdf|create\s+a\s+pdf|generate\s+a\s+pdf|make\s+a\s+pdf|export.*pdf|make\s+a\s+document" r")\b", re.IGNORECASE, ) def _route(task_text: str, base_url: str, api_key: str, model_id: str) -> bool: """Decide if the task needs the HTTP agent (True) or a plain chat reply (False). Fast-path: if the user explicitly mentions network/request keywords → agent. Otherwise: ask the model to classify. """ # Show the routing stage in the web Thinking panel. _emit({"type": "step", "text": "Routing your request…"}) # Fast-path: explicit HTTP/network intent overrides the model classifier. if _AGENT_KEYWORDS.search(task_text): _log(f"router → YES (keyword match)") _emit({"type": "step", "text": "→ Agent mode (needs tools)"}) return True try: from openai import OpenAI client = OpenAI(base_url=base_url, api_key=api_key or "local", max_retries=0, timeout=20) resp = client.chat.completions.create( model=model_id, messages=[ {"role": "system", "content": ( "You are a task classifier. 
Reply YES or NO only, no punctuation.\n" "Answer YES if the task requires fetching data from an external network source " "(URL, API, website, remote server), a web search / factual lookup, or the " "current date or time.\n" "Answer NO only if it is pure conversation, opinion, or simple text the " "assistant can answer directly without looking anything up." )}, {"role": "user", "content": ( f"Does this task require fetching data from an external network source?\n\n" f"Task: {task_text}\n\nYES or NO:" )}, ], max_tokens=3, temperature=0, ) answer = (resp.choices[0].message.content or "").strip().upper() _log(f"router → {answer!r} (model)") is_agent = answer.startswith("Y") _emit({"type": "step", "text": "→ Agent mode (needs tools)" if is_agent else "→ Chat reply"}) return is_agent except Exception as e: # noqa: BLE001 _log(f"router error: {e} — defaulting to agent") _emit({"type": "step", "text": "→ Agent mode (needs tools)"}) return True def _summarize_result(task_text: str, agent_output: str, base_url: str, api_key: str, model_id: str) -> str: """Always call the model to turn the raw agent output into a clean reply.""" _log(f"summarizing agent output ({len(agent_output)} chars)") try: from openai import OpenAI client = OpenAI(base_url=base_url, api_key=api_key or "local", max_retries=0, timeout=30) resp = client.chat.completions.create( model=model_id, messages=[ {"role": "system", "content": ( "You are a helpful assistant. An agent ran HTTP tools to answer the user's " "request. Write a clear, concise reply (1-3 sentences) explaining what was " "found. Do not include raw code, tool names, or error traces." 
)}, {"role": "user", "content": ( f"User asked: {task_text}\n\n" f"Agent result: {agent_output[:2000]}\n\n" "Reply to the user:" )}, ], max_tokens=200, temperature=0.3, ) summary = (resp.choices[0].message.content or "").strip() _log(f"summary: {summary[:120]}") return summary or agent_output except Exception as e: # noqa: BLE001 _log(f"summarize error: {e}") return agent_output def _plain_reply(messages: list, base_url: str, api_key: str, model_id: str) -> str: """Plain chat completion using the full conversation history.""" _THINK_RE_LOCAL = re.compile(r".*?", re.DOTALL | re.IGNORECASE) chat_messages = [{"role": "system", "content": "You are a helpful assistant."}] for m in messages: role = m.get("role", "") content = m.get("content", "") if role not in ("user", "assistant"): continue if role == "assistant": content = _THINK_RE_LOCAL.sub("", content).strip() if not content: continue chat_messages.append({"role": role, "content": content}) try: from openai import OpenAI client = OpenAI(base_url=base_url, api_key=api_key or "local", max_retries=0, timeout=60) resp = client.chat.completions.create( model=model_id, messages=chat_messages, temperature=0.7, max_tokens=512, ) return (resp.choices[0].message.content or "").strip() except Exception as e: # noqa: BLE001 return f"[Error: {e}]" def _strip_tool_tags(text: str) -> str: """Remove the internal hint tags we append to tool output (e.g. '[SUCCESS …]', '[NOTE: …]', 'Note: This URL failed …') so they never leak into the user reply.""" out = [] for ln in (text or "").splitlines(): s = ln.strip() if s.startswith("[SUCCESS") or s.startswith("[NOTE:") or s.startswith("Note: This URL failed"): continue out.append(ln) return "\n".join(out).strip() _PDF_INSTALL_HINT = ( "PDF engine not installed on the worker. On macOS:\n" " brew install pango libffi\n" " python3 -m pip install weasyprint markdown\n" "(optional, for color emoji/flags: brew install --cask font-noto-color-emoji)\n" "then restart the worker. 
(See the Instructions page in the web app.)" ) # WeasyPrint IS installed, but its system libraries (pango/cairo/gobject) couldn't be # dlopen'd — almost always because the worker process predates the build that injects # the Homebrew library path. A worker restart fixes it. _PDF_LIBS_HINT = ( "PDF engine is installed but its system libraries could not be loaded. " "Please RESTART the local worker — it automatically points WeasyPrint at the " "Homebrew libraries (/opt/homebrew/lib on Apple Silicon, /usr/local/lib on Intel). " "If it still fails, confirm `brew install pango libffi` succeeded." ) # Font stack with color-emoji fallbacks last, so WeasyPrint resolves emoji/flag # codepoints (🏆 🇿🇦) to a color font per-glyph while text uses a clean sans-serif. _PDF_FONT_STACK = ( "'Helvetica Neue', Helvetica, Arial, 'DejaVu Sans', " "'Noto Color Emoji', 'Apple Color Emoji', sans-serif" ) def _render_pdf_bytes(markdown_text: str, title: str = "") -> bytes: """Render Markdown text to PDF bytes using WeasyPrint (renders color emoji/flags). Requires the WeasyPrint system libs (pango/cairo via Homebrew on macOS). Raises RuntimeError with an install hint if the engine/libs aren't available, or on a render failure. The model is expected to pass already-formatted Markdown. """ try: import markdown as _md except Exception: # noqa: BLE001 — markdown pip pkg missing raise RuntimeError(_PDF_INSTALL_HINT) try: from weasyprint import HTML as _HTML # pulls pango/cairo via cffi at import except ModuleNotFoundError: # WeasyPrint itself isn't pip-installed. 
raise RuntimeError(_PDF_INSTALL_HINT) except Exception as e: # noqa: BLE001 — installed but cffi can't dlopen the system libs _log(f"weasyprint lib load failed: {type(e).__name__}: {str(e)[:200]}") raise RuntimeError(_PDF_LIBS_HINT) body_html = _md.markdown( markdown_text or "", extensions=["extra", "sane_lists", "tables", "nl2br"], ) html = ( "" f"{body_html}" "" ) try: return _HTML(string=html).write_pdf() except Exception as e: # noqa: BLE001 — surface a clean render failure raise RuntimeError(f"PDF rendering failed (WeasyPrint: {type(e).__name__}: {str(e)[:160]}).") def _ssl_context(): """SSL context backed by certifi's CA bundle. macOS Python (python.org / Homebrew) doesn't use the system keychain, so urllib's default verification fails with CERTIFICATE_VERIFY_FAILED. certifi ships with the openai/httpx stack the worker already depends on. """ import ssl try: import certifi return ssl.create_default_context(cafile=certifi.where()) except Exception: # noqa: BLE001 — fall back to system defaults return ssl.create_default_context() def _pdf_upload_via_api(api_base: str, worker_key: str, file_name: str, pdf_bytes: bytes) -> str: """Ask the API to presign an S3 upload, PUT the PDF bytes, return the download URL. Keeps all AWS credentials on the API/Lambda — the worker only holds its worker key. """ import urllib.request as _u import urllib.error as _ue ctx = _ssl_context() base = (api_base or "").rstrip("/") if not base or not worker_key: raise RuntimeError("PDF upload is not available: worker API base/key missing.") # 1) Presign. 
req = _u.Request( f"{base}/api/worker/pdf-upload-url", data=json.dumps({"fileName": file_name}).encode("utf-8"), headers={ "Content-Type": "application/json", "X-Worker-Key": worker_key, }, method="POST", ) try: with _u.urlopen(req, timeout=30, context=ctx) as resp: ref = json.loads(resp.read().decode("utf-8")) except _ue.HTTPError as e: # noqa: PERF203 detail = e.read().decode("utf-8", "replace")[:300] raise RuntimeError(f"Could not get a PDF upload URL (HTTP {e.code}): {detail}") except _ue.URLError as e: raise RuntimeError( f"Could not reach the API at {base} to presign the upload " f"({getattr(e, 'reason', e)}). Is the worker API deployed/online?" ) put_url = ref.get("putUrl") get_url = ref.get("getUrl") if not put_url or not get_url: raise RuntimeError("PDF upload URL response was incomplete.") # 2) Upload the bytes to the presigned PUT URL. put_req = _u.Request( put_url, data=pdf_bytes, headers={"Content-Type": "application/pdf"}, method="PUT", ) try: with _u.urlopen(put_req, timeout=60, context=ctx) as up: if up.status not in (200, 201, 204): raise RuntimeError(f"S3 upload failed (HTTP {up.status}).") except _ue.HTTPError as e: # noqa: PERF203 detail = e.read().decode("utf-8", "replace")[:200] raise RuntimeError(f"S3 upload failed (HTTP {e.code}): {detail}") except _ue.URLError as e: raise RuntimeError(f"S3 upload could not connect ({getattr(e, 'reason', e)}).") return get_url def _format_text_for_pdf(text: str, title: str, base_url: str, api_key: str, model_id: str) -> str: """Use the model to clean/structure raw text into well-formed Markdown for the PDF. Falls back to the original text (lightly wrapped) if the model call fails, so a PDF is still produced. 
""" fallback = text or "" if title and not fallback.lstrip().startswith("#"): fallback = f"# {title}\n\n{fallback}" try: from openai import OpenAI client = OpenAI(base_url=base_url, api_key=api_key or "local", max_retries=0, timeout=60) resp = client.chat.completions.create( model=model_id, messages=[ {"role": "system", "content": ( "You format raw text/data into clean Markdown for a PDF document. " "Add a single top-level '# Title', sensible headings, bullet lists, " "and tables where appropriate. Do NOT invent facts, do NOT add " "commentary, and preserve all numbers and wording. Output ONLY the " "Markdown — no code fences, no explanations." )}, {"role": "user", "content": ( (f"Title: {title}\n\n" if title else "") + f"Content to format:\n{text}" )}, ], max_tokens=1500, temperature=0.2, ) out = (resp.choices[0].message.content or "").strip() # Strip accidental ```markdown fences. if out.startswith("```"): out = re.sub(r"^```[a-zA-Z]*\n?|\n?```$", "", out).strip() return out or fallback except Exception as e: # noqa: BLE001 _log(f"pdf format error: {e} — using raw text") return fallback # A clear "make a PDF from this" request. Matched against the raw user message so we # can run the PDF pipeline deterministically — a small model cannot reliably echo a # long document back into a Python string literal for the create_pdf() tool call. _PDF_INTENT = re.compile( r"(create|make|generate|export|build|produce|convert|turn)\b[\s\S]{0,40}\bpdf\b" r"|\bpdf\b[\s\S]{0,40}\b(from|for|of|with|out of)\b", re.IGNORECASE, ) def _extract_pdf_doc_text(user_text: str) -> str: """Pull the document body out of a 'create a PDF from this text "…"' message. Prefers a quoted span; otherwise strips the leading instruction clause. """ t = user_text or "" # 1) Largest quoted span (straight, smart, or single quotes). 
best = "" for pat in (r'"([\s\S]+)"', r"“([\s\S]+)”", r"'([\s\S]+)'"): m = re.search(pat, t) if m and len(m.group(1).strip()) > len(best): best = m.group(1).strip() if best: return best # 2) Everything after a 'text:'/'following:'/'content:' lead-in. m = re.search(r"\b(texts?|following|below|content|data)\b\s*[:\-]?\s*\n?", t, re.IGNORECASE) if m and t[m.end():].strip(): return t[m.end():].strip() # 3) Drop a leading 'please create a pdf (file) from this text:' verb phrase. stripped = re.sub( r"^\s*(please\s+)?(create|make|generate|export|build|produce|convert|turn)\s+" r"(a\s+|an\s+|this\s+|the\s+)?(pdf|document)\s*(file|doc)?\s*" r"(from|for|of|with|using|out of)?\s*(this|the|following)?\s*" r"(text|texts|data|content)?\s*[:\-]?\s*", "", t, flags=re.IGNORECASE, ) return stripped.strip() or t.strip() def _derive_pdf_title(doc_text: str) -> str: """Use the first meaningful line as the document title (strip leading symbols/emoji).""" for line in (doc_text or "").splitlines(): s = line.strip().lstrip("#").strip() if not s: continue # Drop leading non-letter symbols/emoji (keep Latin + Vietnamese letters/digits). s2 = re.sub(r"^[^\wÀ-ỹ]+", "", s).strip() return (s2 or s)[:60] return "Document" def run_agent_chat(cfg): try: from smolagents import CodeAgent, OpenAIServerModel, tool except Exception as e: # noqa: BLE001 _emit({"type": "final", "text": f"[smolagents not installed: {e}]"}) return messages = cfg.get("messages") or [] agent_base_url = cfg.get("agentBaseURL") or "" agent_api_key = cfg.get("agentApiKey") or "local" agent_model_id = cfg.get("agentModelId") or "" # For the create_pdf tool: API base + worker key to request a presigned S3 upload. pdf_api_base = (cfg.get("apiBaseURL") or "").strip() pdf_worker_key = (cfg.get("workerKey") or "").strip() # Optional dedicated coding/reasoning model for the CodeAgent's tool-use loop. 
# Routing, plain replies and summarization stay on the chat model (better at # natural language); the code model only drives http_request reasoning. raw_coding_base = (cfg.get("codingBaseURL") or "").strip() raw_coding_model = (cfg.get("codingModelId") or "").strip() if raw_coding_base: # A dedicated coding server is configured. If no model name was given, # ask the server which model it serves (mlx_lm.server otherwise tries to # download a mismatched name from HF and 404s). detected = raw_coding_model or _detect_model_id(raw_coding_base, agent_api_key) if detected: coding_base_url = raw_coding_base coding_model_id = detected else: _log( f"coding model id unresolved for {raw_coding_base!r}; " "falling back to chat model" ) coding_base_url = agent_base_url coding_model_id = agent_model_id else: coding_base_url = agent_base_url coding_model_id = agent_model_id # Strict single-shot: exactly ONE agent model call per message. The single code # block must call a tool AND final_answer together — no multi-step ReAct loop. # If the model fails to call final_answer, the max-steps fallback below returns # the last tool observation deterministically (no extra model call). max_steps = 1 _log( f"start model={agent_model_id!r} base={agent_base_url!r} " f"codeModel={coding_model_id!r} codeBase={coding_base_url!r} maxSteps={max_steps}" ) # Build the task from the conversation history. We include the FULL conversation # (both user AND assistant turns) so the agent remembers what it already did — # e.g. data it fetched on a previous turn. Assistant turns are condensed (drop # reasoning; clip long raw HTTP dumps), and we keep the most recent turns # within a character budget so we never overflow the model's context window. # ~8000 chars ≈ 2k tokens, tiny against Qwen2.5-Coder-7B's 32k context, leaving # ample room for smolagents' own system prompt + step memory (HTTP observations). 
HISTORY_CHAR_BUDGET = 8000 think_re = re.compile(r".*?", re.DOTALL | re.IGNORECASE) def _condense(role, content): text = (content or "").strip() if role == "assistant": text = think_re.sub("", text).strip() # Raw HTTP dumps add little conversational value — keep only a snippet. if text.startswith("HTTP "): text = text[:500] return text # The latest user message is the current task; everything before it is history. last_user_idx = -1 for i, m in enumerate(messages): if m.get("role") == "user": last_user_idx = i if last_user_idx < 0: _emit({"type": "final", "text": "[No user message found in history]"}) return task_text = (messages[last_user_idx].get("content") or "").strip() # Routing must look at the CURRENT message alone — not the history-laden blob # below. Otherwise the keyword router matches URLs/"api"/"GET" from prior turns # and fires the agent on trivial replies like "thanks" or "good". latest_user_text = task_text # Walk prior turns newest-first, keeping condensed lines until the budget is # spent, then restore chronological (oldest→newest) order. history_lines = [] used = 0 for m in reversed(messages[:last_user_idx]): role = m.get("role", "") if role not in ("user", "assistant"): continue text = _condense(role, m.get("content", "")) if not text: continue line = f"{'User' if role == 'user' else 'Assistant'}: {text}" if used + len(line) > HISTORY_CHAR_BUDGET: break history_lines.append(line) used += len(line) history_lines.reverse() if history_lines: convo = "\n".join(history_lines) task_text = ( "Conversation so far (oldest to newest):\n" f"{convo}\n\nCurrent task: {task_text}" ) _log( f"history: {len(history_lines)} prior turn(s), {used} chars " f"(budget {HISTORY_CHAR_BUDGET}) — exact turns sent to the agent below:" ) for j, ln in enumerate(history_lines): _log(f" history[{j}]: {ln[:240]}") _log(f"current task (latest user message): {task_text.rsplit('Current task: ', 1)[-1][:240]!r}") # Route: ask the model if this task needs HTTP tool use. 
needs_agent = _route(latest_user_text, agent_base_url, agent_api_key, agent_model_id) if not needs_agent: _log("router: plain chat (no HTTP needed)") _emit({"type": "step", "text": "Composing a reply…"}) answer = _plain_reply(messages, agent_base_url, agent_api_key, agent_model_id) _log(f"plain reply: {len(answer)} chars") _emit({"type": "final", "text": answer}) return # Agent path — from here all step events go into . _log("router: agent (HTTP tool use needed)") _emit({"type": "step", "text": "Choosing a tool…"}) # ---- Deterministic PDF fast-path ------------------------------------------------- # A small model cannot reliably re-emit a long, emoji/quote-heavy document as a # Python string literal for create_pdf(text="…"), so the CodeAgent call fails before # the tool ever runs. When the user clearly wants a PDF, extract their real text and # run format → render → upload directly. No model string-echoing, and 1 fewer call. # (pdf_api_base / pdf_worker_key are read once near the top of run_agent_chat.) if _PDF_INTENT.search(latest_user_text or ""): doc_text = _extract_pdf_doc_text(latest_user_text) doc_title = _derive_pdf_title(doc_text) _log(f"PDF fast-path: title={doc_title!r} doc_chars={len(doc_text)}") _emit({"type": "step", "text": "Formatting document…"}) markdown_text = _format_text_for_pdf( doc_text, doc_title, coding_base_url, agent_api_key, coding_model_id ) _emit({"type": "step", "text": "Rendering PDF…"}) try: pdf_bytes = _render_pdf_bytes(markdown_text, doc_title) except RuntimeError as e: msg = str(e) _log(f"PDF fast-path render error: {msg[:200]}") _emit({"type": "final", "text": msg}) return except Exception as e: # noqa: BLE001 _log(f"PDF fast-path render crash: {type(e).__name__}: {e!r}\n{traceback.format_exc()}") _emit({"type": "final", "text": ( f"Sorry — I couldn't render that into a PDF ({type(e).__name__}: {str(e)[:160]})." 
)}) return _emit({"type": "step", "text": "Uploading PDF…"}) try: download_url = _pdf_upload_via_api( pdf_api_base, pdf_worker_key, f"{doc_title}.pdf", pdf_bytes ) except Exception as e: # noqa: BLE001 _log(f"PDF fast-path upload error: {type(e).__name__}: {e!r}\n{traceback.format_exc()}") _emit({"type": "final", "text": ( f"PDF was created but could not be uploaded: {str(e)[:200]}" )}) return out = ( f"✅ Your PDF \"{doc_title}\" is ready.\n" f"Download it here (link valid for a limited time):\n{download_url}" ) _log(f"PDF fast-path ok title={doc_title!r} bytes={len(pdf_bytes)}") _emit({"type": "final", "text": out}) return # --------------------------------------------------------------------------------- # Prepend explicit tool instructions so small models pick the right tool, never # fabricate URLs/responses, and always terminate with final_answer(). from datetime import datetime as _dt_now now_str = _dt_now.now().astimezone().strftime("%A, %d %B %Y, %H:%M %Z") tool_hint = ( "YOU HAVE EXACTLY ONE TURN. Read the TASK above. In a single code block, call " "the ONE tool that fits THAT task, then pass its result to final_answer(). " "Do not plan multiple steps.\n\n" "You have FOUR tools:\n" " 1. http_request(method, url, headers='', body='', username='', password='') — " "call a SPECIFIC known API/URL.\n" " 2. web_search(query) — look up facts about a person, place, thing, or topic " "when you do NOT already have a real URL. Returns a summary + source.\n" f" 3. get_current_datetime(timezone='') — current date/time ONLY (now: {now_str}). " "Use this ONLY when the task explicitly asks for the date or time.\n" " 4. create_pdf(text, title='') — make a PDF document from text/data and return a " "download link. Use ONLY when the task asks to create/make/generate/export a PDF.\n" "\n" "http_request RETURN FORMAT: 'HTTP 200\\n{body}' — first line is 'HTTP

', body follows.\n"
        "\n"
        "BASIC AUTH — ALWAYS use username= and password=, NEVER construct headers manually:\n"
        "  response = http_request('GET', 'https://api.example.com/data',\n"
        "                          username='alice@example.com', password='secret123')\n"
        "  final_answer(response)\n"
        "The function handles base64 encoding automatically. NEVER write 'Basic ' + anything.\n"
        "\n"
        "BEARER TOKEN — use headers:\n"
        "  response = http_request('GET', url, headers='{\"Authorization\": \"Bearer TOKEN\"}')\n"
        "  final_answer(response)\n"
        "\n"
        "CHOOSING A TOOL (match the TASK, not these examples):\n"
        "- ONLY a date/time question (e.g. 'what is the date today') -> get_current_datetime().\n"
        "- 'who is' / 'what is' / 'tell me about' / a person / place / topic / general "
        "knowledge -> web_search(query).\n"
        "- 'create/make/generate/export a PDF' of some text/data -> create_pdf(text, title).\n"
        "- A specific known API/URL was given -> http_request().\n"
        "\n"
        "RULES:\n"
        "- NEVER invent or guess a URL. If you have no real URL, use web_search() instead. "
        "If nothing works, call final_answer explaining what you need — do NOT make up an answer.\n"
        "- Only report what a tool ACTUALLY returned. Never fabricate a response, body, or status code.\n"
        "- Pass an http_request response DIRECTLY to final_answer — do NOT split, parse, or index it.\n"
        "- If a response starts with 'HTTP 2' it SUCCEEDED — call final_answer immediately.\n"
        "- If a tool returns 'Error:' or HTTP 4xx/5xx, try a DIFFERENT approach, not the same URL.\n"
        "- Do NOT put final_answer outside the code block.\n\n"
    )
    # Lead with the TASK so the weak model anchors on what's actually being asked —
    # not on the tool reference below. (Previously the hint led with the date, and the
    # 3B model treated every message as a date question.)
    task_with_hint = (
        "TASK (answer THIS, choose the tool that fits it):\n"
        f"{task_text}\n\n"
        "----- TOOL REFERENCE -----\n"
        + tool_hint
    )

    # Track URLs that have already failed so we don't retry dead endpoints across steps.
    _failed_urls: set = set()

    # Remember the last tool output so the single-shot fallback + the deterministic
    # final formatting can report exactly what a tool returned (no extra model call).
    _last_obs: dict = {"text": ""}

    @tool
    def http_request(method: str, url: str, headers: str = "", body: str = "",
                     username: str = "", password: str = "") -> str:
        """Perform an HTTP request and return the status code and body preview.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE, etc.)
            url: Full URL to request
            headers: Optional JSON object string of request headers
            body: Optional request body string
            username: Username for Basic Authentication (avoids manual base64 encoding)
            password: Password for Basic Authentication
        """
        import base64 as _b64
        parsed_headers = {}
        # Basic Auth: encode credentials automatically if provided.
        if username or password:
            creds = _b64.b64encode(f"{username}:{password}".encode()).decode()
            parsed_headers["Authorization"] = f"Basic {creds}"
        if headers:
            try:
                parsed_headers.update(json.loads(headers))
            except Exception:  # noqa: BLE001
                pass
        url_key = f"{method.upper()}:{url}"
        if url_key in _failed_urls:
            msg = f"Error: {url} already failed — try a different URL or use Python stdlib."
            _emit({"type": "step", "text": f"HTTP {method.upper()} {url} → (skipped, already failed)"})
            _log(f"http_request skipped (already failed): {url_key}")
            return msg
        result = _http_request_impl(method, url, parsed_headers, body or None)
        if result.startswith("Error:"):
            # Retry once for flaky connections (e.g. gorok tunnels).
            _log(f"http_request retry {method.upper()} {url}")
            result = _http_request_impl(method, url, parsed_headers, body or None)
        if result.startswith("Error:"):
            # Both attempts failed — mark URL as dead so model tries something else.
            _failed_urls.add(url_key)
            result = (
                f"{result}\n"
                "Note: This URL failed twice. Do NOT retry it. "
                "Try a DIFFERENT URL or use Python's datetime/math/etc. module instead."
            )
        status_line = result.split("\n")[0][:150] if result else "no response"
        # Detect HTML pages so the model stops trying to json.loads() a web page.
        body_part = result.split("\n", 1)[1].lstrip().lower() if "\n" in result else ""
        is_html = body_part.startswith(" str:
        """Return the current date and time. Use for any date/time question — no HTTP needed.

        Args:
            timezone: Optional IANA timezone name (e.g. 'Asia/Bangkok', 'UTC'). Empty = server local time.
        """
        from datetime import datetime as _dtl
        try:
            if timezone:
                from zoneinfo import ZoneInfo
                now = _dtl.now(ZoneInfo(timezone))
            else:
                now = _dtl.now().astimezone()
        except Exception:  # noqa: BLE001
            now = _dtl.now().astimezone()
        out = now.strftime("%A, %d %B %Y, %H:%M:%S %Z")
        _emit({"type": "step", "text": f"Current date/time → {out}"})
        _log(f"get_current_datetime({timezone!r}) → {out}")
        _last_obs["text"] = out
        return out

    @tool
    def web_search(query: str) -> str:
        """Search for factual or encyclopedic information using free no-key sources.

        Use this INSTEAD of guessing a URL when the user asks to 'find' something or asks a
        general-knowledge question. Returns a short summary and a source URL.

        Args:
            query: What to look up, e.g. 'capital of France' or 'productivity day-to-day method'.
        """
        # NOTE(review): the docstring above is deliberately left word-for-word —
        # presumably @tool exposes it to the model as the tool description; confirm
        # against smolagents before rewording it.
        shown_query = query[:80]
        # Tell the chat UI a lookup is starting before the network round-trip begins.
        _emit({"type": "step", "text": f"Searching the web → {shown_query}"})
        summary = _web_search_impl(query)
        _log(f"web_search {query[:60]!r} → {summary[:80]}")
        # Record the observation so the single-shot fallback can report it verbatim.
        _last_obs["text"] = summary
        return summary

    @tool
    def create_pdf(text: str, title: str = "") -> str:
        """Create a PDF document from text/data and return a download link.

        Use this ONLY when the user explicitly asks to make/create/generate/export a PDF.
        The text is first cleaned into well-formed Markdown, then rendered to a PDF on the
        worker and uploaded to cloud storage; the returned message contains the download URL.

        Args:
            text: The content to put in the PDF (raw text, notes, or data).
            title: Optional document title shown at the top and used in the file name.
        """
        # NOTE(review): docstring kept word-for-word — presumably @tool hands it to
        # the model as the tool description; confirm before rewording.
        label = (title or "").strip() or "Document"

        # Stage 1 — turn the raw text into Markdown (this costs one extra model call).
        _emit({"type": "step", "text": "Formatting document…"})
        markdown_text = _format_text_for_pdf(
            text or "", label, coding_base_url, agent_api_key, coding_model_id
        )

        # Stage 2 — render the Markdown to PDF bytes. Every exception is converted
        # into a plain message so this tool always hands a string back to the agent.
        _emit({"type": "step", "text": "Rendering PDF…"})
        failure = None
        pdf_bytes = None
        try:
            pdf_bytes = _render_pdf_bytes(markdown_text, label)
        except RuntimeError as err:
            # RuntimeError carries a ready-made message (engine missing / clean
            # render failure), so it is passed through untouched.
            failure = str(err)
            _log(f"create_pdf render error: {failure[:200]}")
        except Exception as err:  # noqa: BLE001 — unexpected render crash
            _log(f"create_pdf render crash: {type(err).__name__}: {err!r}\n{traceback.format_exc()}")
            failure = (
                "Sorry — I couldn't render that text into a PDF "
                f"({type(err).__name__}: {str(err)[:160]}). "
                "Try simpler text without unusual symbols/emoji."
            )
        if failure is not None:
            _last_obs["text"] = failure
            return failure

        # Stage 3 — upload through the API helper and obtain the download URL.
        _emit({"type": "step", "text": "Uploading PDF…"})
        try:
            download_url = _pdf_upload_via_api(
                pdf_api_base, pdf_worker_key, f"{label}.pdf", pdf_bytes
            )
        except Exception as err:  # noqa: BLE001 — network / API errors must not crash the tool
            _log(f"create_pdf upload error: {type(err).__name__}: {err!r}\n{traceback.format_exc()}")
            failure = f"PDF was created but could not be uploaded: {str(err)[:200]}"
            _last_obs["text"] = failure
            return failure

        # Success: build the user-facing message, announce it, and remember it.
        success = "\n".join([
            f"✅ Your PDF \"{label}\" is ready.",
            "Download it here (link valid for a limited time):",
            f"{download_url}",
        ])
        _emit({"type": "step", "text": f"PDF ready → {label}.pdf"})
        _log(f"create_pdf ok title={label!r} bytes={len(pdf_bytes)}")
        _last_obs["text"] = success
        return success

    def step_callback(step_log):
        """Log one finished agent step and emit a one-line summary as a ``step`` event.

        Args:
            step_log: The step record handed over by the agent. Only
                ``step_number``, ``model_input_messages`` and ``model_output`` are
                read here, all through ``getattr`` so a missing attribute is fine.

        The summary comes from ``_summarise_step``; if that raises, the error text
        is emitted instead. Empty summaries, and summaries ending in ``"| →"``, are
        logged as skipped rather than emitted.
        """
        step_num = getattr(step_log, "step_number", "?")

        # Pure diagnostics: a malformed message must never abort the agent run, so
        # this section is best-effort — the same way _LoggingModel guards its logging.
        try:
            # Log what was sent to the model (last message in the conversation).
            model_input = getattr(step_log, "model_input_messages", None)
            if model_input:
                last = model_input[-1]
                # Accept both dict-shaped messages and message objects.
                if isinstance(last, dict):
                    raw = last.get("content", "")
                else:
                    raw = getattr(last, "content", "")
                if isinstance(raw, list):
                    # A part's "text" may be missing or None; coerce so join cannot raise.
                    raw = " ".join(
                        str(p.get("text") or "") for p in raw if isinstance(p, dict)
                    )
                _log(f"step {step_num} → model input (tail): {str(raw)[:400]}")

            # Log what the model generated (the Python code block).
            model_output = getattr(step_log, "model_output", None)
            if model_output:
                _log(f"step {step_num} ← model output: {str(model_output)[:400]}")
        except Exception as e:  # noqa: BLE001 — logging must not break the step
            _log(f"step {step_num} log error: {e}")

        try:
            text = _summarise_step(step_log)
        except Exception as e:  # noqa: BLE001
            text = f"Step: {e}"
        # Skip emitting if there's nothing beyond the tool name — the tool already
        # emitted its own step event with the actual response above.
        if not text or text.rstrip().endswith("| →"):
            _log(f"step {step_num} (empty obs, skipped)")
            return
        _log(f"step {step_num}: {text[:200]}")
        _emit({"type": "step", "text": text})

    # Wrap the model so we can see EXACTLY what smolagents posts to the model
    # server on every step — including its own system prompt, the task we passed,
    # and any step memory it accumulates. completion_kwargs["messages"] here is the
    # literal messages array sent to /v1/chat/completions.
    class _LoggingModel(OpenAIServerModel):
        """OpenAIServerModel that logs the outgoing ``messages`` of every request."""

        def _prepare_completion_kwargs(self, *args, **kwargs):
            """Build the completion kwargs via the parent class, log them, return them as-is.

            Each message is logged as index, role, flattened length and the first
            600 characters (newlines replaced by spaces). Logging is best-effort:
            any failure is itself logged and the kwargs are still returned.
            """
            prepared = super()._prepare_completion_kwargs(*args, **kwargs)
            try:
                outgoing = prepared.get("messages", []) or []
                _log(f"=== MODEL REQUEST: {len(outgoing)} message(s) sent to the model ===")
                for index, message in enumerate(outgoing):
                    # Messages may be plain dicts or objects with role/content attributes.
                    if isinstance(message, dict):
                        role = message.get("role")
                        content = message.get("content")
                    else:
                        role = getattr(message, "role", "?")
                        content = getattr(message, "content", "")
                    # Multi-part content is flattened to the text of its parts.
                    if not isinstance(content, list):
                        flat = str(content)
                    else:
                        pieces = [
                            part.get("text", "") if isinstance(part, dict) else str(part)
                            for part in content
                        ]
                        flat = " ".join(pieces)
                    flat = flat.replace("\n", " ")
                    _log(f"  [{index}] {role} ({len(flat)} chars): {flat[:600]}")
                _log("=== END MODEL REQUEST ===")
            except Exception as err:  # noqa: BLE001
                _log(f"MODEL REQUEST log error: {err}")
            return prepared

    # Single-shot agent: if the one model call doesn't end in final_answer(),
    # smolagents would normally make an EXTRA model call (provide_final_answer) to
    # synthesize one. We override that to return the last tool observation
    # deterministically — keeping the agent to EXACTLY ONE model call, and never
    # corrupting exact tool output (dates/numbers) the way a weak model would.
    class _SingleShotAgent(CodeAgent):
        """CodeAgent whose final-answer fallback is built without calling the model."""

        def provide_final_answer(self, task, *args, **kwargs):
            """Return the last recorded tool observation as the assistant message.

            Reads ``_last_obs["text"]``; when it is empty or whitespace-only, a
            fixed "please rephrase" message is returned instead. The ``task`` and
            any extra arguments are accepted but not used.
            """
            from smolagents.models import ChatMessage, MessageRole

            fallback = (
                "I couldn't complete that in one step. Please rephrase, or give "
                "a specific URL/API to call."
            )
            answer = (_last_obs.get("text") or "").strip() or fallback
            _log(f"single-shot fallback (no model call) → {answer[:80]}")
            return ChatMessage(role=MessageRole.ASSISTANT, content=answer)

    try:
        model = _LoggingModel(
            model_id=coding_model_id,
            api_base=coding_base_url,
            api_key=agent_api_key,
        )
        agent = _SingleShotAgent(
            tools=[http_request, web_search, get_current_datetime, create_pdf],
            model=model,
            max_steps=max_steps,
            step_callbacks=[step_callback],
            executor_kwargs={"timeout_seconds": 60},
            additional_authorized_imports=["json", "base64", "urllib", "urllib.request", "urllib.error"],
        )
        with contextlib.redirect_stdout(sys.stderr):
            result = agent.run(task_with_hint)
        # Deterministic final formatting — NO summarizer model call. The agent's
        # final_answer (or the single-shot fallback above) already holds exact tool
        # output; we just strip the internal hint tags we appended to tool results so
        # they don't leak to the user. This permanently fixes the date-corruption a
        # weak summarizer model used to introduce.
        _emit({"type": "step", "text": "Composing answer…"})
        final_text = _strip_tool_tags(str(result).strip()) or "[No result]"
        _log(f"done (deterministic, no summarizer call): {len(final_text)} chars")
        _emit({"type": "final", "text": final_text})
    except Exception as e:  # noqa: BLE001
        _log(f"agent error: {e}")
        _emit({"type": "final", "text": f"[Agent error: {e}]"})


def _log(text: str):
    """Send *text* as a ``log`` event (printed by the worker, never shown in chat)."""
    event = {"type": "log", "text": text}
    _emit(event)


def _emit(obj):
    """Serialise *obj* as one NDJSON record on the real stdout and flush at once."""
    # Payload and newline go out in a single write so a record is never split.
    record = f"{json.dumps(obj)}\n"
    _REAL_STDOUT.write(record)
    _REAL_STDOUT.flush()


def main():
    """Entry point: parse the JSON config from stdin and run the agent chat.

    If stdin does not contain valid JSON, a single ``final`` event describing
    the problem is emitted and the agent is not started.
    """
    try:
        config = json.load(sys.stdin)
    except Exception as err:  # noqa: BLE001
        _emit({"type": "final", "text": f"[Invalid input: {err}]"})
    else:
        run_agent_chat(config)


# Run the agent only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()