#!/usr/bin/env bash
#
# Memtrace UserPromptSubmit hook for Claude Code.
#
# Fires once per user turn (NOT per tool call). Reads the user's
# prompt; if it looks like a code-discovery question and the
# Memtrace daemon is reachable, injects an `additionalContext`
# nudge so Claude considers Memtrace MCP tools BEFORE falling back
# to Read/Grep/Glob.
#
# Why UserPromptSubmit instead of PreToolUse-on-Read|Grep|Glob:
#   - fires once per turn, not per tool call → no per-Read latency
#   - gets the user's actual prompt → can decide based on intent,
#     not on which file the model chose to grep
#   - non-blocking → just adds context, never denies a tool call
#
# Per-session debounce (round-2 G4 / agent-I):
#   The hook still fires once per user turn, but in a long Orbit-
#   style automated session that's dozens of fires per prompt.
#   Each fire pings the daemon health endpoint — cheap individually,
#   death-by-thousand-cuts in aggregate. We add a per-session lock
#   file at $HOME/.memtrace/hook-debounce/<session_id>.lock; if its
#   mtime is within MEMTRACE_HOOK_DEBOUNCE_SECS (default 120s) we
#   short-circuit to a no-op-but-well-formed JSON output and skip
#   the daemon probe entirely.
#
# Exit codes:
#   0  : success (stdout is parsed for hook output)
#   2  : would block the prompt (we never want this)
#
# Hook output JSON shape (Claude Code UserPromptSubmit validator):
#   { "hookSpecificOutput": { "hookEventName": "UserPromptSubmit",
#       "additionalContext": "..." } }
# To inject nothing: exit 0 with empty stdout (or print "{}").
#
# Override:
#   MEMTRACE_HOOK_MODE=off              → unconditional no-op (skips lock too)
#   MEMTRACE_HEALTH_URL=...             → custom health endpoint (default http://localhost:3030/api/health)
#   MEMTRACE_HOOK_DEBOUNCE_SECS=120     → debounce window seconds (0 disables)
#   MEMTRACE_HOOK_DEBOUNCE_DIR=...      → override lock dir (default $HOME/.memtrace/hook-debounce)
#   MEMTRACE_HOOK_ORPHAN_CLEANUP_MAX=32 → max stale lock files removed per fire
#   CLAUDE_SESSION_ID / CLAUDE_CONVERSATION_ID → session id sources
#
# Strict mode: abort on unhandled errors, on unset variables, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# MEMTRACE_HOOK_MODE=off turns the whole hook into a silent no-op; any
# other value (or none) lets the script continue.
mode="${MEMTRACE_HOOK_MODE:-advisory}"
if [[ "$mode" == "off" ]]; then
    exit 0
fi

# ── Session-id resolution (G4.2) ────────────────────────────────────
#
# Source priority:
#   1. CLAUDE_SESSION_ID  env (preferred — Claude Code may set this)
#   2. CLAUDE_CONVERSATION_ID env (alternate naming)
#   3. fallback: stable hash of $PPID + parent process start time
#      (so the same parent process tree always resolves to the same
#       id, but a new shell parent gets its own id).
#
# Output is sanitised (only [A-Za-z0-9_-]) so it's safe to use as a
# filename component on every supported OS.
resolve_session_id() {
    # Explicit ids win: CLAUDE_SESSION_ID first, then
    # CLAUDE_CONVERSATION_ID. An empty value counts as "not provided".
    local candidate="${CLAUDE_SESSION_ID:-}"
    if [[ -z "$candidate" ]]; then
        candidate="${CLAUDE_CONVERSATION_ID:-}"
    fi

    if [[ -z "$candidate" ]]; then
        # No id in the environment: derive one from the parent process
        # (PID plus the start time `ps` reports for it), so repeated
        # fires under the same parent resolve to the same id.
        local parent_pid="${PPID:-0}"
        local parent_start
        parent_start="$(ps -o lstart= -p "$parent_pid" 2>/dev/null || true)"
        local seed="ppid=${parent_pid};start=${parent_start}"

        # Pick the first SHA-1 capable tool on PATH; the order is
        # shasum, sha1sum, python3.
        local tool hasher=""
        for tool in shasum sha1sum python3; do
            if command -v "$tool" >/dev/null 2>&1; then
                hasher="$tool"
                break
            fi
        done

        case "$hasher" in
            shasum)
                candidate="$(printf '%s' "$seed" | shasum -a 1 | awk '{print $1}')"
                ;;
            sha1sum)
                candidate="$(printf '%s' "$seed" | sha1sum | awk '{print $1}')"
                ;;
            python3)
                candidate="$(printf '%s' "$seed" | python3 -c 'import hashlib, sys; print(hashlib.sha1(sys.stdin.buffer.read()).hexdigest())' 2>/dev/null || true)"
                ;;
            *)
                # Nothing available to hash with: use the seed itself;
                # the sanitiser below makes it filename-safe.
                candidate="$seed"
                ;;
        esac
    fi

    # Filename safety: every character outside [A-Za-z0-9_-] becomes
    # `_`, and the result is cut to at most 128 characters. An empty
    # result is replaced by a fixed placeholder.
    local safe
    safe="$(printf '%s' "$candidate" | tr -c 'A-Za-z0-9_-' '_' | cut -c1-128)"
    printf '%s' "${safe:-unknown_session}"
}

# ── Debounce window parsing ─────────────────────────────────────────
#
# Validates that MEMTRACE_HOOK_DEBOUNCE_SECS is a non-negative
# integer. Anything malformed falls back to 120s. A literal `0`
# disables debounce entirely (every fire proceeds).
# Prints the debounce window in seconds as a canonical base-10 integer.
#
# Reads MEMTRACE_HOOK_DEBOUNCE_SECS (unset or empty → 120). Anything
# that is not a run of decimal digits falls back to 120.
#
# Leading zeros are stripped before printing. The caller feeds the
# result to bash arithmetic, where a leading 0 selects octal: `08` and
# `09` are arithmetic errors there, and `010` would silently mean 8
# seconds instead of 10. Values longer than 18 digits cannot be held
# in bash's 64-bit signed arithmetic and are treated as malformed.
parse_debounce_secs() {
    local fallback="120"
    local raw="${MEMTRACE_HOOK_DEBOUNCE_SECS:-$fallback}"

    if [[ ! "$raw" =~ ^[0-9]+$ ]]; then
        printf '%s' "$fallback"
        return 0
    fi

    # `zeros` is the run of leading '0' characters (the whole string
    # when it is all zeros); `digits` is what remains after it.
    local zeros="${raw%%[!0]*}"
    local digits="${raw#"$zeros"}"

    if [[ -z "$digits" ]]; then
        # All zeros: the explicit "disable debounce" value.
        printf '%s' "0"
    elif (( ${#digits} > 18 )); then
        printf '%s' "$fallback"
    else
        printf '%s' "$digits"
    fi
}

# ── Orphan cleanup (G4.5) ───────────────────────────────────────────
#
# Opportunistic + bounded: at hook entry, remove lock files older
# than 24h, but cap how many we remove per fire so a large backlog
# is worked off a few files at a time rather than all on one prompt.
# Candidates are listed with `find ... -mmin +1440`, bounded with
# `head -n N`, and then removed one by one with `rm -f`.
ORPHAN_CLEANUP_MAX="${MEMTRACE_HOOK_ORPHAN_CLEANUP_MAX:-32}"

# Removes stale `*.lock` files that sit directly inside a directory.
#
# Arguments:
#   $1  directory to sweep; a missing directory is a silent no-op
#   $2  maximum number of files to remove in this call. A value that
#       is not a non-negative integer falls back to 32 (the same
#       default as ORPHAN_CLEANUP_MAX); 0 removes nothing.
# Returns: 0. Removal is best-effort; individual `rm` failures are
#   ignored.
orphan_cleanup() {
    local dir="$1"
    local max="${2:-}"
    [[ -d "$dir" ]] || return 0

    # The cap usually comes straight from an environment variable.
    # Validate it here: a non-numeric value would otherwise make
    # `head` fail (printing to the hook's stderr and disabling cleanup
    # altogether), and `head -n 0` is an error on BSD.
    if [[ ! "$max" =~ ^[0-9]+$ ]]; then
        max=32
    fi
    if [[ "$max" =~ ^0+$ ]]; then
        return 0
    fi

    # `-mmin +1440` matches files modified more than 1440 minutes
    # (24h) ago. We deliberately use mmin (not -mtime +1) because
    # `-mtime` works in whole days, so a 25h-old file does NOT match
    # `-mtime +1`. We list candidates, head-bound them, then rm.
    # Paths are read line by line; lock file names are built from a
    # sanitised session id, so they contain no newlines.
    local f
    while IFS= read -r f; do
        [[ -z "$f" ]] && continue
        rm -f -- "$f" 2>/dev/null || true
    done < <(find "$dir" -maxdepth 1 -type f -name '*.lock' -mmin +1440 2>/dev/null | head -n "$max")
}

# ── Lock-file gate ──────────────────────────────────────────────────
LOCK_DIR="${MEMTRACE_HOOK_DEBOUNCE_DIR:-${HOME:-/tmp}/.memtrace/hook-debounce}"
DEBOUNCE_SECS="$(parse_debounce_secs)"

# Prints the mtime of "$1" as epoch seconds. Prints nothing and
# returns 1 when the mtime cannot be determined.
#
# `stat -c %Y` (GNU/Linux) is tried BEFORE `stat -f %m` (BSD/macOS),
# and each attempt is captured and validated on its own. The order
# matters: GNU stat reads `-f` as --file-system and treats `%m` as a
# file operand, so `stat -f %m FILE` prints a multi-line filesystem
# report for FILE on stdout and then exits non-zero. If both forms
# share one command substitution, that report is captured together
# with the fallback's output, the numeric check fails, and the
# debounce gate silently never fires on Linux. BSD stat rejects `-c`
# as an unknown option, so trying the GNU form first is safe there.
lock_mtime() {
    local path="$1"
    local out=""
    if out="$(stat -c %Y "$path" 2>/dev/null)" && [[ "$out" =~ ^[0-9]+$ ]]; then
        printf '%s' "$out"
        return 0
    fi
    if out="$(stat -f %m "$path" 2>/dev/null)" && [[ "$out" =~ ^[0-9]+$ ]]; then
        printf '%s' "$out"
        return 0
    fi
    return 1
}

# Ensure lock dir exists. If we can't create it (read-only home,
# permission denied, etc.) the hook still works — we just skip the
# debounce gate this fire.
mkdir -p "$LOCK_DIR" 2>/dev/null || true

# Cleanup orphans BEFORE evaluating the gate so a stale lock that's
# been orphaned for 24h+ doesn't accidentally suppress the fire.
if [[ -d "$LOCK_DIR" ]]; then
    orphan_cleanup "$LOCK_DIR" "$ORPHAN_CLEANUP_MAX"
fi

SESSION_ID="$(resolve_session_id)"
LOCK_FILE="$LOCK_DIR/$SESSION_ID.lock"

# Debounce gate: if lock exists and is fresh, short-circuit.
# DEBOUNCE_SECS==0 is the explicit disable knob; we skip the gate
# entirely so every fire proceeds (useful for debugging & tests).
if (( DEBOUNCE_SECS > 0 )) && [[ -f "$LOCK_FILE" ]]; then
    NOW="$(date +%s)"
    # If neither stat form yields an mtime (extremely unusual) we
    # treat the lock as fresh enough to suppress, on the principle
    # that "we just touched it" is the safer default than "spam the
    # daemon".
    LAST="$(lock_mtime "$LOCK_FILE" || printf '%s' "$NOW")"
    if [[ "$LAST" =~ ^[0-9]+$ ]]; then
        AGE=$((NOW - LAST))
        if (( AGE < DEBOUNCE_SECS )); then
            # Within debounce window → emit a well-formed no-op
            # hook output and exit. We do NOT probe the daemon.
            cat <<'EOF'
{
  "hookSpecificOutput": {
    "hookEventName": "UserPromptSubmit",
    "additionalContext": ""
  }
}
EOF
            exit 0
        fi
    fi
fi

# Outside the window (or first fire): touch the lock and proceed.
# `touch` creates the file if missing and updates mtime if present;
# this is the canonical "I just fired" signal for the next gate
# evaluation in the same session.
touch "$LOCK_FILE" 2>/dev/null || true

# ── Daemon liveness (portable: works on macOS/Linux/Windows-WSL) ──
#
# We use the Memtrace UI's status endpoint instead of `pgrep` so
# Windows + restricted-shell environments work the same way.
# 1-second timeout — must not slow Claude down.
health_url="${MEMTRACE_HEALTH_URL:-http://localhost:3030/api/health}"

# One quiet probe, capped at 1 second. A failed probe (unreachable
# daemon, HTTP error, or no usable curl) ends the hook silently with
# exit 0 — no nudge is injected when Memtrace isn't answering.
curl --silent --fail --max-time 1 "$health_url" >/dev/null 2>&1 || exit 0

# ── Read prompt from stdin ──────────────────────────────────────────
# Slurp the hook payload from stdin.
input="$(cat)"

# Pull the "prompt" field out of the JSON payload with python3. The
# helper prints nothing when the payload can't be parsed or the lookup
# fails; a failing or missing python3 likewise yields an empty prompt.
extract_prompt_py='
import json, sys
try:
    payload = json.load(sys.stdin)
    print(payload.get("prompt", ""), end="")
except Exception:
    pass
'
prompt="$(printf '%s' "$input" | python3 -c "$extract_prompt_py" 2>/dev/null || true)"

# Nothing usable to inspect → leave the turn untouched.
if [[ -z "$prompt" ]]; then
    exit 0
fi

# ── Match code-discovery intent ─────────────────────────────────────
#
# The match list is intentionally generous on the "ask Memtrace" side
# and is anchored against directive verbs and possessive phrases that
# a real user prompt looks like. The agent's planner also uses these
# (e.g. "trace through", "find the function that") so this catches
# both human-typed and agent-internal phrasings.
# Phrases that mark a prompt as a code-discovery question. Each one is
# matched as a case-insensitive substring of the prompt; the first hit
# is enough.
discovery_phrases=(
    "where is" "where's" "how does" "how is"
    "what calls" "who calls" "callers of" "callees of"
    "why does" "why is" "why was"
    "find the function" "find the class" "find the type"
    "trace through" "trace this" "trace the"
    "investigate" "debug" "diagnose"
    "explain this" "explain the code" "understand this"
    "audit" "review" "refactor"
    "fix bug" "fix the bug" "broken" "failing"
    "impact of" "what breaks if" "safe to remove" "safe to rename"
    "locate" "look up" "search for "
    "recent changes" "what changed" "evolution of"
    "call graph" "dependency" "depend on" "dependencies of"
)

should_nudge=0
# nocasematch is switched on only for the duration of the scan.
shopt -s nocasematch
for phrase in "${discovery_phrases[@]}"; do
    if [[ "$prompt" == *"$phrase"* ]]; then
        should_nudge=1
        break
    fi
done
shopt -u nocasematch

# No discovery phrase found → stay silent.
if (( should_nudge == 0 )); then
    exit 0
fi

# ── Emit the nudge ──────────────────────────────────────────────────
#
# Prints the hook output object described in the file header: a
# `hookSpecificOutput` with `hookEventName` set to UserPromptSubmit
# and the nudge text in `additionalContext`. No `decision` field is
# printed. The wording is concise on purpose — it is emitted for every
# matching prompt, so token cost is real.
printf '%s\n' \
    '{' \
    '  "hookSpecificOutput": {' \
    '    "hookEventName": "UserPromptSubmit",' \
    '    "additionalContext": "Memtrace is active for this repository. For this code-discovery question, prefer the Memtrace MCP tools FIRST - `mcp__memtrace__find_code` for natural-language search, `mcp__memtrace__find_symbol` for exact lookup, `mcp__memtrace__get_symbol_context` for callers/callees, `mcp__memtrace__get_impact` for blast radius. They return exact file:start_line:end_line in one round-trip. Fall back to Read/Grep/Glob only for: (a) config files (.env, package.json, README, raw JSON/YAML/TOML), (b) file inventory questions, (c) paths confirmed outside any indexed repo, (d) reading exact lines you already have from a Memtrace result."' \
    '  }' \
    '}'