/** @author juntak */
import { createHash } from "node:crypto";

import { IAnalysisSectionEntry } from "../orchestrate/common/structures/IAnalysisSectionEntry";
import { EmbeddingProvider } from "./EmbeddingProvider";

export interface RequirementSection {
  filename: `${string}.md`;
  heading: string;
  content: string;
  index: number;
  level: 2 | 3;
}

export interface RetrievalHit {
  section: RequirementSection;
  score: number;
  reason: string;
}

export interface VectorIndexItem {
  id: string;
  section: RequirementSection;
  vector: number[];

  tf: Map<string, number>;
  docLen: number;
}

export interface Bm25Stats {
  N: number;
  avgdl: number;
  df: Map<string, number>;
}

function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  const n = Math.min(a.length, b.length);
  for (let i = 0; i < n; i++) {
    const ai = a[i]!;
    const bi = b[i]!;
    dot += ai * bi;
    normA += ai * ai;
    normB += bi * bi;
  }
  if (normA === 0 || normB === 0) return 0;
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// BM25
function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .replace(/[`"'.,:;!?()[\]{}<>]/g, " ")
    .split(/\s+/)
    .filter((t) => t.length >= 2);
}

function buildTf(tokens: string[]): Map<string, number> {
  const tf = new Map<string, number>();
  for (const t of tokens) tf.set(t, (tf.get(t) ?? 0) + 1);
  return tf;
}

function buildDf(indexDocs: { tokens: string[] }[]): Map<string, number> {
  const df = new Map<string, number>();
  for (const d of indexDocs) {
    const uniq = new Set(d.tokens);
    for (const term of uniq) df.set(term, (df.get(term) ?? 0) + 1);
  }
  return df;
}

function bm25Score(
  queryTokens: string[],
  docTf: Map<string, number>,
  docLen: number,
  stats: Bm25Stats,
  k1: number = 1.5,
  b: number = 0.75,
): number {
  let score = 0;
  const uq = new Set(queryTokens);
  for (const term of uq) {
    const df = stats.df.get(term) ?? 0;
    if (df === 0) continue;
    const idf = Math.log(1 + (stats.N - df + 0.5) / (df + 0.5));
    const tf = docTf.get(term) ?? 0;
    if (tf === 0) continue;
    const denom = tf + k1 * (1 - b + b * (docLen / stats.avgdl));
    score += idf * ((tf * (k1 + 1)) / denom);
  }
  return score;
}

function minMaxNormalize(values: number[]): number[] {
  if (values.length === 0) return [];
  const min = Math.min(...values);
  const max = Math.max(...values);
  if (max === min) return values.map(() => (max === 0 ? 0 : 1));
  return values.map((v) => (v - min) / (max - min));
}

export async function buildVectorIndexHybrid(
  embedder: EmbeddingProvider,
  sections: RequirementSection[],
): Promise<{ index: VectorIndexItem[]; bm25: Bm25Stats }> {
  const docs = sections.map((s) => {
    const text = `${s.heading}\n${s.content}`;
    const tokens = tokenize(text);
    const tf = buildTf(tokens);
    return {
      id: `${s.filename}:${s.index}`,
      text,
      section: s,
      tokens,
      tf,
      docLen: tokens.length,
    };
  });

  const vectors = await embedder.embed(docs.map((d) => d.text));

  const N = docs.length;
  const totalLen = docs.reduce((acc, d) => acc + d.docLen, 0);
  const avgdl = N > 0 ? totalLen / N : 0;
  const df = buildDf(docs);
  const bm25: Bm25Stats = { N, avgdl, df };

  const index: VectorIndexItem[] = docs.map((d, i) => ({
    id: d.id,
    section: d.section,
    vector: vectors[i]!,
    tf: d.tf,
    docLen: d.docLen,
  }));
  return { index, bm25 };
}

function clamp(x: number, lo: number, hi: number): number {
  return Math.max(lo, Math.min(hi, x));
}

function percentile(sortedAsc: number[], p: number): number {
  if (sortedAsc.length === 0) return 0;
  const pos = (p / 100) * (sortedAsc.length - 1);
  const base = Math.floor(pos);
  const rest = pos - base;
  const a = sortedAsc[base]!;
  const b = sortedAsc[Math.min(base + 1, sortedAsc.length - 1)]!;
  return a + (b - a) * rest;
}

// Hybrid Retrieval
export async function retrieveTopKAdaptiveHybrid(
  embedder: EmbeddingProvider,
  queryText: string,
  index: VectorIndexItem[],
  bm25: Bm25Stats,
  kMin: number = 3,
  kMax?: number,
  wVec: number = 0.6,
  wBm25: number = 0.4,
  debug: boolean = false,
): Promise<RetrievalHit[]> {
  const N = index.length;
  const effectiveKMax = kMax ?? Math.min(12, Math.max(8, Math.ceil(0.05 * N)));

  const qVecs = await embedder.embed([queryText]);
  const qVec = qVecs[0];
  if (!qVec) return [];

  const qTokens = tokenize(queryText);

  const vecScores = index.map((item) => cosineSimilarity(qVec, item.vector));
  const bmScores = index.map((item) =>
    bm25Score(qTokens, item.tf, item.docLen, bm25),
  );

  const vecNorm = minMaxNormalize(vecScores);
  const bmNorm = minMaxNormalize(bmScores);

  const hits: RetrievalHit[] = index.map((item, i) => {
    const score = wVec * vecNorm[i]! + wBm25 * bmNorm[i]!;
    return {
      section: item.section,
      score,
      reason: `hybrid=${score.toFixed(4)} (vec=${vecNorm[i]!.toFixed(3)}, bm25=${bmNorm[
        i
      ]!.toFixed(3)})`,
    };
  });

  // Compute Dynamic K with debug info
  const scores = hits.map((h) => h.score);
  const sorted = [...scores].sort((a, b) => a - b);
  const p90 = percentile(sorted, 90);
  const p50 = percentile(sorted, 50);
  const gap = p90 - p50;
  const GAP_MIN = 0.02;
  const GAP_MAX = 0.5;
  const sharpness = clamp((gap - GAP_MIN) / (GAP_MAX - GAP_MIN), 0, 1);
  const K = Math.round(kMin + (1 - sharpness) * (effectiveKMax - kMin));

  if (debug) {
    console.log(`[DYNAMIC-K-DEBUG]`);
    console.log(`  kMin=${kMin}, kMax=${effectiveKMax}, computedK=${K}`);
    console.log(
      `  p90=${p90.toFixed(4)}, p50=${p50.toFixed(4)}, gap=${gap.toFixed(4)}`,
    );
    console.log(`  sharpness=${sharpness.toFixed(4)} (0=flat, 1=sharp)`);
    console.log(`  totalHits=${hits.length}`);
  }

  return hits.sort((a, b) => b.score - a.score).slice(0, K);
}

// ============================================================================
// Section-level index caching
// ============================================================================

interface CachedSectionIndex {
  hash: string;
  index: VectorIndexItem[];
  bm25: Bm25Stats;
}

type SectionBuildResult = { index: VectorIndexItem[]; bm25: Bm25Stats };

const _sectionIndexCache = new Map<string, CachedSectionIndex>();
const _sectionBuildingPromises = new Map<string, Promise<SectionBuildResult>>();

function computeSectionsHash(sections: RequirementSection[]): string {
  const payload = sections
    .map((s) => `${s.filename}:${s.index}:${s.heading}\n${s.content}`)
    .join("\n---\n");
  return createHash("sha256").update(payload).digest("hex");
}

async function getOrBuildSectionIndex(
  embedder: EmbeddingProvider,
  sections: RequirementSection[],
): Promise<SectionBuildResult> {
  if (sections.length === 0) {
    return {
      index: [],
      bm25: { N: 0, avgdl: 0, df: new Map<string, number>() },
    };
  }

  const hash = computeSectionsHash(sections);

  const cached = _sectionIndexCache.get(hash);
  if (cached) {
    return { index: cached.index, bm25: cached.bm25 };
  }

  const existingPromise = _sectionBuildingPromises.get(hash);
  if (existingPromise) {
    return existingPromise;
  }

  const buildPromise = (async (): Promise<SectionBuildResult> => {
    const { index, bm25 } = await buildVectorIndexHybrid(embedder, sections);
    _sectionIndexCache.set(hash, { hash, index, bm25 });
    return { index, bm25 };
  })();

  _sectionBuildingPromises.set(hash, buildPromise);

  try {
    return await buildPromise;
  } finally {
    _sectionBuildingPromises.delete(hash);
  }
}

export function clearIndexCache(): void {
  _sectionIndexCache.clear();
  _sectionBuildingPromises.clear();
}

// ============================================================================
// Analysis Context Mode - Unified RAG control
// ============================================================================

/**
 * Analysis context mode for RAG control.
 *
 * - "TOPK": Use RAG retrieval (section-level hybrid search)
 * - "FULL": Use all analysis sections without filtering (no RAG call)
 * - "NONE": Use no analysis sections (no RAG call)
 */
export type AnalysisContextMode = "TOPK" | "FULL" | "NONE";

export interface BuildAnalysisContextOptions {
  kMin?: number;
  kMax?: number;
  splitCount?: number;
  log?: boolean;
  logPrefix?: string;
}

/**
 * Build analysis context at section granularity.
 *
 * Operates on `IAnalysisSectionEntry[]` from `convertToSectionEntries`. Each
 * section entry is treated as an independent retrieval unit (~200-600 words),
 * yielding fine-grained context for downstream agents.
 *
 * Uses SHA256-based index caching to avoid rebuilding the vector index when
 * called repeatedly with the same section pool (e.g. across batch items in
 * `executeCachedBatch`).
 *
 * @param embedder - Embedding provider for vector search (only used in TOPK)
 * @param sections - Source section entries (from convertToSectionEntries)
 * @param query - Query text for retrieval (only used in TOPK mode)
 * @param mode - Analysis context mode (NONE/FULL/TOPK)
 * @param options - Optional parameters for retrieval tuning
 * @returns Filtered or full section entries based on mode
 */
export async function buildAnalysisContextSections(
  embedder: EmbeddingProvider,
  sections: IAnalysisSectionEntry[],
  query: string,
  mode: AnalysisContextMode,
  options?: BuildAnalysisContextOptions,
): Promise<IAnalysisSectionEntry[]> {
  const log = options?.log ?? false;
  const prefix = options?.logPrefix ? `[${options.logPrefix}]` : "";

  const inputTotalChars = sections.reduce(
    (sum, s) => sum + (s.content?.length ?? 0),
    0,
  );

  if (mode === "NONE" || sections.length === 0) {
    if (log) {
      console.log(
        `[RAG-SECTIONS]${prefix} mode=${mode} sections=0 chars=0 (skipped)`,
      );
    }
    return [];
  }

  if (mode === "FULL") {
    if (log) {
      console.log(
        `[RAG-SECTIONS]${prefix} mode=FULL sections=${sections.length} chars=${inputTotalChars} (pass-through)`,
      );
    }
    return sections;
  }

  // TOPK mode: convert section entries to RequirementSection for retrieval
  const reqSections: RequirementSection[] = sections.map((s) => ({
    filename: s.filename as `${string}.md`,
    heading: `### ${s.sectionTitle}`,
    content: `[${s.unitTitle}] ${s.content}`,
    index: s.id,
    level: 3 as const,
  }));

  const { index, bm25 } = await getOrBuildSectionIndex(embedder, reqSections);

  if (index.length === 0) {
    if (log) {
      console.log(
        `[RAG-SECTIONS]${prefix} mode=TOPK sections=0 chars=0 (empty index)`,
      );
    }
    return [];
  }

  const split = options?.splitCount ?? 1;
  const kMin = options?.kMin ? Math.ceil(options.kMin / split) : undefined;
  const kMax = options?.kMax ? Math.ceil(options.kMax / split) : undefined;

  const hits = await retrieveTopKAdaptiveHybrid(
    embedder,
    query,
    index,
    bm25,
    kMin,
    kMax,
  );

  // Map hits back to section entries by index (set to s.id above)
  const sectionMap = new Map(sections.map((s) => [s.id, s]));
  const result = hits
    .map((h) => sectionMap.get(h.section.index))
    .filter((s): s is IAnalysisSectionEntry => s !== undefined);

  if (log) {
    const resultTotalChars = result.reduce(
      (sum, s) => sum + (s.content?.length ?? 0),
      0,
    );
    const reduction =
      inputTotalChars > 0
        ? ((1 - resultTotalChars / inputTotalChars) * 100).toFixed(1)
        : "0";
    console.log(
      `[RAG-SECTIONS]${prefix} mode=TOPK sections=${result.length} chars=${resultTotalChars} reduction=${reduction}%`,
    );
  }

  return result;
}
