# Hallucination Detection Schema
# Based on REF-059 LitLLM Citation Processing
# Issue: #247

# Root of the JSON Schema: identifies the dialect and schema id, then declares
# the four sections every hallucination-detection document must provide.
$schema: 'https://json-schema.org/draft/2020-12/schema'
$id: 'https://aiwg.io/schemas/hallucination-detection/v1'
title: 'Hallucination Detection Schema'
description: |
  Automated detection of fabricated or hallucinated citations implementing
  pattern recognition per REF-059 LitLLM.

type: object
required:
  - version
  - detection_methods
  - detection_pipeline
  - severity_levels

properties:
  # Three dot-separated numeric components, e.g. 1.0.0.
  version:
    type: string
    pattern: '^\d+\.\d+\.\d+$'
    default: '1.0.0'

  # Each remaining section delegates to a definition under $defs.
  detection_methods:
    $ref: '#/$defs/DetectionMethods'

  detection_pipeline:
    $ref: '#/$defs/DetectionPipeline'

  severity_levels:
    $ref: '#/$defs/SeverityLevels'

$defs:
  # Catalogue of the five citation-hallucination detection methods. Each entry
  # documents one check through annotation defaults (description, severity,
  # how it is detected) plus any numeric tunables the check uses.
  DetectionMethods:
    type: object
    description: "Methods for detecting citation hallucinations"
    properties:
      # Cited REF identifier is absent from the research corpus.
      corpus_mismatch:
        type: object
        properties:
          description:
            type: string
            default: "Citation REF-XXX not found in research corpus"
          # Limited to the three levels defined under SeverityLevels.
          severity:
            type: string
            enum: [critical, warning, info]
            default: "critical"
          detection:
            type: string
            default: "Check if REF-XXX exists in .aiwg/research/corpus/"
          # A rate is a fraction, so values outside [0, 1] are rejected.
          false_positive_rate:
            type: number
            minimum: 0
            maximum: 1
            default: 0.01

      # Citation exists in the corpus but its bibliographic fields disagree.
      metadata_inconsistency:
        type: object
        properties:
          description:
            type: string
            default: "Authors, year, or title mismatch with corpus entry"
          severity:
            type: string
            enum: [critical, warning, info]
            default: "critical"
          checks:
            type: array
            items:
              type: string
            default:
              - "Title match (normalized comparison)"
              - "Authors match (last name comparison)"
              - "Year match (exact)"
              - "Venue match (fuzzy)"
          # NOTE(review): bounded on the assumption that similarity scores
          # are normalized to [0, 1], as the defaults suggest - confirm.
          similarity_threshold:
            type: number
            minimum: 0
            maximum: 1
            default: 0.85

      # The claim being supported has little overlap with the cited paper.
      context_mismatch:
        type: object
        properties:
          description:
            type: string
            default: "Claim keywords have low semantic overlap with paper content"
          severity:
            type: string
            enum: [critical, warning, info]
            default: "warning"
          detection:
            type: string
            default: "Compute semantic similarity between claim and paper abstract/summary"
          # NOTE(review): same [0, 1] normalization assumption as above.
          similarity_threshold:
            type: number
            minimum: 0
            maximum: 1
            default: 0.5

      # Plausible-looking citation that is not backed by a corpus entry.
      training_data_leakage:
        type: object
        properties:
          description:
            type: string
            default: "Pre-cutoff citations without corpus entry"
          severity:
            type: string
            enum: [critical, warning, info]
            default: "warning"
          detection:
            type: string
            default: "Citation year < 2022 AND not in corpus (likely from LLM training data)"
          indicators:
            type: array
            items:
              type: string
            default:
              - "Publication date before LLM training cutoff"
              - "Citation not in corpus but plausible-looking"
              - "Authors are well-known but paper unfindable"
              - "DOI doesn't resolve"

      # Malformed or duplicated REF identifiers.
      format_anomaly:
        type: object
        properties:
          description:
            type: string
            default: "Non-standard REF format or duplicate REF with different metadata"
          severity:
            type: string
            enum: [critical, warning, info]
            default: "info"
          checks:
            type: array
            items:
              type: string
            default:
              - "REF-XXX format compliance"
              - "No duplicate REF numbers with different metadata"
              - "Consistent citation style"

  # Ordered detection stages plus the aggregation rules that fold the
  # resulting detections into a single pass / warn / fail status.
  DetectionPipeline:
    type: object
    description: "Multi-stage detection pipeline"
    properties:
      enabled:
        type: boolean
        default: true

      pipeline_stages:
        type: array
        items:
          type: object
          properties:
            # Stage numbers in the defaults start at 1; zero and negative
            # positions are rejected.
            stage:
              type: integer
              minimum: 1
            name:
              type: string
            description:
              type: string
            # The defaults below use DetectionMethods property names here.
            method:
              type: string
        default:
          - stage: 1
            name: "corpus_check"
            description: "Check corpus membership (whitelist)"
            method: "corpus_mismatch"
          - stage: 2
            name: "metadata_verify"
            description: "Verify metadata consistency"
            method: "metadata_inconsistency"
          - stage: 3
            name: "context_analyze"
            description: "Analyze context-claim alignment"
            method: "context_mismatch"
          - stage: 4
            name: "leakage_check"
            description: "Check for training data leakage patterns"
            method: "training_data_leakage"
          - stage: 5
            name: "format_validate"
            description: "Validate format compliance"
            method: "format_anomaly"

      aggregation:
        type: object
        properties:
          strategy:
            type: string
            default: "severity_based"
          # A threshold below 1 would make every run fail, so it is rejected.
          critical_count_threshold:
            type: integer
            minimum: 1
            default: 1
          warning_count_threshold:
            type: integer
            minimum: 1
            default: 3
          final_status_rules:
            type: object
            properties:
              fail:
                type: string
                default: "Any critical detection OR warning_count >= threshold"
              warn:
                type: string
                default: "warning_count > 0 AND warning_count < threshold"
              # Info-level detections are log_only (see SeverityLevels), so a
              # run containing only info detections must still resolve to
              # pass; "No detections" left that case without any status.
              pass:
                type: string
                default: "No critical or warning detections (info-only detections still pass)"

  # Maps each severity level to its report symbol, the action taken when it
  # fires, and the detection methods that default to that level.
  SeverityLevels:
    type: object
    description: 'Detection severity classification'
    properties:
      critical:
        type: object
        properties:
          symbol:
            type: string
            default: '✗'
          action:
            type: string
            default: 'block_merge'
          methods:
            type: array
            items:
              type: string
            default:
              - 'corpus_mismatch'
              - 'metadata_inconsistency'

      warning:
        type: object
        properties:
          symbol:
            type: string
            default: '⚠'
          action:
            type: string
            default: 'allow_with_warning'
          methods:
            type: array
            items:
              type: string
            default:
              - 'context_mismatch'
              - 'training_data_leakage'

      info:
        type: object
        properties:
          symbol:
            type: string
            default: 'ℹ'
          action:
            type: string
            default: 'log_only'
          methods:
            type: array
            items:
              type: string
            default:
              - 'format_anomaly'

# Detection result schema
# Shape of the report produced for one analyzed file.
# NOTE(review): this key sits beside $defs rather than inside it, and the root
# schema's properties do not $ref it, so validators will not apply it
# automatically - confirm that is intended.
detection_result:
  type: object
  properties:
    file:
      type: string
    # Counts cannot be negative.
    citations_analyzed:
      type: integer
      minimum: 0
    detections:
      type: array
      items:
        type: object
        properties:
          citation:
            type: string
            description: "REF-XXX identifier"
          method:
            type: string
            description: "Detection method that triggered"
          severity:
            type: string
            enum: [critical, warning, info]
          message:
            type: string
          evidence:
            type: object
            description: "Supporting evidence for detection"
          suggestion:
            type: string
            description: "How to fix the issue"
    # Per-severity tallies and the overall status.
    summary:
      type: object
      properties:
        critical:
          type: integer
          minimum: 0
        warning:
          type: integer
          minimum: 0
        info:
          type: integer
          minimum: 0
        status:
          type: string
          enum: [pass, fail, warn]

# CLI commands
# Command-line surface: one command, its positional path and its flags.
cli_commands:
  detect_hallucinations:
    command: 'aiwg detect-hallucinations [path]'
    description: 'Detect citation hallucinations in documents'
    options:
      # Flags without a `short` entry have no single-letter alias listed.
      - name: '--all'
        short: '-a'
        description: 'Check all markdown files'
      - name: '--fail-on'
        description: 'Fail on severity level (critical, warning, info)'
        default: 'critical'
      - name: '--format'
        short: '-f'
        description: 'Output format (text, json, github)'
        default: 'text'
      - name: '--fix'
        description: 'Suggest fixes for detected issues'

# Agent protocol
# Step-by-step procedures an agent follows. Step names are symbolic labels;
# nested lists are the sub-steps of a loop or conditional.
agent_protocol:
  detect_hallucinations:
    description: "Run hallucination detection on document"
    steps:
      - read_document
      - extract_all_citations
      - for_each_citation:
          - stage_1_corpus_check
          # Stages 2-3 need a corpus entry to compare against; stage 4 only
          # applies when there is none.
          - if_in_corpus:
              - stage_2_metadata_verify
              - stage_3_context_analyze
          - if_not_in_corpus:
              - stage_4_leakage_check
          # Format validation runs for every citation.
          - stage_5_format_validate
          - record_detections
      - aggregate_results
      - calculate_severity_summary
      - determine_final_status
      - return_detection_result

  generate_fix_suggestions:
    description: "Generate suggestions for detected hallucinations"
    steps:
      - for_each_detection:
          - if_corpus_mismatch:
              - suggest_search_for_similar_paper
              - suggest_add_to_corpus_if_real
              - suggest_remove_if_fabricated
          - if_metadata_inconsistency:
              - suggest_correct_metadata
              - show_expected_vs_actual
          - if_context_mismatch:
              - suggest_revise_claim
              - suggest_find_supporting_paper
          # NOTE(review): the method is named training_data_leakage elsewhere
          # in this file; the key is kept as-is for compatibility.
          - if_training_leakage:
              - suggest_verify_source
              - suggest_add_to_corpus_if_verified
          # format_anomaly detections previously had no branch and therefore
          # received no suggestion, although stage 5 runs for every citation.
          - if_format_anomaly:
              - suggest_correct_ref_format
              - suggest_resolve_duplicate_ref
      - return_suggestions

# Integration with writing-validator
# Hook settings for running detection inside the writing-validator pipeline.
writing_validator_integration:
  hook_point: 'validation_pipeline'
  order: 3
  enabled: true
  fail_on: 'critical'
  report_format: 'inline'

# CI/CD integration
ci_integration:
  github_actions:
    # Workflow step stored as a literal block scalar, so the embedded
    # indentation and line breaks are part of the value. The command passes
    # --all and sets --fail-on to critical.
    workflow_snippet: |
      - name: Detect Citation Hallucinations
        run: aiwg detect-hallucinations --all --fail-on critical

# Storage
# Paths (relative, under .aiwg/logs/) where detection output is kept.
storage:
  detection_logs: '.aiwg/logs/hallucination-detection/'
  false_positive_log: '.aiwg/logs/hallucination-false-positives.jsonl'

# Research targets (from REF-059)
# One goal statement per detection concern.
research_targets:
  corpus_validation: 'Detect citations not in approved corpus'
  metadata_verification: 'Verify citation metadata accuracy'
  context_analysis: 'Detect claim-paper misalignment'
  leakage_prevention: 'Identify training data hallucinations'
  format_compliance: 'Enforce citation format standards'

# Success metrics
# Targets are free-text strings; the quotes are required because the values
# begin with "<" or ">", and ">" would otherwise open a folded scalar.
success_metrics:
  false_positive_rate: '< 5%'
  detection_rate: '> 95% on synthetic hallucinations'
  production_target: 'Zero fabricated citations in production docs'
  manual_override_rate: '< 1%'

# Example detection report
# Sample markdown report held as a literal block scalar: every line break and
# indent below is part of the value. The indented "---" is a markdown rule
# inside the scalar, not a YAML document marker.
example_detection_report: |
  ## Hallucination Detection Report

  **File**: docs/research/synthesis.md
  **Citations Analyzed**: 15

  ### Detections

  ✗ **CRITICAL**: REF-999 (Corpus Mismatch)
    Citation "REF-999: Smith et al., 2024" not found in research corpus.
    Suggestion: Verify this is a real paper and add to corpus, or remove citation.

  ✗ **CRITICAL**: REF-043 (Metadata Inconsistency)
    Year mismatch: Document says 2024, corpus entry says 2023.
    Suggestion: Update citation to match corpus: Year → 2023

  ⚠ **WARNING**: REF-012 (Context Mismatch)
    Claim "LLMs achieve 95% accuracy" has low overlap with paper content (similarity: 0.32).
    Paper discusses methodology, not accuracy metrics.
    Suggestion: Verify claim is supported by cited paper.

  ⚠ **WARNING**: REF-008 (Training Data Leakage)
    Citation from 2019 not in corpus. May be from LLM training data.
    Suggestion: Verify source exists and add to corpus if real.

  ---

  **Summary**:
  - Critical: 2
  - Warning: 2
  - Info: 0

  **Status**: FAIL (2 critical issues require resolution)

# References
# Cross-references. Every entry is quoted because a leading "@" or "#" is
# not allowed to start a plain YAML scalar ("#" would begin a comment).
references:
  research:
    - '@.aiwg/research/findings/REF-059-litllm-citation-processing.md'
  implementation:
    - '#247'
  related:
    - '@agentic/code/frameworks/sdlc-complete/schemas/flows/citation-verification.yaml'
    - '@agentic/code/frameworks/sdlc-complete/schemas/flows/citation-integrity.yaml'
    - '@agentic/code/agents/writing-validator.md'
  dependencies:
    - '#231 (Retrieval-first policy)'
    - '#232 (Citation whitelist)'
    - '#236 (Citation verification pipeline)'
