# Agent Selection Schema
# Based on REF-024 Language Agent Tree Search (LATS)
# Issue: #252

# Schema envelope: dialect, canonical identifier, and human-readable summary.
$schema: 'https://json-schema.org/draft/2020-12/schema'
$id: 'https://aiwg.io/schemas/agent-selection/v1'
title: 'Agent Selection Schema'
description: |
  UCT-based agent selection formula balancing exploration of new agents
  with exploitation of proven performers per REF-024 LATS.

# A conforming document is an object that carries all four sections below.
type: object
required: [version, uct_formula, agent_statistics, selection_algorithm]

properties:
  # Three dot-separated groups of digits, e.g. 1.0.0.
  version:
    type: string
    pattern: '^\d+\.\d+\.\d+$'
    default: '1.0.0'

  # The remaining sections each delegate to a definition under $defs.
  uct_formula:
    $ref: '#/$defs/UCTFormula'

  agent_statistics:
    $ref: '#/$defs/AgentStatistics'

  selection_algorithm:
    $ref: '#/$defs/SelectionAlgorithm'

$defs:
  # UCT scoring rule: the formula itself, where it comes from, and a glossary
  # of its symbols. The leaves carry their explanatory text as `default`
  # values.
  UCTFormula:
    type: object
    description: 'Upper Confidence bound applied to Trees formula'
    properties:
      # Provenance of the approach.
      research_backing:
        type: object
        properties:
          source:
            type: string
            default: 'REF-024'
          finding:
            type: string
            default: 'UCT-based selection improves task success by 25% vs greedy selection'
          mechanism:
            type: string
            default: 'Balances trying new approaches (exploration) with using what works (exploitation)'

      # Score = exploitation term + exploration term.
      formula:
        type: string
        default: 'UCT(agent) = Q(agent) + c * sqrt(ln(N) / n(agent))'

      # Glossary: one entry per symbol that appears in the formula.
      components:
        type: object
        properties:
          Q_agent:
            type: object
            properties:
              name:
                type: string
                default: 'Exploitation term'
              description:
                type: string
                default: 'Average value of agent (success rate * quality)'
              formula:
                type: string
                default: 'totalValue / visitCount'

          exploration_term:
            type: object
            properties:
              name:
                type: string
                default: 'Exploration term'
              description:
                type: string
                default: 'Uncertainty bonus for undervisited agents'
              formula:
                type: string
                default: 'c * sqrt(ln(N) / n(agent))'

          c:
            type: object
            properties:
              name:
                type: string
                default: 'Exploration constant'
              description:
                type: string
                default: 'Controls exploration vs exploitation tradeoff'
              # The outer "default" is a property *name* (the constant's
              # default value); the inner "default" keyword supplies 1.414.
              default:
                type: number
                default: 1.414
              note:
                type: string
                default: 'sqrt(2) is theoretically optimal for UCT'

          N:
            type: object
            properties:
              name:
                type: string
                default: 'Total visits'
              description:
                type: string
                default: 'Total visits across all agents'

          n_agent:
            type: object
            properties:
              name:
                type: string
                default: 'Agent visits'
              description:
                type: string
                default: 'Visit count for this specific agent'

      # How the exploration term responds to an agent's visit count.
      behavior:
        type: object
        properties:
          high_visits_agent:
            type: string
            default: 'Exploration term decreases, relies more on exploitation'
          low_visits_agent:
            type: string
            default: 'Exploration term high, encourages trying agent'
          never_visited_agent:
            type: string
            default: 'Exploration term is infinite, guarantees first visit'

  # Per-agent bookkeeping that feeds the UCT score, together with the rules
  # for deriving a task's value and for keeping the record up to date.
  AgentStatistics:
    type: object
    description: "Statistics tracked for each agent"
    properties:
      # Shape of a single agent's record. selection_state.agents points at this
      # node via "#/$defs/AgentStatistics/properties/schema", so the path must
      # stay stable.
      schema:
        type: object
        properties:
          agent_id: { type: string }
          visit_count:
            type: integer
            # Counts cannot be negative, and n(agent) sits under a square root
            # in the UCT formula.
            minimum: 0
            description: "n(agent) - number of times agent was selected"
          last_visited: { type: string, format: "date-time" }
          total_value:
            type: number
            description: "Sum of all values (success * quality)"
          average_value:
            type: number
            description: "Q(agent) = totalValue / visitCount"
          # Tally of task results by outcome; each tally is a non-negative count.
          outcomes:
            type: object
            properties:
              success:
                type: integer
                minimum: 0
              failure:
                type: integer
                minimum: 0
              timeout:
                type: integer
                minimum: 0
          average_quality:
            type: number
            description: "Average final quality score"
          average_iterations:
            type: number
            description: "Average iterations to completion"
          # Open-ended maps: any key is accepted, every value is a success count.
          task_types:
            type: object
            description: "Map of task_type -> success count"
            additionalProperties:
              type: integer
              minimum: 0
          phase_preference:
            type: object
            description: "Map of phase -> success count"
            additionalProperties:
              type: integer
              minimum: 0

      # Pseudocode for the value credited to one finished task: the quality
      # score on success, zero for anything else.
      value_calculation:
        type: string
        default: |
          if outcome == "success":
            value = quality  # 0-1 based on final quality score
          else:
            value = 0

      # Which fields change at each lifecycle event.
      update_rules:
        type: array
        items: { type: string }
        default:
          - "On selection: increment visit_count, update last_visited"
          - "On completion: update outcomes, total_value, average_value"
          - "On success: update task_types and phase_preference counts"
          - "Always: recompute average_quality, average_iterations"

  # How a candidate is chosen: the ordered procedure, the optional
  # context-based bonus, and the tunable parameters.
  SelectionAlgorithm:
    type: object
    description: 'Agent selection algorithm'
    properties:
      # Numbered steps; the default list documents the intended procedure.
      process:
        type: array
        items:
          type: object
          properties:
            step:
              type: integer
            description:
              type: string
        default:
          - step: 1
            description: 'Initialize statistics for any unseen agents'
          - step: 2
            description: 'Compute UCT score for each candidate agent'
          - step: 3
            description: 'Apply context-based bonus (optional)'
          - step: 4
            description: 'Select agent with highest UCT score'
          - step: 5
            description: 'Log selection reasoning'

      # Bonus added for prior successes on the same task type or phase.
      # NOTE(review): the descriptions say "per previous success" and no cap
      # is defined in this schema; confirm whether the implementation bounds
      # the accumulated bonus.
      context_bonus:
        type: object
        description: 'Optional context-based score adjustment'
        properties:
          enabled:
            type: boolean
            default: true
          task_type_bonus:
            type: object
            properties:
              description:
                type: string
                default: '+0.1 per previous success with same task type'
              weight:
                type: number
                default: 0.1
          phase_bonus:
            type: object
            properties:
              description:
                type: string
                default: '+0.1 per previous success in same phase'
              weight:
                type: number
                default: 0.1

      # Tunables for the selector.
      configuration:
        type: object
        properties:
          exploration_constant:
            type: number
            default: 1.414
            description: 'c in UCT formula'
          min_visits_for_trust:
            type: integer
            default: 5
            description: 'Minimum visits before trusting average value'
          recency_weight:
            type: number
            default: 0.95
            description: 'Decay factor for old statistics'
          recency_window_days:
            type: integer
            default: 30
            description: 'Window for recency weighting'

# Selection state schema
# Shape of the selector's saved state. This key sits at the document root,
# outside `properties` and `$defs`.
selection_state:
  type: object
  properties:
    # One statistics record per agent, keyed by agent id.
    agents:
      type: object
      description: 'Map of agent_id -> AgentStats'
      additionalProperties:
        $ref: '#/$defs/AgentStatistics/properties/schema'
    total_visits:
      type: integer
      description: 'N - total visits across all agents'
    exploration_constant:
      type: number
      default: 1.414
    last_updated:
      type: string
      format: 'date-time'

# CLI commands
# Each entry names the command line to run, what it is for, and its flags.
cli_commands:
  # Inspect (and optionally reset or export) the recorded statistics.
  ralph_agents:
    command: 'aiwg ralph-agents'
    description: 'View agent statistics'
    options:
      - name: '--task-type'
        description: 'Filter by task type'
      - name: '--reset'
        description: 'Reset statistics'
      - name: '--export'
        description: 'Export statistics to file'

  # Simulate a selection for a given candidate list and context.
  ralph_select:
    command: 'aiwg ralph-select'
    description: 'Simulate agent selection'
    options:
      - name: '--candidates'
        description: 'Comma-separated agent list'
      - name: '--task-type'
        description: 'Task type for context bonus'
      - name: '--phase'
        description: 'Phase for context bonus'

# Agent protocol
# Ordered step lists for the three operations performed against the selection
# state. A nested key (for_each_*, if_success) groups the sub-steps it governs.
agent_protocol:
  select_agent:
    description: "Select best agent using UCT"
    steps:
      - load_selection_state
      - initialize_unseen_agents
      # Per candidate: exploitation + exploration + context bonus = total score.
      - for_each_candidate:
          - compute_exploitation_term
          - compute_exploration_term
          - compute_context_bonus
          - calculate_total_uct_score
      - sort_by_uct_score
      - log_selection_reasoning
      - return_selected_agent

  update_statistics:
    description: "Update agent statistics after task"
    steps:
      - load_selection_state
      - increment_visit_count
      # N (selection_state.total_visits) feeds ln(N) in the UCT formula, so it
      # has to advance together with the per-agent count.
      - increment_total_visits
      - update_outcomes
      - compute_value
      - update_total_value
      - recompute_average_value
      - update_quality_metrics
      # Task-type and phase tallies only count successes.
      - if_success:
          - update_task_type_counts
          - update_phase_counts
      - persist_selection_state

  generate_report:
    description: "Generate agent performance report"
    steps:
      - load_selection_state
      - sort_agents_by_average_value
      - for_each_agent:
          - compute_success_rate
          - identify_best_task_types
          - calculate_efficiency_metrics
      - format_report
      - return_report

# Storage
# Paths for the statistics file and the selection log directory.
storage:
  agent_stats: '.aiwg/ralph/agent-stats.json'
  selection_logs: '.aiwg/logs/agent-selection/'

# Success metrics
# Target outcomes for UCT-based selection.
# NOTE(review): the 25% figure here is stated against random selection, while
# research_backing.finding under $defs/UCTFormula states it against greedy
# selection; confirm which baseline is intended.
success_metrics:
  success_rate_improvement: '25% vs random selection'
  agent_utilization: 'All agents tried within first 20 tasks'
  convergence: 'Best agent identified within 50 tasks'
  adaptability: 'Adjusts to changing agent performance'

# Example agent statistics output
# Sample `aiwg ralph-agents` output, stored verbatim as a literal block scalar
# (anything written inside the block becomes part of the string).
# The four per-agent visit counts (45 + 23 + 18 + 12) add up to the reported
# total of 98.
example_agent_stats: |
  $ aiwg ralph-agents

  Agent Statistics (UCT-based selection)
  ═══════════════════════════════════════

  test-engineer
    Visits: 45
    Avg Value: 0.78 (Q)
    Success Rate: 82%
    Avg Quality: 0.85
    Avg Iterations: 4.2
    Best at: unit-testing (15), integration-testing (12)

  security-auditor
    Visits: 23
    Avg Value: 0.71 (Q)
    Success Rate: 78%
    Avg Quality: 0.80
    Avg Iterations: 5.1
    Best at: security-review (8), vulnerability-fix (6)

  api-designer
    Visits: 18
    Avg Value: 0.82 (Q)
    Success Rate: 89%
    Avg Quality: 0.88
    Avg Iterations: 3.5
    Best at: api-design (10), schema-update (5)

  integrator
    Visits: 12
    Avg Value: 0.65 (Q)
    Success Rate: 67%
    Avg Quality: 0.75
    Avg Iterations: 6.2
    Best at: deployment (4), configuration (3)

  ═══════════════════════════════════════
  Total Visits: 98

# Example selection reasoning
# Sample `aiwg ralph-select` output, stored verbatim as a literal block scalar.
# The explore figures follow the documented formula c * sqrt(ln(N) / n(agent))
# with c = 1.414, N = 98 and the visit counts from example_agent_stats:
#   api-designer     n=18 -> 0.71
#   security-auditor n=23 -> 0.63
#   test-engineer    n=45 -> 0.45
# so the bonus shrinks as an agent's visit count grows.
example_selection: |
  $ aiwg ralph-select --candidates "test-engineer,security-auditor,api-designer" \
      --task-type implement-feature --phase construction

  Agent Selection (UCT)
  ─────────────────────
  Task: implement-feature (construction phase)
  Total Visits (N): 98

  Candidate Scores:
    1. api-designer:     1.68 (Q=0.82 + explore=0.71 + context=0.15)
    2. security-auditor: 1.39 (Q=0.71 + explore=0.63 + context=0.05)
    3. test-engineer:    1.33 (Q=0.78 + explore=0.45 + context=0.10)

  Selected: api-designer
  Reason: Highest UCT score with strong exploitation (Q=0.82)
          and context bonus from phase preference

# References
# Pointers to the research note, the tracking issue, and related files.
# Values begin with `@` or `#`, so they must remain quoted.
references:
  research:
    - '@.aiwg/research/findings/REF-024-lats.md'
  implementation:
    - '#252'
  related:
    - '@agentic/code/frameworks/sdlc-complete/schemas/flows/episodic-memory.yaml'
    - '@agentic/code/frameworks/sdlc-complete/schemas/flows/iteration-analytics.yaml'
    - '@tools/ralph-external/uct-selector.ts'