# Execution Snapshot Schema
# Based on REF-058 R-LAM (Reproducible LLM Agent Workflows)
# Issue: #115

# JSON Schema dialect, canonical identifier, and human-readable summary.
$schema: 'https://json-schema.org/draft/2020-12/schema'
$id: 'https://aiwg.io/schemas/execution-snapshot/v1'
title: 'Execution Snapshot Schema'
description: |
  Configuration snapshot system that captures full execution context
  at runtime for later replay and reproducibility validation.

# A snapshot document is a mapping; every field named in `required`
# must be present on it.
type: object
required: [snapshot_id, workflow_id, captured_at, config, inputs, outputs]

properties:
  # Identity and bookkeeping fields of a snapshot.
  snapshot_id:
    description: 'Unique snapshot identifier'
    type: string
    format: uuid

  workflow_id:
    description: 'Workflow this snapshot captures'
    type: string

  # Optional: `version` is not named in the top-level `required` list.
  version:
    description: 'Schema version for evolution'
    type: string
    default: '1.0'

  captured_at:
    description: 'When snapshot was captured'
    type: string
    format: date-time

  # Execution configuration captured with the snapshot. `model` and
  # `execution_mode` are mandatory; the sampling parameters are optional.
  config:
    type: object
    required: [model, execution_mode]
    description: "Complete execution configuration"
    properties:
      # Only `id` is mandatory on the model record.
      model:
        type: object
        properties:
          id:
            type: string
            description: "Model identifier (e.g., claude-3-opus-20240229)"
          provider:
            type: string
            description: "Model provider"
          version:
            type: string
            description: "Model version if available"
        required: [id]

      execution_mode:
        type: string
        enum: [strict, seeded, logged, default]

      temperature:
        type: number
        minimum: 0
        maximum: 2

      seed:
        type: integer
        description: "Random seed if set"

      # A token budget below 1 cannot produce any output, so reject it
      # instead of accepting zero or negative values.
      max_tokens:
        type: integer
        minimum: 1

      # top_p is a probability-mass cutoff; bound it the same way
      # `temperature` is bounded so out-of-range values fail validation.
      top_p:
        type: number
        minimum: 0
        maximum: 1

      stop_sequences:
        type: array
        items:
          type: string

      system_prompt_hash:
        type: string
        description: "Hash of system prompt for verification"

  # Optional section: `agent` is not named in the top-level `required` list.
  agent:
    description: 'Agent configuration'
    type: object
    properties:
      name: {type: string}
      version: {type: string}
      # Both lists hold plain strings.
      tools:
        type: array
        items: {type: string}
      rules_active:
        type: array
        items: {type: string}

  # Inputs section; named in the top-level `required` list. None of its
  # own members are individually required.
  inputs:
    description: 'All inputs to the workflow'
    type: object
    properties:
      user_prompt:
        description: 'Original user request'
        type: string
      user_prompt_hash:
        description: 'SHA-256 of user prompt'
        type: string
      # One record per context file: path, hash, and size in bytes.
      context_files:
        description: 'Context files loaded'
        type: array
        items:
          type: object
          properties:
            path: {type: string}
            hash: {type: string}
            size_bytes: {type: integer}
      # Free-form mapping whose values must all be strings.
      environment:
        description: 'Relevant environment state'
        type: object
        additionalProperties: {type: string}

  # Outputs section; named in the top-level `required` list. None of its
  # own members are individually required.
  outputs:
    description: 'All outputs from the workflow'
    type: object
    properties:
      response:
        description: 'Final response'
        type: string
      response_hash:
        description: 'SHA-256 of response'
        type: string
      # One record per artifact touched, tagged with the operation applied.
      artifacts_created:
        type: array
        items:
          type: object
          properties:
            path: {type: string}
            hash: {type: string}
            operation:
              type: string
              enum:
                - created
                - modified
                - deleted
      # One record per tool invocation; inputs and outputs are stored as
      # hashes only.
      tool_calls:
        type: array
        items:
          type: object
          properties:
            tool: {type: string}
            input_hash: {type: string}
            output_hash: {type: string}
            timestamp:
              type: string
              format: date-time

  # Optional execution counters. Every metric is a duration or a count,
  # so negative values are rejected.
  metrics:
    type: object
    description: "Execution metrics"
    properties:
      duration_ms:
        type: integer
        minimum: 0
      tokens_input:
        type: integer
        minimum: 0
      tokens_output:
        type: integer
        minimum: 0
      tool_calls_count:
        type: integer
        minimum: 0
      iterations:
        type: integer
        minimum: 0

  # Optional replay bookkeeping. The declared defaults together describe
  # a snapshot that has never been replayed.
  replay_status:
    type: object
    description: "Status of replay attempts"
    properties:
      replayed:
        type: boolean
        default: false
      # A count of attempts cannot be negative.
      replay_count:
        type: integer
        minimum: 0
        default: 0
      last_replay:
        type: string
        format: date-time
      # Defaults to `not_replayed`, in line with `replayed: false` and
      # `replay_count: 0` above.
      match_status:
        type: string
        default: not_replayed
        enum:
          - exact_match       # Outputs identical
          - semantic_match    # Semantically equivalent
          - partial_match     # Some outputs differ
          - no_match          # Outputs differ significantly
          - not_replayed      # Never replayed

# Replay configuration: the available matching strategies and the
# snapshot fields that are compared.
replay:
  matching_strategies:
    exact:
      use_for: 'Deterministic workflows'
      description: 'Byte-for-byte identical outputs'

    # The only strategy that carries a numeric threshold.
    semantic:
      use_for: 'Natural language outputs'
      description: 'Semantically equivalent outputs'
      similarity_threshold: 0.95

    structural:
      use_for: 'Code generation, schemas'
      description: 'Same structure, different values'

  # Dotted paths into the snapshot document.
  comparison_fields:
    required:
      - outputs.response_hash
    optional:
      - outputs.artifacts_created
      - outputs.tool_calls

# Storage: directory, serialization format, file naming, and retention.
storage:
  location: '.aiwg/provenance/snapshots/'
  format: json
  # Placeholders in braces are filled in per snapshot.
  naming: '{workflow_id}-{timestamp}.json'
  retention:
    default_days: 90
    # NOTE(review): presumably also a number of days - confirm the unit.
    on_replay_failure: 180

# Examples
# A complete sample snapshot. `snapshot_id` is a well-formed UUID because
# that property is declared with `format: uuid`. The hash values are
# abbreviated placeholders, not real digests.
examples:
  - snapshot_id: "3f2b8c1e-7a4d-4e9b-9c2a-5d6f1e0b8a47"
    workflow_id: "flow-implement-112"
    version: "1.0"
    captured_at: "2026-01-25T15:00:00Z"
    config:
      model:
        id: "claude-3-opus-20240229"
        provider: "anthropic"
      execution_mode: strict
      temperature: 0
      seed: 42
      max_tokens: 4096
    agent:
      name: "Software Implementer"
      version: "1.0.0"
      tools: [Read, Write, Edit, Bash, Glob, Grep]
    inputs:
      user_prompt: "Implement checkpoint recovery system"
      user_prompt_hash: "abc123..."
      context_files:
        - path: ".aiwg/research/findings/REF-058-r-lam.md"
          hash: "def456..."
          size_bytes: 12345
    outputs:
      response: "Created checkpoint schema..."
      response_hash: "ghi789..."
      artifacts_created:
        - path: "agentic/code/addons/ralph/schemas/checkpoint.yaml"
          hash: "jkl012..."
          operation: created
    metrics:
      duration_ms: 5000
      tokens_input: 2000
      tokens_output: 3000
      tool_calls_count: 5
    replay_status:
      replayed: false
      replay_count: 0
      match_status: not_replayed

# References: source research, the tracking issue, and sibling schemas.
# Each entry is quoted because it starts with `@` or `#`, which YAML
# would otherwise treat as a reserved indicator or a comment.
references:
  research:
    - '@.aiwg/research/findings/REF-058-r-lam.md'
  implementation:
    - '#115'
  related:
    - '@agentic/code/frameworks/sdlc-complete/schemas/flows/execution-mode.yaml'
    - '@agentic/code/addons/ralph/schemas/checkpoint.yaml'
    - '@agentic/code/frameworks/sdlc-complete/schemas/provenance/prov-record.yaml'
