# FAIR Metadata Framework Schema
# Based on REF-056 FAIR Data Principles
# Issues: #233 (INDEX.md Generation), #235 (YAML Frontmatter)

# Root schema identity (JSON Schema draft 2020-12).
$schema: 'https://json-schema.org/draft/2020-12/schema'
$id: 'https://aiwg.io/schemas/fair-metadata/v1'
title: 'FAIR Metadata Framework Schema'
description: |
  Comprehensive metadata framework implementing FAIR principles for
  automated INDEX.md generation and YAML frontmatter standards per REF-056.

# A conforming document is an object that carries all three sections below.
type: object
required: [version, frontmatter_schema, index_generation]

properties:
  # Three dot-separated runs of digits, e.g. "1.0.0".
  version:
    type: string
    default: '1.0.0'
    pattern: '^\d+\.\d+\.\d+$'

  # Both sections are defined under $defs and pulled in by reference.
  frontmatter_schema: { $ref: '#/$defs/FrontmatterSchema' }

  index_generation: { $ref: '#/$defs/IndexGeneration' }

$defs:
  FrontmatterSchema:
    type: object
    description: "YAML frontmatter schema for REF-XXX documents"
    properties:
      # One string per targeted FAIR principle (F1, F4, I1, R1); each default
      # states how the framework addresses that principle.
      fair_principles:
        type: object
        properties:
          F1: { type: string, default: "Globally unique and persistent identifiers (REF-XXX)" }
          F4: { type: string, default: "Metadata registered in searchable resource (INDEX.md)" }
          I1: { type: string, default: "Formal, accessible language (YAML frontmatter)" }
          R1: { type: string, default: "Rich metadata with usage guidance" }

      # Names of the frontmatter fields listed as required; the default holds
      # the six-name baseline.
      required_fields:
        type: array
        items:
          type: string
        default: [ref, title, authors, year, category, summary]

      # Names of the frontmatter fields listed as optional.
      optional_fields:
        type: array
        items:
          type: string
        default:
          [tags, quality, relevance, citations, applies_to,
           doi, url, source_type, evidence_level]

      # Per-field descriptors. Each entry is an object whose properties (type,
      # pattern, enum, description, example, ...) carry their value in `default`.
      # Every name listed in required_fields and optional_fields has an entry,
      # including doi and url.
      field_definitions:
        type: object
        properties:
          # Identifier: "REF-" followed by exactly three digits.
          ref:
            type: object
            properties:
              type: { type: string, default: "string" }
              pattern: { type: string, default: "^REF-\\d{3}$" }
              description: { type: string, default: "Unique reference identifier" }
              example: { type: string, default: "REF-056" }

          title:
            type: object
            properties:
              type: { type: string, default: "string" }
              max_length: { type: integer, default: 200 }
              description: { type: string, default: "Full paper title" }
              example: { type: string, default: "FAIR Data Principles for Research Artifacts" }

          authors:
            type: object
            properties:
              type: { type: string, default: "array" }
              items: { type: string, default: "string" }
              description: { type: string, default: "Author list" }
              example: { type: array, default: ["Wilkinson et al."] }

          # Bounds default to 1900..2030.
          year:
            type: object
            properties:
              type: { type: string, default: "integer" }
              min: { type: integer, default: 1900 }
              max: { type: integer, default: 2030 }
              description: { type: string, default: "Publication year" }
              example: { type: integer, default: 2016 }

          tags:
            type: object
            properties:
              type: { type: string, default: "array" }
              items: { type: string, default: "string" }
              description: { type: string, default: "Searchable keywords" }
              example:
                type: array
                default: ["research-management", "metadata", "interoperability"]

          # Default list of category names.
          category:
            type: object
            properties:
              type: { type: string, default: "string" }
              enum:
                type: array
                default:
                  - agent-frameworks
                  - voice-profiles
                  - rag-retrieval
                  - quality-assurance
                  - sdlc-methodology
                  - research-management
                  - provenance
                  - reproducibility
              description: { type: string, default: "Primary category" }

          summary:
            type: object
            properties:
              type: { type: string, default: "string" }
              max_length: { type: integer, default: 500 }
              description: { type: string, default: "Brief summary of paper" }

          quality:
            type: object
            properties:
              type: { type: string, default: "string" }
              enum:
                type: array
                default: ["high", "moderate", "low", "very-low"]
              description: { type: string, default: "GRADE quality level" }

          relevance:
            type: object
            properties:
              type: { type: string, default: "string" }
              enum:
                type: array
                default: ["foundational", "high", "moderate", "supplementary"]
              description: { type: string, default: "Relevance to AIWG" }

          # NOTE(review): `pattern` sits beside `items` rather than inside it;
          # presumably it constrains each cited ref -- confirm against the
          # validator.
          citations:
            type: object
            properties:
              type: { type: string, default: "array" }
              items: { type: string, default: "string" }
              pattern: { type: string, default: "^REF-\\d{3}$" }
              description: { type: string, default: "Papers this references" }

          applies_to:
            type: object
            properties:
              type: { type: string, default: "array" }
              items: { type: string, default: "string" }
              description: { type: string, default: "AIWG components this applies to" }

          # No pattern is imposed on doi, so documents that already carry one
          # are not newly constrained.
          doi:
            type: object
            properties:
              type: { type: string, default: "string" }
              description: { type: string, default: "Digital Object Identifier of the publication" }
              example: { type: string, default: "10.1038/sdata.2016.18" }

          url:
            type: object
            properties:
              type: { type: string, default: "string" }
              description: { type: string, default: "Web address where the publication can be accessed" }
              example: { type: string, default: "https://doi.org/10.1038/sdata.2016.18" }

          source_type:
            type: object
            properties:
              type: { type: string, default: "string" }
              enum:
                type: array
                default:
                  - peer-reviewed-journal
                  - conference-proceedings
                  - preprint
                  - industry-blog
                  - thesis
                  - book
                  - technical-report
              description: { type: string, default: "Publication type for GRADE" }

          evidence_level:
            type: object
            properties:
              type: { type: string, default: "string" }
              enum:
                type: array
                default:
                  - empirical-study
                  - systematic-review
                  - meta-analysis
                  - case-study
                  - opinion-piece
                  - tutorial
              description: { type: string, default: "Evidence type for GRADE" }

      # Blank frontmatter skeleton: placeholder values for every name listed in
      # required_fields and optional_fields, wrapped in `---` delimiter lines.
      template:
        type: string
        # Literal block scalar: line breaks are preserved exactly as written.
        default: |
          ---
          ref: REF-XXX
          title: "Paper Title"
          authors: [Author1, Author2]
          year: 2024
          tags: [tag1, tag2]
          category: category-name
          summary: Brief summary of the paper's key findings
          quality: moderate
          relevance: high
          source_type: peer-reviewed-journal
          evidence_level: empirical-study
          citations:
            - REF-001
            - REF-002
          applies_to:
            - component1
            - component2
          doi: "10.xxxx/xxxxx"
          url: "https://..."
          ---

      # Filled-in frontmatter for REF-056. It sets every required field and all
      # optional fields except `url`.
      example:
        type: string
        # Literal block scalar: line breaks are preserved exactly as written.
        default: |
          ---
          ref: REF-056
          title: FAIR Data Principles for Research Artifacts
          authors: [Wilkinson et al.]
          year: 2016
          tags: [research-management, metadata, interoperability]
          category: research-management
          summary: Principles for making research outputs Findable, Accessible, Interoperable, and Reusable
          quality: high
          relevance: foundational
          source_type: peer-reviewed-journal
          evidence_level: empirical-study
          citations:
            - REF-062
            - REF-060
          applies_to:
            - artifact-management
            - provenance-tracking
            - research-integration
          doi: "10.1038/sdata.2016.18"
          ---

  IndexGeneration:
    type: object
    description: "Automated INDEX.md generation from frontmatter"
    properties:
      # Boolean toggle; the schema default is true.
      enabled: { type: boolean, default: true }

      # Text naming the FAIR principle (F4) associated with index generation.
      fair_compliance:
        default: "FAIR F4 - (Meta)data are registered or indexed in a searchable resource"
        type: string

      # Settings that steer how the index file is produced.
      generation_config:
        type: object
        properties:
          # Glob strings; the "!"-prefixed defaults name INDEX.md and README.md
          # (presumably exclusions -- confirm against the generator).
          source_patterns:
            type: array
            items:
              type: string
            default: ['**/*.md', '!**/INDEX.md', '!**/README.md']

          output_file: { type: string, default: 'INDEX.md' }

          # One of four grouping keys; the default is "category".
          grouping:
            type: string
            default: category
            enum:
              - category
              - year
              - relevance
              - alphabetical

          include_summary:
            description: 'Include first 280 chars of summary'
            type: boolean
            default: true

          validate_links:
            description: 'Verify all cross-references resolve'
            type: boolean
            default: true

          timestamp:
            description: 'Include last updated timestamp'
            type: boolean
            default: true

      # Markdown skeleton for the whole index. Brace placeholders used:
      # {directory_name}, {timestamp}, {total_count}, {category_count},
      # {category_sections}, {alphabetical_list}.
      index_template:
        type: string
        # Literal block scalar: line breaks are preserved exactly as written.
        default: |
          # {directory_name} Index

          > Auto-generated from YAML frontmatter. Do not edit manually.
          > Last updated: {timestamp}

          ## Summary

          - Total documents: {total_count}
          - Categories: {category_count}

          ## By Category

          {category_sections}

          ## Alphabetical

          {alphabetical_list}

          ---
          Generated by `aiwg index generate`

      # Markdown for one category section: a heading from {category_name}, a
      # five-column table header, then the {rows} placeholder.
      category_section_template:
        type: string
        # Literal block scalar: line breaks are preserved exactly as written.
        default: |
          ### {category_name}

          | Ref | Title | Year | Quality | Relevance |
          |-----|-------|------|---------|-----------|
          {rows}

      # One table row; its five cells line up with the Ref, Title, Year,
      # Quality and Relevance columns, and the ref cell links to {file_path}.
      row_template:
        default: '| [{ref}]({file_path}) | {title} | {year} | {quality} | {relevance} |'
        type: string

      # List of rule/severity pairs; both members are free-form strings. The
      # default holds five rules marked "error" or "warning".
      validation_rules:
        type: array
        items:
          type: object
          properties:
            rule:
              type: string
            severity:
              type: string
        default:
          - { rule: "All required frontmatter fields present", severity: "error" }
          - { rule: "REF-XXX format valid", severity: "error" }
          - { rule: "Category in allowed list", severity: "warning" }
          - { rule: "Cross-references resolve", severity: "error" }
          - { rule: "Year within valid range", severity: "warning" }

# Frontmatter validation result: a file name, a validity flag, and lists of
# errors and warnings.
frontmatter_validation:
  type: object
  properties:
    file: { type: string }
    valid: { type: boolean }
    # Error entries carry field, message and severity strings.
    errors:
      type: array
      items:
        type: object
        properties:
          field:
            type: string
          message:
            type: string
          severity:
            type: string
    # Warning entries carry field and message only.
    warnings:
      type: array
      items:
        type: object
        properties:
          field:
            type: string
          message:
            type: string

# CLI commands: command line, one-line purpose, and listed flags for each.
cli_commands:
  index_generate:
    description: 'Generate INDEX.md from frontmatter'
    command: 'aiwg index generate [dir]'
    options:
      - { name: '--validate', description: 'Validate cross-references' }
      - { name: '--dry-run', description: 'Show what would be generated' }

  frontmatter_validate:
    description: 'Validate all frontmatter in directory'
    command: 'aiwg research validate-frontmatter [dir]'
    options:
      - { name: '--fix', description: 'Auto-fix where possible' }
      - { name: '--strict', description: 'Fail on warnings' }

  frontmatter_add:
    description: 'Add frontmatter to existing document'
    command: 'aiwg research add-frontmatter <file>'
    options:
      - { name: '--interactive', description: 'Prompt for values' }

  # This command lists no options.
  frontmatter_report:
    description: 'Report on frontmatter coverage'
    command: 'aiwg research frontmatter-report'

# Agent protocol: an ordered list of step names for each agent operation.
agent_protocol:
  generate_index:
    description: 'Generate INDEX.md from frontmatter'
    steps:
      - scan_directory_for_markdown
      # The only structured step: a nested list keyed by for_each_file.
      - for_each_file: [extract_yaml_frontmatter, validate_required_fields, parse_metadata]
      - group_by_category
      - sort_within_groups
      - validate_cross_references
      - render_index_template
      - write_index_file
      - return_generation_report

  validate_frontmatter:
    description: 'Validate frontmatter in document'
    steps:
      [read_file, extract_yaml_frontmatter, check_required_fields,
       validate_field_formats, check_enum_values, validate_cross_references,
       return_validation_result]

  add_frontmatter:
    description: 'Add frontmatter to existing document'
    steps:
      [read_existing_content, extract_metadata_hints_from_content,
       prompt_for_missing_values, validate_frontmatter,
       prepend_frontmatter_to_content, write_updated_file]

# CI integration: bash snippets for a commit-time check and an index
# freshness check.
# NOTE(review): the scripts pass --quiet and --check, neither of which appears
# in the options listed under cli_commands, and validate-frontmatter is
# documented there as taking [dir] while the hook passes a file -- confirm the
# CLI accepts these.
ci_integration:
  pre_commit_hook:
    description: "Validate frontmatter on commit"
    # NOTE(review): staged INDEX.md / README.md files are validated as well,
    # although generation_config.source_patterns lists them with a "!" prefix
    # -- confirm this is intended.
    script: |
      #!/bin/bash
      # Validate frontmatter for staged markdown files.
      # Paths are read NUL-delimited so names containing spaces, glob
      # characters or non-ASCII bytes reach the validator intact.
      while IFS= read -r -d '' FILE; do
        if ! aiwg research validate-frontmatter "$FILE" --quiet; then
          echo "ERROR: Invalid frontmatter in $FILE"
          exit 1
        fi
      done < <(git diff --cached --name-only -z --diff-filter=AM -- '*.md')

  index_freshness_check:
    description: "Ensure INDEX.md is up to date"
    script: |
      #!/bin/bash
      # Check if INDEX.md needs regeneration; a non-zero exit status from the
      # dry run is reported as an out-of-date index.
      if ! aiwg index generate --dry-run --check; then
        echo "ERROR: INDEX.md is out of date"
        echo "Run: aiwg index generate"
        exit 1
      fi

# Storage: a glob for index files and the path of the frontmatter schema file.
storage:
  frontmatter_schema: 'agentic/code/frameworks/sdlc-complete/schemas/flows/frontmatter-schema.yaml'
  indices: '**/INDEX.md'

# Research targets (from REF-056 FAIR): one short statement per targeted
# principle, keyed by FAIR group and principle id.
research_targets:
  findable_f1: 'Globally unique REF-XXX identifiers'
  findable_f4: 'Searchable INDEX.md from frontmatter'
  interoperable_i1: 'Formal YAML frontmatter schema'
  reusable_r1: 'Rich metadata with categories and tags'

# Example INDEX.md output.
# The sample is abbreviated: its summary reports 62 documents in 8 categories,
# while only 7 rows across 3 categories are shown.
# Literal block scalar: line breaks are preserved exactly as written.
example_index_output: |
  # Research Corpus Index

  > Auto-generated from YAML frontmatter. Do not edit manually.
  > Last updated: 2026-01-25T14:30:00Z

  ## Summary

  - Total documents: 62
  - Categories: 8

  ## By Category

  ### agent-frameworks

  | Ref | Title | Year | Quality | Relevance |
  |-----|-------|------|---------|-----------|
  | [REF-013](corpus/REF-013-metagpt.md) | MetaGPT: Multi-Agent Framework | 2023 | high | foundational |
  | [REF-021](corpus/REF-021-reflexion.md) | Reflexion: Language Agents | 2023 | high | high |
  | [REF-022](corpus/REF-022-autogen.md) | AutoGen: Multi-Agent Conversation | 2023 | high | high |

  ### research-management

  | Ref | Title | Year | Quality | Relevance |
  |-----|-------|------|---------|-----------|
  | [REF-056](corpus/REF-056-fair.md) | FAIR Data Principles | 2016 | high | foundational |
  | [REF-060](corpus/REF-060-grade.md) | GRADE Evidence Quality | 2011 | high | foundational |
  | [REF-061](corpus/REF-061-oais.md) | OAIS Reference Model | 2012 | high | high |

  ### voice-profiles

  | Ref | Title | Year | Quality | Relevance |
  |-----|-------|------|---------|-----------|
  | [REF-043](corpus/REF-043-voice.md) | Voice Consistency in AI Writing | 2024 | moderate | high |

  ## Alphabetical

  - [REF-013](corpus/REF-013-metagpt.md) - MetaGPT: Multi-Agent Framework
  - [REF-021](corpus/REF-021-reflexion.md) - Reflexion: Language Agents
  - [REF-022](corpus/REF-022-autogen.md) - AutoGen: Multi-Agent Conversation
  - [REF-043](corpus/REF-043-voice.md) - Voice Consistency in AI Writing
  - [REF-056](corpus/REF-056-fair.md) - FAIR Data Principles
  - [REF-060](corpus/REF-060-grade.md) - GRADE Evidence Quality
  - [REF-061](corpus/REF-061-oais.md) - OAIS Reference Model

  ---
  Generated by `aiwg index generate`

# References: the research finding, the two tracking issues, and related
# corpus and schema paths.
references:
  research: ['@.aiwg/research/findings/REF-056-fair-principles.md']
  implementation: ['#233', '#235']
  related:
    - '@.aiwg/research/corpus/'
    - '@agentic/code/frameworks/sdlc-complete/schemas/flows/grade-evidence-quality.yaml'
    - '@agentic/code/frameworks/sdlc-complete/schemas/flows/citation-integrity.yaml'
