# Research Document Frontmatter Schema
# Based on REF-056 FAIR Principles (I1 - machine-readable metadata)
# Issue: #105

# JSON Schema dialect this document is written against, followed by the
# schema's own identifier and human-readable annotations.
$schema: "https://json-schema.org/draft/2020-12/schema"
$id: "https://aiwg.io/schemas/research-frontmatter/v1"
title: "Research Document Frontmatter Schema"
description: |
  Structured YAML frontmatter for research documents to enable machine-readable
  metadata extraction and FAIR compliance. All research documents (REF-XXX)
  MUST include this frontmatter for interoperability.

# The frontmatter must be a mapping. Only the five keys listed under
# `required` are mandatory as far as this list is concerned.
type: object
required:
  - ref_id
  - title
  - authors
  - year
  - source_type

properties:
  # "REF-" plus exactly three digits, optionally followed by one hyphen and a
  # lowercase-letter suffix (the suffix may not contain digits or hyphens).
  ref_id:
    type: string
    pattern: "^REF-[0-9]{3}(-[a-z]+)?$"
    description: "Unique reference identifier (e.g., REF-001, REF-056-fair)"

  # Full title; must be between 10 and 300 characters long.
  title:
    type: string
    minLength: 10
    maxLength: 300
    description: "Full paper/document title"

  # Short form capped at 50 characters; no minimum length is enforced.
  short_title:
    type: string
    maxLength: 50
    description: "Abbreviated title for references"

  # Non-empty list of author objects. Within each entry `name` is mandatory;
  # `affiliation` and `orcid` are optional.
  authors:
    type: array
    minItems: 1
    items:
      type: object
      # Without this, `{}` or an affiliation-only entry would satisfy
      # `minItems: 1` while naming no author at all.
      required:
        - name
      properties:
        name:
          type: string
          # Reject the empty string as a name.
          minLength: 1
        affiliation:
          type: string
        orcid:
          type: string
          # Four hyphen-separated groups of four characters: digits only,
          # except the very last character, which may also be "X".
          pattern: "^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$"
    description: "Author list with optional ORCID"

  # Integer in the inclusive range 1900-2100. Because the type is integer,
  # the value must be written unquoted in frontmatter.
  year:
    type: integer
    minimum: 1900
    maximum: 2100
    description: "Publication year"

  # Integer month number, 1 through 12 inclusive.
  month:
    type: integer
    minimum: 1
    maximum: 12
    description: "Publication month (optional)"

  # Closed vocabulary: a value outside this list fails validation.
  source_type:
    type: string
    enum:
      - peer_reviewed_journal
      - peer_reviewed_conference
      - preprint
      - technical_report
      - book_chapter
      - industry_whitepaper
      - standard
      - thesis
    description: "Type of source for GRADE assessment"

  # Where the work was published. No sub-field is marked required.
  venue:
    type: object
    properties:
      name:
        type: string
        description: "Journal/conference name"
      abbreviation:
        type: string
        description: "Venue abbreviation (e.g., NeurIPS, ICML)"
      # volume, issue and pages are typed as strings, so numeric-looking
      # values must be quoted in frontmatter (e.g. volume: "12"); an unquoted
      # 12 is parsed as an integer and fails the type check.
      volume:
        type: string
      issue:
        type: string
      pages:
        type: string
    description: "Publication venue details"

  # Identifiers used to locate the source document. No sub-field is marked
  # required inside this object.
  identifiers:
    type: object
    properties:
      doi:
        type: string
        # "10." + at least four digits + "/" + a non-empty suffix.
        # Single quotes keep the regex backslash literal (no YAML escaping).
        pattern: '^10\.[0-9]{4,}/.+$'
        description: 'Digital Object Identifier'
      arxiv:
        type: string
        # Four digits, a dot, four or five digits, optional "vN" version tag.
        pattern: '^[0-9]{4}\.[0-9]{4,5}(v[0-9]+)?$'
        description: 'arXiv identifier'
      isbn:
        type: string
        description: 'ISBN for books'
      url:
        type: string
        format: uri
        description: 'Canonical URL'
    description: 'Document identifiers for retrieval'

  # Free-form list of strings; no vocabulary or length limit is imposed.
  keywords:
    type: array
    items:
      type: string
    description: "Keywords/topics"

  # List of tags drawn from a closed vocabulary: every entry must be one of
  # the enum values below.
  categories:
    type: array
    items:
      type: string
      enum:
        - multi_agent_systems
        - code_generation
        - reasoning
        - planning
        - tool_use
        - memory
        - retrieval
        - evaluation
        - human_ai_collaboration
        - production_systems
        - quality_assurance
        - information_science
        - standards
    description: "AIWG-specific categories"

  # When present, must be at least 50 characters long.
  abstract:
    type: string
    minLength: 50
    description: "Paper abstract or summary"

  # List of finding objects. Each entry must carry the `finding` text;
  # `metric` and `impact` are optional.
  key_findings:
    type: array
    items:
      type: object
      # Without this, an empty object `{}` would count as a valid finding.
      required:
        - finding
      properties:
        finding:
          type: string
        metric:
          type: string
        impact:
          type: string
          enum: [high, medium, low]
    description: "Primary findings with quantified metrics"

  # How the document relates to AIWG itself. No sub-field is marked required.
  aiwg_relevance:
    type: object
    properties:
      applicability:
        type: string
        enum:
          - direct
          - partial
          - reference
          - background
      components_affected:
        type: array
        items:
          type: string
          enum:
            - agents
            - flows
            - schemas
            - rules
            - templates
            - commands
            - skills
      implementation_priority:
        type: string
        enum:
          - top-10
          - round-2
          - round-3
          - future
      related_issues:
        type: array
        items:
          type: string
          # "#" followed by one or more digits, e.g. "#110".
          pattern: '^#[0-9]+$'
    description: 'AIWG-specific relevance assessment'

  # Quality rating record. `grade_baseline` and `final_grade` share the same
  # four-level vocabulary; the factor lists are free-form strings.
  quality_assessment:
    type: object
    properties:
      grade_baseline:
        type: string
        enum:
          - high
          - moderate
          - low
          - very_low
      downgrade_factors:
        type: array
        items:
          type: string
      upgrade_factors:
        type: array
        items:
          type: string
      final_grade:
        type: string
        enum:
          - high
          - moderate
          - low
          - very_low
    description: 'GRADE quality assessment'

  # Exactly 64 lowercase hexadecimal characters; uppercase hex is rejected.
  pdf_hash:
    type: string
    pattern: "^[a-f0-9]{64}$"
    description: "SHA-256 hash of source PDF for fixity"

  # Date held as a string. Quote the value in frontmatter (e.g. "2026-01-25"):
  # some YAML parsers turn an unquoted ISO date into a date object, which
  # would then fail `type: string`.
  analysis_date:
    type: string
    format: date
    description: "When AIWG analysis was performed"

  # Same string/date constraints as analysis_date.
  last_verified:
    type: string
    format: date
    description: "Last verification of DOI/source"

# Validation rules
#
# `validation` is not a JSON Schema keyword, so a standard validator ignores
# it. It is retained unchanged as human-readable documentation and in case
# project tooling reads it. The `allOf` section that follows expresses the
# same two rules with `if`/`then` so that a standard validator enforces them.
validation:
  require_doi_for_published:
    condition: "source_type in ['peer_reviewed_journal', 'peer_reviewed_conference']"
    require: "identifiers.doi"
    message: "Published papers must have DOI"

  require_arxiv_for_preprints:
    condition: "source_type == 'preprint'"
    require: "identifiers.arxiv OR identifiers.url"
    message: "Preprints must have arXiv ID or URL"

# Enforced equivalents of the rules documented under `validation`.
# Each `if` also lists `source_type` as required; otherwise a document that
# omits the key would match the condition vacuously.
allOf:
  # Published papers must have DOI.
  - if:
      properties:
        source_type:
          enum:
            - peer_reviewed_journal
            - peer_reviewed_conference
      required:
        - source_type
    then:
      properties:
        identifiers:
          required:
            - doi
      required:
        - identifiers

  # Preprints must have arXiv ID or URL.
  - if:
      properties:
        source_type:
          const: preprint
      required:
        - source_type
    then:
      properties:
        identifiers:
          anyOf:
            - required:
                - arxiv
            - required:
                - url
      required:
        - identifiers

# Example frontmatter
#
# `examples` is a standard JSON Schema annotation keyword: a list of sample
# instances. The entry below is a preprint identified by both an arXiv ID and
# a URL.
examples:
  - ref_id: "REF-001"
    title: "Production-Grade Agentic AI Workflows"
    short_title: "Production Agentic"
    authors:
      - name: "Anonymous Authors"
        affiliation: "Industry Research"
    # arXiv identifiers start with YYMM, so 2512.08769 dates from December
    # 2025; the year is set to agree with the identifier below.
    year: 2025
    source_type: preprint
    identifiers:
      arxiv: "2512.08769"
      url: "https://arxiv.org/abs/2512.08769"
    keywords:
      - agentic AI
      - production systems
      - best practices
    categories:
      - production_systems
      - multi_agent_systems
    key_findings:
      - finding: "Nine best practices for production agentic workflows"
        metric: "Qualitative patterns"
        impact: high
    aiwg_relevance:
      applicability: direct
      components_affected: [agents, flows, rules]
      implementation_priority: round-2
      related_issues: ["#110"]
    quality_assessment:
      grade_baseline: moderate
      downgrade_factors: ["not_peer_reviewed"]
      final_grade: moderate
    # Quoted so the value stays a string rather than a YAML date.
    analysis_date: "2026-01-25"

# References
#
# `references` is not a JSON Schema keyword; it is informational only.
# Values are quoted because they begin with "@" or "#", which a plain
# (unquoted) YAML scalar cannot start with.
references:
  research:
    - "@.aiwg/research/findings/REF-056-fair-principles.md"
  implementation:
    - "#105"
  related:
    - "@agentic/code/frameworks/sdlc-complete/schemas/research/quality-assessment.yaml"
