---
# OpenAPI 3.0 description of the Prysm scraper's HTTP API.
# Version-like and URL values are quoted so they are always read as strings.
openapi: '3.0.0'
info:
  title: Prysm Scraper API
  description: API for the Prysm structure-aware web scraper
  version: '1.0.0'
  contact:
    name: Pink Pixel
    url: 'https://pinkpixel.dev'
  license:
    name: MIT
    url: 'https://opensource.org/licenses/MIT'

# Base URLs this API is served from; only a local development host is listed.
servers:
  - description: Development server
    url: 'http://localhost:3000'

# Tag names referenced by the `tags` list of each operation under `paths`.
tags:
  - name: 'Jobs'
    description: 'Scraping job operations'
  - name: 'Results'
    description: 'Scraping results operations'

components:
  schemas:
    # Per-job scraper settings. No property is listed as required, and both
    # output paths are nullable.
    ScraperOptions:
      type: object
      properties:
        pages:
          type: integer
          description: Maximum number of pages to scrape by following links
          default: 1
          minimum: 1
          maximum: 100
        images:
          type: boolean
          description: Whether to download images from the page
          default: false
        output:
          type: string
          # Quoted on purpose: the text contains ": ", which terminates a plain
          # scalar and makes the document fail to parse when left unquoted.
          description: 'Custom output path for results (default: ~/prysm/output)'
          nullable: true
        imageOutput:
          type: string
          # Quoted for the same reason as `output` above (embedded ": ").
          description: 'Custom output path for images (default: ~/prysm/output/images)'
          nullable: true
    
    # Request body for creating a job; `url` is the only required property.
    JobRequest:
      type: object
      required: [url]
      properties:
        url:
          description: URL to scrape
          type: string
          format: uri
        options:
          $ref: '#/components/schemas/ScraperOptions'
        priority:
          description: Job priority (1 = highest, 10 = lowest)
          type: integer
          default: 5
          minimum: 1
          maximum: 10
        webhook:
          description: URL to receive webhook notifications when job completes
          type: string
          format: uri
    
    # Job record. `startedAt`, `completedAt` and `error` are nullable;
    # `progress` is bounded to 0-100.
    JobResponse:
      type: object
      properties:
        jobId:
          description: Unique identifier for the job
          type: string
        status:
          type: string
          enum:
            - pending
            - processing
            - completed
            - failed
            - cancelled
        url:
          format: uri
          type: string
        createdAt:
          format: date-time
          type: string
        startedAt:
          nullable: true
          format: date-time
          type: string
        completedAt:
          nullable: true
          format: date-time
          type: string
        error:
          nullable: true
          type: string
        progress:
          type: integer
          maximum: 100
          minimum: 0
    
    # Scrape output payload. Only the property types are declared here; this
    # schema attaches no descriptions to the individual fields.
    ScrapingResults:
      type: object
      properties:
        title:
          type: string
        content:
          type: array
          items: {type: string}
        metadata:
          type: object
        structureType:
          type: string
        paginationType:
          type: string
        extractionMethod:
          type: string
        url:
          format: uri
          type: string
    
    # Composition: every JobResponse property plus a `result` property that
    # holds a ScrapingResults object.
    JobWithResults:
      allOf:
        - $ref: '#/components/schemas/JobResponse'
        - properties:
            result:
              $ref: '#/components/schemas/ScrapingResults'
          type: object
    
    # Error payload referenced by the shared entries under components.responses.
    Error:
      type: object
      properties:
        message: {type: string}
        code: {type: string}
        details: {type: object}

  # Reusable error responses; each one carries the Error schema as its JSON body.
  responses:
    # Resource lookup failed.
    NotFound:
      description: The specified resource was not found
      content:
        application/json:
          schema: {$ref: '#/components/schemas/Error'}

    # Request could not be understood.
    BadRequest:
      description: The request was malformed
      content:
        application/json:
          schema: {$ref: '#/components/schemas/Error'}

    # Caller is not authenticated.
    Unauthorized:
      description: Authentication is required or failed
      content:
        application/json:
          schema: {$ref: '#/components/schemas/Error'}

    # Caller exceeded the rate limit.
    TooManyRequests:
      description: Request rate limit exceeded
      content:
        application/json:
          schema: {$ref: '#/components/schemas/Error'}
            
paths:
  # Job collection: POST creates a scraping job, GET lists jobs with optional
  # status filtering and limit/offset pagination.
  /api/jobs:
    post:
      summary: Create a new scraping job
      description: |
        Start a new scraping job with the specified URL and options.
        This is the primary endpoint for initiating scraping operations.
      tags:
        - Jobs
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/JobRequest'
            examples:
              basic:
                summary: Basic scraping job
                value:
                  url: "https://example.com"
              withOptions:
                summary: Scraping with options
                value:
                  url: "https://example.com"
                  options:
                    pages: 5
                    images: true
                    output: "/custom/path"
      responses:
        '202':
          description: Job accepted
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobResponse'
        '400':
          # Use the shared response so the JSON error body is documented
          # instead of a bare description with no schema.
          $ref: '#/components/responses/BadRequest'

    get:
      summary: List all jobs
      description: Retrieve a list of all jobs with optional filtering
      tags:
        - Jobs
      parameters:
        - name: status
          in: query
          schema:
            type: string
            enum: [pending, processing, completed, failed, cancelled]
          description: Filter jobs by status
        - name: limit
          in: query
          schema:
            type: integer
            default: 20
          description: Maximum number of jobs to return
        - name: offset
          in: query
          schema:
            type: integer
            default: 0
          description: Offset for pagination
      responses:
        '200':
          description: List of jobs
          content:
            application/json:
              schema:
                # Envelope: the page of jobs plus total/limit/offset integers.
                type: object
                properties:
                  jobs:
                    type: array
                    items:
                      $ref: '#/components/schemas/JobResponse'
                  total:
                    type: integer
                  limit:
                    type: integer
                  offset:
                    type: integer
  
  # Single-job resource: GET returns its status and details, DELETE cancels
  # the job or deletes its results.
  /api/jobs/{jobId}:
    # Declared once at path-item level; it applies to every operation below,
    # replacing the identical per-operation copies.
    parameters:
      - name: jobId
        in: path
        required: true
        schema:
          type: string
        description: Job identifier

    get:
      summary: Get job status
      description: Get the status and details of a specific job
      tags:
        - Jobs
      responses:
        '200':
          description: Job details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobResponse'
        '404':
          # Shared response: documents the JSON error body for a missing job.
          $ref: '#/components/responses/NotFound'

    delete:
      summary: Cancel or delete job
      description: Cancel a job or delete its results
      tags:
        - Jobs
      responses:
        '204':
          description: Job cancelled or deleted
        '404':
          # Shared response: documents the JSON error body for a missing job.
          $ref: '#/components/responses/NotFound'
  
  # Results of a job: the 200 body is the job record combined with its
  # `result` payload (JobWithResults).
  /api/jobs/{jobId}/results:
    get:
      summary: Get job results
      description: Retrieve the results of a completed job
      tags:
        - Results
      parameters:
        - name: jobId
          in: path
          required: true
          schema:
            type: string
          description: Job identifier
      responses:
        '200':
          description: Job results
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobWithResults'
        '404':
          # Keeps the endpoint-specific wording and adds the same Error body
          # that the shared NotFound response declares.
          description: Job or results not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
        '409':
          # NOTE(review): no body schema is documented for this status;
          # confirm whether it also returns the Error payload.
          description: Job not completed