---
# Workflow name, title, and one-line description.
name: gitlab-ci-debugging
title: 'GitLab CI/CD Debugging and Issue Resolution'
description: >-
  Systematic workflow for diagnosing and resolving GitLab CI/CD pipeline
  issues with intelligent root cause analysis and cross-pack integration
  support

# Agents that take part in this workflow; individual phases list their own
# subset under a phase-level `agents` key.
agents: [glab, dev, architect]

# Workflow-level metadata: effort estimate, prerequisites, success metrics.
metadata:
  # NOTE(review): the per-phase `estimated_duration` values under `phases`
  # add up to roughly 3.75-11 hours, which does not match this range.
  # Confirm which figure is intended.
  estimated_duration: '2-8 hours'
  complexity: 'medium'
  # Conditions expected to hold before the workflow starts.
  prerequisites:
    - Access to failing GitLab pipeline
    - GitLab CLI authenticated
    - Pipeline logs and error messages available
  # Outcomes listed as indicators of a successful run.
  success_metrics:
    - Pipeline failures resolved
    - Root causes identified and documented
    - Prevention measures implemented
    - Team knowledge updated

# Workflow phases, keyed by phase id. Every phase after the first names its
# predecessor under `dependencies`.
phases:
  # First phase: declares no dependencies and is handled by `glab` alone.
  issue_discovery:
    title: 'Issue Discovery and Initial Analysis'
    description: >-
      Identify and categorize pipeline failures with initial impact assessment
    estimated_duration: '30-60 minutes'
    agents: [glab]
    tasks:
      - analyze-pipeline-failures
      - monitor-pipeline-status
    # Severity triage: three options, from urgent handling to deferral.
    decision_points:
      - failure_severity_assessment:
          question: 'Is this a critical failure blocking development?'
          options:
            critical: 'Proceed with urgent resolution path'
            moderate: 'Continue with standard debugging workflow'
            minor: 'Consider deferring or batching with other fixes'
    success_criteria:
      - Failure patterns identified
      - Impact assessment completed
      - Initial root cause hypotheses formed
      - Resolution priority established
    # Artifacts this phase is expected to produce.
    outputs:
      - failure_analysis_report
      - priority_classification
      - initial_root_cause_hypotheses

  # Depends on issue_discovery; shared between `glab` and `dev`.
  root_cause_analysis:
    title: 'Deep Root Cause Analysis'
    description: >-
      Comprehensive analysis of failure causes using logs, configuration, and
      historical patterns
    estimated_duration: '1-3 hours'
    dependencies: [issue_discovery]
    agents: [glab, dev]
    tasks:
      - debug-ci-configuration
      - analyze-pipeline-failures
    # NOTE(review): the `deep_analysis` option refers to the architect, but
    # `architect` is not listed in this phase's `agents` - confirm intended.
    decision_points:
      - analysis_depth_decision:
          question: 'Is additional analysis needed beyond standard failure patterns?'
          options:
            deep_analysis: 'Engage architect for complex system issues'
            standard_analysis: 'Continue with standard resolution'
    success_criteria:
      - Root causes identified with confidence
      - Configuration issues documented
      - Environmental factors assessed
      - Historical pattern analysis completed
    # Artifacts this phase is expected to produce.
    outputs:
      - detailed_root_cause_analysis
      - configuration_issues_list
      - environmental_factor_assessment

  # Depends on root_cause_analysis; shared between `dev` and `architect`.
  solution_design:
    title: 'Solution Design and Planning'
    description: >-
      Design comprehensive solution addressing root causes and preventing
      recurrence
    estimated_duration: '30-90 minutes'
    dependencies: [root_cause_analysis]
    agents: [dev, architect]
    tasks:
      - create-gitlab-workflow-plan
    # Chooses between architectural, configuration-level, and direct fixes.
    decision_points:
      - solution_complexity_review:
          question: 'Does the solution require architectural changes?'
          options:
            architectural_changes: 'Involve architect for system design'
            configuration_changes: 'Proceed with configuration fixes'
            simple_fixes: 'Apply direct fixes to pipeline'
    success_criteria:
      - Solution approach defined
      - Implementation plan created
      - Risk assessment completed
      - Testing strategy established
    # Artifacts this phase is expected to produce.
    outputs:
      - solution_design_document
      - implementation_plan
      - risk_mitigation_strategy

  # Depends on solution_design; shared between `dev` and `glab`.
  implementation:
    title: 'Solution Implementation'
    description: >-
      Implement fixes and improvements with proper testing and validation
    estimated_duration: '1-4 hours'
    dependencies: [solution_design]
    agents: [dev, glab]
    tasks:
      - debug-ci-configuration
      - monitor-pipeline-status
    # Phase-local checkpoints. These are separate from the top-level
    # `checkpoints` list, which records `fix_deployed` for this phase.
    checkpoints:
      - configuration_updated:
          description: 'CI configuration changes applied'
          validation: 'Configuration passes lint validation'
      - initial_testing:
          description: 'Basic functionality testing completed'
          validation: 'Pipeline executes without syntax errors'
    success_criteria:
      - Fixes implemented according to plan
      - Configuration changes validated
      - Initial testing successful
      - No regression introduced
    # Artifacts this phase is expected to produce.
    outputs:
      - updated_ci_configuration
      - implementation_validation_results

  # Depends on implementation; handled by `glab` alone.
  integration_sync:
    title: 'Cross-Pack Integration Synchronization'
    description: >-
      Update related systems and notify stakeholders of resolution progress
    estimated_duration: '15-30 minutes'
    dependencies: [implementation]
    agents: [glab]
    tasks:
      - sync-ci-status-to-jira
      - coordinate-parallel-ci
    # Selects how widely integration updates are pushed out.
    decision_points:
      - integration_update_scope:
          question: 'Which integration systems need to be updated?'
          options:
            all_integrations: 'Update all connected systems'
            critical_only: 'Update only critical integrations'
            manual_notify: 'Use manual notification process'
    success_criteria:
      - JIRA issues updated with resolution status
      - Parallel development teams notified
      - Integration systems synchronized
      - Stakeholders informed
    # Artifacts this phase is expected to produce.
    outputs:
      - integration_update_summary
      - stakeholder_notifications

  # Depends on integration_sync; shared between `glab` and `dev`.
  validation_and_monitoring:
    title: 'Solution Validation and Monitoring Setup'
    description: >-
      Validate the complete solution and establish monitoring to prevent
      recurrence
    estimated_duration: '30-60 minutes'
    dependencies: [integration_sync]
    agents: [glab, dev]
    tasks:
      - monitor-pipeline-status
      - generate-ci-health-report
    # Selects the depth of validation testing, from smoke test to end-to-end.
    decision_points:
      - validation_scope:
          question: 'What level of validation testing is required?'
          options:
            comprehensive: 'Full end-to-end testing including edge cases'
            standard: 'Standard functionality testing'
            minimal: 'Basic smoke testing'
    success_criteria:
      - Solution thoroughly validated
      - Monitoring established
      - Documentation updated
      - Knowledge shared with team
    # Artifacts this phase is expected to produce.
    outputs:
      - validation_test_results
      - monitoring_configuration
      - updated_documentation

# Workflow-level checkpoints: one entry per phase, each with the statement
# used to validate it.
checkpoints:
  - phase: issue_discovery
    checkpoint: failure_identified
    validation: 'Pipeline failure categorized and impact assessed'

  - phase: root_cause_analysis
    checkpoint: root_cause_confirmed
    validation: 'Root cause identified with supporting evidence'

  - phase: solution_design
    checkpoint: solution_approved
    validation: 'Solution design reviewed and approved by team'

  - phase: implementation
    checkpoint: fix_deployed
    validation: 'Fix implemented and basic testing passed'

  - phase: integration_sync
    checkpoint: integrations_updated
    validation: 'All relevant integration systems notified and updated'

  - phase: validation_and_monitoring
    checkpoint: solution_validated
    validation: 'Complete solution validated and monitoring established'

# Quality gates attached to four of the phases. Only
# `integration_verification` is marked non-blocking.
quality_gates:
  - gate: root_cause_validation
    criteria: 'Root cause analysis supported by clear evidence'
    phase: root_cause_analysis
    blocking: true

  - gate: solution_review
    criteria: 'Solution design addresses root cause and prevents recurrence'
    phase: solution_design
    blocking: true

  - gate: regression_testing
    criteria: 'Implementation does not introduce new issues'
    phase: implementation
    blocking: true

  - gate: integration_verification
    criteria: 'All integration updates successful'
    phase: integration_sync
    blocking: false

# Known risks, each paired with a mitigation and the phase it is tied to.
risk_mitigation:
  - risk: 'Solution introduces new failures'
    mitigation: 'Implement incremental changes with rollback capability'
    phase: implementation

  - risk: 'Incomplete root cause analysis'
    mitigation: 'Use multiple analysis methods and historical pattern review'
    phase: root_cause_analysis

  - risk: 'Integration update failures'
    mitigation: 'Test integration updates in isolation before full deployment'
    phase: integration_sync

# Tooling and access, grouped as required, recommended, and external.
tools_and_resources:
  required_tools:
    - 'GitLab CLI (glab)'
    - 'Git access'
    - 'Pipeline log access'
  recommended_tools:
    - 'GitLab CI linter'
    - 'Log analysis tools'
    - 'Collaboration tools for team communication'
  # Quoted so the comma and parentheses are unambiguously part of the text.
  external_dependencies:
    - 'GitLab repository access'
    - 'CI/CD pipeline execution environment'
    - 'Integration system access (JIRA, etc.)'

# Escalation rules: the triggering condition, who to involve, and by when.
escalation_paths:
  - trigger: 'Critical system-wide failure'
    escalation: 'Engage architect and senior dev immediately'
    timeline: 'Within 15 minutes'

  - trigger: 'Complex architectural issues identified'
    escalation: 'Include architect in solution design phase'
    timeline: 'Before implementation begins'

  - trigger: 'Multiple failed resolution attempts'
    escalation: 'Senior team review and alternative approach'
    timeline: 'After 2 failed attempts'

# Failure patterns with their typical resolution and average duration.
success_patterns:
  - pattern: 'Configuration syntax errors'
    typical_resolution: 'CI configuration debugging and validation'
    average_duration: '1-2 hours'

  - pattern: 'Dependency or environment issues'
    typical_resolution: 'Environment analysis and dependency updates'
    average_duration: '2-4 hours'

  - pattern: 'Test failures or flaky tests'
    typical_resolution: 'Test analysis and stabilization'
    average_duration: '2-6 hours'

# Per-integration hooks: each entry pairs a phase with the action described
# for that integration.
integration_hooks:
  jira_integration:
    - phase: issue_discovery
      action: 'Create or update JIRA issue with failure details'
    - phase: validation_and_monitoring
      action: 'Update JIRA with resolution and close if appropriate'

  parallel_dev_integration:
    - phase: issue_discovery
      action: 'Notify parallel development teams of potential blocking issues'
    - phase: validation_and_monitoring
      action: 'Confirm resolution across all parallel development streams'

  core_bmad_integration:
    - phase: solution_design
      action: 'Integrate solution into development workflow planning'
    - phase: validation_and_monitoring
      action: 'Update project documentation and knowledge base'