# git-analysis

Comprehensive git repository analysis utilities for extracting development insights, correlating code changes with BMAD stories and JIRA issues, and supporting intelligent sync operations.

## Core Analysis Functions

### Git History Analysis

**Commit Activity Extraction:**

```bash
# Yesterday's team activity
# --all walks every ref rather than only the current branch, --no-merges
# drops merge commits, and --stat appends per-file change counts to each
# one-line entry.
git log --since="yesterday" --oneline --stat --all --no-merges

# Individual developer analysis
# --author is a pattern matched against the commit author header; without
# --all only commits reachable from the current HEAD are listed.
git log --since="yesterday" --author="developer@email.com" --oneline --stat

# Detailed commit information with file changes
# Header fields are pipe-separated: abbreviated hash | author name |
# author email | author date (ISO-like, via --date=iso) | subject.
# --name-status then lists each changed path with its status letter.
git log --since="yesterday" --name-status --pretty=format:"%h|%an|%ae|%ad|%s" --date=iso

# Branch activity and merge patterns
# --graph draws the commit topology as ASCII art; --decorate adds ref names.
git log --graph --oneline --since="yesterday" --all --decorate
```

**File Change Analysis:**

```bash
# Files modified with change statistics
# All commands in this snippet compare HEAD with its first parent (HEAD~1),
# so they fail when HEAD is the repository's first commit.
# --name-status prints one status letter and path per changed file;
# --stat prints a per-file histogram plus a summary line.
git diff --name-status HEAD~1 HEAD
git diff --stat HEAD~1 HEAD

# Line-by-line changes for specific files
# Everything after "--" is treated as a path, never as a revision.
git diff HEAD~1 HEAD -- src/specific/file.js

# Directory-level change summary
# --dirstat reports the share of the change that falls in each directory.
git diff --dirstat HEAD~1 HEAD
```

**Developer Activity Patterns:**

```bash
# Commit frequency by developer
# The explicit HEAD matters: with no revision argument, git shortlog reads a
# log from standard input whenever stdin is not a terminal (cron, CI, pipes),
# so the bare form hangs or prints nothing outside an interactive shell.
# -s prints counts only; -n orders authors by commit count.
git shortlog -sn --since="1 week ago" HEAD

# Developer activity timeline
# Lines are "author date subject"; the plain sort therefore groups by author
# name first and orders each author's commits by date.
git log --since="1 week ago" --pretty=format:"%an %ad %s" --date=short | sort

# Code ownership analysis
# Counts commits per author that touched the given path, busiest first.
git log --pretty=format:"%an" -- path/to/component | sort | uniq -c | sort -nr
```

### Code Quality Analysis

**Test Coverage Detection:**

```bash
# Test file changes
# The empty --pretty=format: suppresses commit headers and messages, so only
# real paths reach grep (a subject such as "update foo.test.js" cannot match).
git log --since="yesterday" --name-only --pretty=format: | \
  grep -E "\.(test|spec)\.(js|ts|py|rb)$"

# Test line additions
# numstat rows are "added<TAB>deleted<TAB>path"; the pattern is applied to the
# path column only. "sum + 0" prints 0 instead of an empty line when nothing
# matched, and binary files (reported as "-") contribute 0.
git log --since="yesterday" --numstat --pretty=format: | \
  awk -F'\t' '$3 ~ /\.(test|spec)\./ {sum += $1} END {print sum + 0}'

# Test coverage trend
# Prints "<count> <date>" per day: the number of test/spec file changes
# committed on that day. Each commit date is emitted behind a leading TAB so
# the awk script can tell date lines from paths (git quotes paths containing
# tabs, so a path line never starts with a raw TAB).
git log --since="1 week ago" --pretty=format:"%x09%ad" --date=short --name-only | \
  awk '/^\t/ {day = substr($0, 2); next}
       /\.(test|spec)\./ {count[day]++}
       END {for (d in count) print count[d], d}' | \
  sort -k2
```

**Refactoring Detection:**

```bash
# Refactoring commits (high line changes, same functionality)
# Prints "<hash> <subject>" for every commit whose insertions plus deletions
# exceed 50 lines. The commit header is emitted behind a leading TAB so it can
# be told apart from the --shortstat summary line that follows it. The counts
# are located by the word after them ("insertion"/"deletion") because either
# half of the summary is omitted when it is zero, which shifts field positions.
git log --since="yesterday" --pretty=format:"%x09%h %s" --shortstat | \
  awk '/^\t/ {commit = substr($0, 2); next}
       / files? changed/ {
         changed = 0
         for (i = 1; i < NF; i++)
           if ($(i + 1) ~ /^(insertion|deletion)/) changed += $i
         if (changed > 50) print commit
       }'

# Code complexity indicators
# Only real numstat rows (three tab-separated columns) are counted; the empty
# --pretty=format: keeps commit headers and messages out of the totals.
git log --since="yesterday" --numstat --pretty=format: | \
  awk -F'\t' 'NF >= 3 {files++; added += $1; deleted += $2}
       END {print "Files:", files + 0, "Added:", added + 0, "Deleted:", deleted + 0}'

# Documentation changes
git log --since="yesterday" --name-only --pretty=format: | grep -E "\.(md|rst|txt|doc)$"
```

**Bug Fix Detection:**

```bash
# Bug fix commits
# --grep matches the commit message against a basic regular expression;
# "\|" separates the alternatives. No -i is given, so matching is
# case-sensitive ("Fix" with a capital F is not reported).
git log --since="yesterday" --grep="fix\|bug\|issue" --oneline

# Emergency fixes (commits outside normal hours)
# --date=format:"%H" renders the author date as a two-digit hour, which
# becomes the first field of every line. awk keeps the lines whose hour is
# below 8 or above 18, i.e. hours 08 through 18 count as normal hours.
git log --since="yesterday" --pretty=format:"%ad %h %s" --date=format:"%H" | \
  awk '$1 < 8 || $1 > 18 {print}'

# Hotfix branch activity
# Same message search as above, but --all includes every ref, not just HEAD.
git log --since="yesterday" --all --grep="hotfix\|urgent" --oneline
```

### Collaboration Analysis

**Code Review Activity:**

```bash
# Pull request related commits
git log --since="yesterday" --grep="merge\|pull\|review" --oneline

# Collaborative commits (author differs from committer)
# Fields are tab-separated so that multi-word names ("Jane Doe") are compared
# as a whole; splitting on spaces would compare a first name with a last name
# and flag nearly every commit. Output stays "Collaborative: author committer
# subject".
git log --since="yesterday" --pretty=format:"%an%x09%cn%x09%s" | \
  awk -F'\t' '$1 != $2 {
    subject = $3
    for (i = 4; i <= NF; i++) subject = subject "\t" $i  # subject contained tabs
    print "Collaborative:", $1, $2, subject
  }'

# Review comments and feedback
git log --since="yesterday" --grep="review\|feedback\|address" --oneline
```

**Knowledge Sharing Patterns:**

```bash
# Cross-component work (developers working outside their usual areas)
# Prints "<count> <author> <path>", most frequent pairs first. The author
# line is emitted behind a leading TAB so it can be told apart from the paths
# that follow it; every path of a commit is then paired with that author, no
# matter how many files the commit touched.
git log --since="1 week ago" --name-only --pretty=format:"%x09%an" | \
  awk '/^\t/ {dev = substr($0, 2); next}
       NF {print dev, $0}' | \
  sort | uniq -c | sort -nr

# Pair programming detection
git log --since="yesterday" --pretty=format:"%s" | grep -i "pair\|mob\|together"
```

## BMAD Correlation Intelligence

### Story-to-Code Mapping

**File Path Correlation:**

```bash
# Map file changes to story components
# Arguments: $1 - path to a story file
# Outputs:   one "Story: <story>, File: <path>" line for every log line since
#            yesterday that starts with a src/... path mentioned on a
#            Component/Module/File line of the story
analyze_story_correlation() {
  local story_file="$1"
  local tracked_prefixes prefix changed_file

  tracked_prefixes=$(grep -E "Component|Module|File" "$story_file" | \
    grep -oE "src/[a-zA-Z0-9/_-]+")

  # Word splitting is intentional: the extraction pattern cannot produce
  # whitespace or glob characters.
  for prefix in $tracked_prefixes; do
    while read changed_file; do
      echo "Story: $story_file, File: $changed_file"
    done < <(git log --since="yesterday" --name-only | grep -E "^$prefix")
  done
}

# Example: analyze_story_correlation "authentication.storyimpl.md"
```

**Commit Message Analysis:**

```bash
# Extract story references from commit messages
# Outputs: one reference per line, taken from the subjects of commits made
#          since yesterday. Recognised forms are "<name>.story.md",
#          "<name>.storyimpl.md", "BMAD-<n>" and "story:<id>" / "story#<id>".
extract_story_references() {
  # Both greps are case-insensitive so a subject admitted by the keyword
  # filter ("Story: login") is not silently dropped by the extraction step.
  # [[:space:]] is the portable spelling of the GNU-only \s, and "(impl)?"
  # covers the *.storyimpl.md naming used by the other helpers.
  git log --since="yesterday" --pretty=format:"%h|%s" | \
    grep -iE "(story|bmad|feature)" | \
    grep -ioE "([a-zA-Z0-9-]+\.story(impl)?\.md|BMAD-[0-9]+|story[:#][[:space:]]*[a-zA-Z0-9-]+)"
}

# Find commits mentioning specific stories
# Arguments: $1 - text to look for in commit messages (passed to --grep, so
#                 it is interpreted as a pattern)
# Outputs:   matching commits since yesterday, one line each
find_story_commits() {
  local story_name="$1"
  local -a log_args=(--since="yesterday" --grep="$story_name" --oneline)

  git log "${log_args[@]}"
}
```

**Component Change Detection:**

```bash
# Identify which story components were modified
# Arguments: $@ - optional component names; defaults to
#                 auth, user, payment, search, admin
# Outputs:   "<component>: <n> files changed" for every component with at
#            least one changed path since yesterday matching "src.*<component>"
detect_component_changes() {
  local -a components=("$@")
  local changed_paths component changes

  if [ "${#components[@]}" -eq 0 ]; then
    components=("auth" "user" "payment" "search" "admin")
  fi

  # One log walk for all components; the empty format keeps commit headers
  # and messages out of the path list.
  changed_paths=$(git log --since="yesterday" --name-only --pretty=format:)

  for component in "${components[@]}"; do
    # grep -c prints a bare number on every platform (wc -l pads on BSD).
    changes=$(grep -cE "src.*$component" <<<"$changed_paths")
    if [ "$changes" -gt 0 ]; then
      echo "$component: $changes files changed"
    fi
  done
}
```

### Progress Estimation

**Completion Analysis:**

```bash
# Estimate story completion based on file changes
# Arguments: $1 - path to a story file
# Outputs:   "Story: <file>, Progress: <n>%" where n is the share of
#            Component/Module lines with a changed src/... path in the last week
# Returns:   1 (with a message on stderr) when the story file is unreadable
estimate_story_progress() {
  local story_file="$1"
  local total_components=0
  local modified_components=0
  local completion_percentage=0
  local changed_paths component_line component_path

  if [ ! -r "$story_file" ]; then
    echo "estimate_story_progress: cannot read story file: $story_file" >&2
    return 1
  fi

  # Collected once; every component line is checked against the same list.
  changed_paths=$(git log --since="1 week ago" --name-only --pretty=format:)

  # The loop is fed by process substitution instead of a pipe so that the
  # counters are updated in this shell rather than in a discarded subshell.
  while IFS= read -r component_line; do
    total_components=$((total_components + 1))
    # A line without a src/... path yields no iterations and is never counted
    # as modified; a line with several paths counts once if any path changed.
    for component_path in $(grep -oE "src/[a-zA-Z0-9/_-]+" <<<"$component_line"); do
      if grep -qE "^$component_path" <<<"$changed_paths"; then
        modified_components=$((modified_components + 1))
        break
      fi
    done
  done < <(grep -E "Component|Module" "$story_file")

  # Guard the division: a story without component lines reports 0%.
  if [ "$total_components" -gt 0 ]; then
    completion_percentage=$((modified_components * 100 / total_components))
  fi
  echo "Story: $story_file, Progress: $completion_percentage%"
}
```

**Acceptance Criteria Mapping:**

```bash
# Map commits to acceptance criteria
# Arguments: $1 - path to a story file
# Outputs:   for every criteria line ("- ... [ ] ..." or "1. ..."), a
#            "Criteria: <line>" header followed by "  Commit: <oneline>" rows
#            for last week's commits whose message contains any word of three
#            or more letters from that line
map_commits_to_criteria() {
  local story_file="$1"
  local criteria keywords

  while IFS= read -r criteria; do
    # Join the words into an alternation: word1|word2|...
    keywords=$(printf '%s\n' "$criteria" | \
      grep -oE "[a-zA-Z]{3,}" | paste -sd '|' -)

    if [ -n "$keywords" ]; then
      echo "Criteria: $criteria"
      # --extended-regexp is required for "|" to act as alternation; git
      # treats --grep as a basic regex by default, where "|" is a literal
      # character. Case is ignored because criteria are usually capitalised
      # differently from commit subjects.
      git log --since="1 week ago" --extended-regexp --regexp-ignore-case \
        --grep="$keywords" --oneline | \
        sed 's/^/  Commit: /'
    fi
  done < <(grep -E "^-.*\[.*\]|^[0-9]+\." "$story_file")
}
```

## JIRA Integration Analysis

### Issue Correlation

**Commit-to-Issue Matching:**

```bash
# Extract JIRA issue references from commits
# Arguments: $1 - optional --since value (default: yesterday)
# Outputs:   the distinct issue-key-shaped tokens (UPPERCASE-digits) found in
#            commit subjects within that window, sorted, one per line
extract_jira_references() {
  local since_date="${1:-yesterday}"

  # LC_ALL=C keeps ordering and de-duplication byte-wise and reproducible.
  git log --since="$since_date" --pretty=format:"%h|%s" | \
    grep -oE "[A-Z]+-[0-9]+" | LC_ALL=C sort -u
}

# Find commits for specific JIRA issue
# Arguments: $1 - issue key, e.g. PROJ-12
# Outputs:   one-line entries for last month's commits mentioning that key
# Returns:   1 (with a message on stderr) when no key is given
find_issue_commits() {
  local issue_key="$1"

  if [ -z "$issue_key" ]; then
    echo "find_issue_commits: issue key required" >&2
    return 1
  fi

  # The key is anchored on both sides so that PROJ-1 does not also match
  # PROJ-10 or XPROJ-1; the grouping needs --extended-regexp.
  git log --since="1 month ago" --extended-regexp \
    --grep="(^|[^A-Za-z0-9])${issue_key}([^0-9]|\$)" --oneline
}

# Analyze commit patterns for JIRA issues
# Arguments: $1 - issue key, e.g. PROJ-12
# Outputs:   the ten paths most often touched by commits mentioning the key,
#            as "<count> <path>" rows, most frequent first
# Returns:   1 (with a message on stderr) when no key is given
analyze_jira_commit_patterns() {
  local issue_key="$1"

  if [ -z "$issue_key" ]; then
    echo "analyze_jira_commit_patterns: issue key required" >&2
    return 1
  fi

  # Anchored so that PROJ-1 does not pull in the files of PROJ-10; the
  # grouping needs --extended-regexp.
  git log --extended-regexp \
    --grep="(^|[^A-Za-z0-9])${issue_key}([^0-9]|\$)" \
    --name-only --pretty=format:"" | \
    grep -v "^$" | sort | uniq -c | sort -nr | head -10
}
```

**Status Correlation:**

```bash
# Detect development activity for JIRA issues
# Arguments: $1 - optional --since value (default: yesterday)
# Outputs:   "Issue: <key>, Commits: <n>, Files: <m>" for every issue key
#            returned by extract_jira_references that has at least one commit
#            in the window
detect_jira_activity() {
  local since_date="${1:-yesterday}"
  local issue issue_pattern commit_count files_changed

  # The window is forwarded so that issue discovery covers the same period
  # as the per-issue counts below.
  while IFS= read -r issue; do
    [ -n "$issue" ] || continue

    # Anchored so that PROJ-1 does not also count commits for PROJ-10.
    issue_pattern="(^|[^A-Za-z0-9])${issue}([^0-9]|\$)"

    commit_count=$(git log --since="$since_date" --extended-regexp \
      --grep="$issue_pattern" --oneline | grep -c '')
    # The empty format keeps commit headers and messages out of the file
    # list; "grep -c ." counts the remaining non-empty, de-duplicated paths.
    files_changed=$(git log --since="$since_date" --extended-regexp \
      --grep="$issue_pattern" --name-only --pretty=format: | \
      sort -u | grep -c .)

    if [ "$commit_count" -gt 0 ]; then
      echo "Issue: $issue, Commits: $commit_count, Files: $files_changed"
    fi
  done < <(extract_jira_references "$since_date")
}
```

### Time Estimation

**Commit-Based Time Analysis:**

```bash
# Estimate time spent based on commit patterns
# Arguments: $1 - author pattern (passed to --author)
#            $2 - optional --since value (default: yesterday)
# Outputs:   "Estimated time: <hours> hours", the sum of the gaps between
#            consecutive commits that are less than two hours apart
estimate_development_time() {
  local author="$1"
  local since_date="${2:-yesterday}"

  # %at is the author date as a Unix timestamp, so gaps are plain
  # subtractions and the script runs on any awk (no gawk-only
  # gensub/mktime, which also rejected timestamps lacking a seconds field).
  git log --author="$author" --since="$since_date" --pretty=format:"%at" | \
    sort -n | \
    awk '
      NR > 1 {
        gap = $1 - prev
        if (gap < 7200) total_time += gap  # Only count if < 2 hours apart
      }
      { prev = $1 }
      END { print "Estimated time: " total_time / 3600 " hours" }
    '
}

# Analyze development session patterns
# Arguments: $1 - author pattern (passed to --author)
# Outputs:   "Hour <HH>: <n> commits" for every hour of day in which the
#            author committed during the last week, ordered by hour
analyze_development_sessions() {
  local author="$1"

  # Tally the two-digit hour of each commit in awk, then order the report
  # lines; they share the "Hour " prefix, so sorting them sorts by hour.
  git log --author="$author" --since="1 week ago" \
    --pretty=format:"%ad" --date=format:"%H" | \
    awk '{ tally[$1]++ }
         END { for (hour in tally) print "Hour " hour ": " tally[hour] " commits" }' | \
    sort
}
```

## Sync Intelligence Support

### Divergence Detection

**Status Lag Detection:**

```bash
# Find commits without corresponding JIRA activity
# Arguments: $1 - optional --since value (default: yesterday)
# Outputs:   "Silent commit: <hash> - <subject>" for every commit in the
#            window whose subject contains no issue-key-shaped token
detect_silent_development() {
  local since_date="${1:-yesterday}"
  local hash message

  # tformat: terminates every entry with a newline; format: only separates
  # entries, which leaves the last line unterminated so that a plain
  # "while read" silently skips the final commit. The "|| [ -n ... ]" guard
  # covers an unterminated last line from any other source as well.
  while IFS='|' read -r hash message || [ -n "$hash" ]; do
    # Check if commit mentions JIRA issue
    if [[ ! "$message" =~ [[:upper:]]+-[[:digit:]]+ ]]; then
      echo "Silent commit: $hash - $message"
    fi
  done < <(git log --since="$since_date" --pretty=tformat:"%h|%s")
}

# Find JIRA issues marked done without recent commits
# Placeholder: only reports that a JIRA API integration is needed.
detect_phantom_progress() {
  # Comparing JIRA status with git activity needs data this script cannot
  # obtain on its own, so only a notice is printed.
  printf '%s\n' "Requires JIRA API integration for full implementation"
}
```

**Documentation Drift Detection:**

```bash
# Find BMAD stories that might be outdated
# Arguments: $1 - directory to search for *.storyimpl.md files (default: .)
# Outputs:   "Outdated story: <path> (<n> commits since last update)" for
#            every story older than the latest commit that has commits
#            mentioning its base name since the story was last modified
detect_documentation_drift() {
  local stories_dir="${1:-.}"
  local story story_date last_commit_date component recent_commits

  # The newest commit time does not depend on the story, so read it once;
  # outside a repository it falls back to 0.
  last_commit_date=$(git log -1 --format="%ct" 2>/dev/null)
  last_commit_date=${last_commit_date:-0}

  while IFS= read -r story; do
    # GNU form first, BSD/macOS form second. In the opposite order GNU stat
    # treats -f as "file system mode", prints file-system details for the
    # story to stdout and only then fails, so the captured value becomes
    # multi-line text instead of an epoch.
    story_date=$(stat -c "%Y" "$story" 2>/dev/null || stat -f "%m" "$story" 2>/dev/null)
    [[ "$story_date" =~ ^[0-9]+$ ]] || continue

    if [ "$last_commit_date" -gt "$story_date" ]; then
      component=$(basename "$story" .storyimpl.md)
      recent_commits=$(git log --since="@$story_date" --grep="$component" --oneline | grep -c '')

      if [ "$recent_commits" -gt 0 ]; then
        echo "Outdated story: $story ($recent_commits commits since last update)"
      fi
    fi
  done < <(find "$stories_dir" -name "*.storyimpl.md")
}
```

### Quality Metrics

**Code Quality Indicators:**

```bash
# Calculate code quality metrics
# Arguments: $1 - optional --since value (default: yesterday)
# Outputs:   a five-line report: header, distinct test/spec files touched,
#            distinct files touched, distinct documentation files touched,
#            and commits whose message mentions refactor/cleanup/improve
calculate_quality_metrics() {
  local since_date="${1:-yesterday}"
  local changed_files test_files total_files doc_files refactor_commits

  # One log walk feeds all three file metrics. The empty format keeps commit
  # headers and message lines out of the list (they used to be counted as
  # files), and sort -u makes every metric count distinct paths so the
  # sub-counts can never exceed the total.
  changed_files=$(git log --since="$since_date" --name-only --pretty=format: | sort -u)

  # Test coverage change
  test_files=$(grep -cE "\.(test|spec)\." <<<"$changed_files")
  total_files=$(grep -c . <<<"$changed_files")

  # Documentation changes
  doc_files=$(grep -cE "\.(md|rst|txt)$" <<<"$changed_files")

  # Refactoring indicators
  refactor_commits=$(git log --since="$since_date" \
    --grep="refactor\|cleanup\|improve" --oneline | grep -c '')

  echo "Quality Metrics for $since_date:"
  echo "  Test files modified: $test_files"
  echo "  Total files modified: $total_files"
  echo "  Documentation updates: $doc_files"
  echo "  Refactoring commits: $refactor_commits"
}

# Team collaboration score
# Arguments: $1 - optional --since value (default: 1 week ago)
# Outputs:   a four-line report: header, files changed by more than one
#            author, commits mentioning review/merge/pull, and commits
#            mentioning pair/mob
calculate_collaboration_score() {
  local since_date="${1:-1 week ago}"
  local shared_files review_commits pair_commits

  # Cross-developer file modifications
  # Each commit's author email is emitted behind a leading TAB so awk can
  # tell it apart from the paths that follow. A file counts as shared only
  # when at least two distinct authors touched it; blank lines and commit
  # headers no longer inflate the number.
  shared_files=$(git log --since="$since_date" --name-only --pretty=format:"%x09%ae" | \
    awk '/^\t/ { author = substr($0, 2); next }
         NF && !seen[$0, author]++ { touched[$0]++ }
         END { for (f in touched) if (touched[f] > 1) shared++; print shared + 0 }')

  # Code review activity
  review_commits=$(git log --since="$since_date" --grep="review\|merge\|pull" --oneline | grep -c '')

  # Pair programming indicators
  pair_commits=$(git log --since="$since_date" --grep="pair\|mob" --oneline | grep -c '')

  echo "Collaboration Metrics:"
  echo "  Shared files: $shared_files"
  echo "  Review activity: $review_commits"
  echo "  Pair programming: $pair_commits"
}
```

## Advanced Analysis Features

### Predictive Analytics

**Development Velocity Trends:**

```bash
# Calculate weekly development velocity
# Outputs: five lines "Week <n> ago: <commits> commits, <files> files" for
#          n = 0..4, where week n spans from (n+1) weeks ago to n weeks ago,
#          so week 0 is the most recent seven days
calculate_velocity_trend() {
  local week start_date end_date commits files

  for week in {0..4}; do
    # GNU date first, BSD/macOS date (-v) as fallback. The window ends at
    # "$week weeks ago" rather than "$((week-1)) weeks ago": the latter put
    # week 0 entirely in the future and produced the invalid BSD argument
    # "-v--1w".
    start_date=$(date -d "$((week + 1)) weeks ago" +%Y-%m-%d 2>/dev/null || \
      date -v-"$((week + 1))"w +%Y-%m-%d 2>/dev/null)
    end_date=$(date -d "$week weeks ago" +%Y-%m-%d 2>/dev/null || \
      date -v-"${week}"w +%Y-%m-%d 2>/dev/null)

    commits=$(git log --since="$start_date" --until="$end_date" --oneline | grep -c '')
    # The empty format keeps commit headers and messages out of the file
    # count; "grep -c ." counts the distinct non-empty paths.
    files=$(git log --since="$start_date" --until="$end_date" --name-only --pretty=format: | \
      sort -u | grep -c .)

    echo "Week $week ago: $commits commits, $files files"
  done
}

# Predict completion based on current velocity
# Arguments: $1 - total number of stories (non-negative integer)
#            $2 - stories completed so far (non-negative integer)
#            $3 - weeks elapsed (positive number, may be fractional)
# Outputs:   "Current velocity: <v> stories/week" and
#            "Estimated completion: <w> weeks" (or "unknown" at zero velocity)
# Returns:   1 (with a message on stderr) on invalid arguments
predict_completion() {
  local total_stories="$1"
  local completed_stories="$2"
  local weeks_elapsed="$3"
  local integer_re='^[0-9]+$'
  local number_re='^[0-9]*[.]?[0-9]+$'
  local velocity remaining_stories estimated_weeks

  if [[ ! "$total_stories" =~ $integer_re || ! "$completed_stories" =~ $integer_re ]]; then
    echo "predict_completion: story counts must be non-negative integers" >&2
    return 1
  fi
  # A zero or non-numeric duration would make bc divide by zero.
  if [[ ! "$weeks_elapsed" =~ $number_re || ! "$weeks_elapsed" =~ [1-9] ]]; then
    echo "predict_completion: weeks elapsed must be a positive number" >&2
    return 1
  fi

  velocity=$(echo "scale=2; $completed_stories / $weeks_elapsed" | bc)
  # 10# forces base 10 so that zero-padded input such as 08 is not read as octal.
  remaining_stories=$((10#$total_stories - 10#$completed_stories))

  echo "Current velocity: $velocity stories/week"

  # A velocity that rounds to zero at two decimals cannot be divided by.
  if [[ ! "$velocity" =~ [1-9] ]]; then
    echo "Estimated completion: unknown (velocity is zero)"
    return 0
  fi

  estimated_weeks=$(echo "scale=2; $remaining_stories / $velocity" | bc)
  echo "Estimated completion: $estimated_weeks weeks"
}
```

### Risk Analysis

**Code Risk Indicators:**

```bash
# Identify high-risk areas
# Arguments: $1 - optional --since value for the first two sections
#                 (default: 1 month ago); the third section always covers
#                 commits since yesterday
# Outputs:   three titled sections: the ten most frequently changed files,
#            the ten largest per-file changes above 100 lines, and yesterday's
#            commits that changed files without touching any test/spec file
identify_risk_areas() {
  local since_date="${1:-1 month ago}"

  # Files with high change frequency
  # The empty format keeps "Author:"/"Date:" header lines out of the ranking.
  echo "High-change files (potential instability):"
  git log --since="$since_date" --name-only --pretty=format: | \
    grep -v "^$" | sort | uniq -c | sort -nr | head -10

  # Large commits (potential complexity)
  # Rows are ranked with the line count in front, then flipped back to the
  # "<path> <lines>" layout; sorting with the path first ranked by file name.
  printf '\n%s\n' "Large commits (potential complexity):"
  git log --since="$since_date" --numstat --pretty=format: | \
    awk -F'\t' 'NF >= 3 { lines = $1 + $2; if (lines > 100) print lines, $3 }' | \
    sort -nr | head -10 | \
    awk '{ n = $1; sub(/^[0-9]+ /, ""); print $0, n }'

  # Commits without tests
  # The commit header is emitted behind a leading TAB so that paths starting
  # with a hex character (app/, docs/, build/ ...) are not taken for commit
  # lines. A commit is reported when it changed files and none of them looks
  # like a test; the END rule reports the last commit, which has no trailing
  # blank line.
  printf '\n%s\n' "Recent commits potentially missing tests:"
  git log --since="yesterday" --name-only --pretty=format:"%x09%h %s" | \
    awk 'function report() {
           if (commit != "" && files != "" && !has_tests) print commit ": " files
         }
         /^\t/ { report(); commit = substr($0, 2); files = ""; has_tests = 0; next }
         !NF { next }
         /test|spec/ { has_tests = 1; next }
         { files = files $0 " " }
         END { report() }'
}

# Bus factor analysis
# Outputs: a header line, then one row per author of the last three months
#          with the commit count and share of all commits, busiest first
analyze_bus_factor() {
  echo "Code ownership concentration:"
  git log --since="3 months ago" --pretty=format:"%an" | \
    sort | uniq -c | sort -nr | \
    awk '{
           commits[NR] = $1
           total += $1
           # Keep the whole name: strip only the count that uniq -c put in
           # front. Taking $2 alone would cut "Jane Doe" down to "Jane".
           name = $0
           sub(/^[ \t]*[0-9]+[ \t]+/, "", name)
           authors[NR] = name
         }
         END {
           for (i = 1; i <= NR; i++) {
             percent = commits[i] * 100 / total
             printf "%-20s %3d commits (%2.0f%%)\n", authors[i], commits[i], percent
           }
         }'
}
```

## Integration Helpers

### Data Export Functions

**JSON Export for External Tools:**

```bash
# Export git analysis as JSON
# Arguments: $1 - optional --since value (default: yesterday)
#            $2 - optional output path (default: git_analysis.json)
# Outputs:   writes a JSON document with the analysis timestamp, the period,
#            one object per commit, the distinct changed paths and summary
#            counts to the output file
export_git_analysis_json() {
  local since_date="${1:-yesterday}"
  local output_file="${2:-git_analysis.json}"
  local analysis_date period_json commit_rows commits_json
  local changed_files files_json total_commits files_modified authors

  # -Iseconds is not available on every date implementation.
  analysis_date=$(date -Iseconds 2>/dev/null || date +%Y-%m-%dT%H:%M:%S%z)
  period_json=$(printf '%s' "$since_date" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')

  # One tab-separated row per commit: hash, author, date, subject.
  commit_rows=$(git log --since="$since_date" --date=iso \
    --pretty=tformat:'%h%x09%an%x09%ad%x09%s')

  # Backslashes and double quotes are escaped before the rows are wrapped in
  # JSON; unescaped, a subject such as: fix "quoted" text  breaks the document.
  # Tabs inside a subject split it into extra fields, which are re-joined
  # with the JSON escape for a tab.
  commits_json=$(printf '%s\n' "$commit_rows" | \
    sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' | \
    awk -F'\t' '
      length($0) {
        message = $4
        for (i = 5; i <= NF; i++) message = message "\\t" $i
        printf "%s    {\"hash\":\"%s\",\"author\":\"%s\",\"date\":\"%s\",\"message\":\"%s\"}", sep, $1, $2, $3, message
        sep = ",\n"
      }
    ')

  # The empty format keeps commit headers and messages out of the path list,
  # so the files_changed array and the files_modified count always agree.
  changed_files=$(git log --since="$since_date" --name-only --pretty=format: | \
    grep -v '^$' | sort -u)
  files_json=$(printf '%s\n' "$changed_files" | \
    sed -e '/^$/d' -e 's/\\/\\\\/g' -e 's/"/\\"/g' -e 's/.*/"&",/' | \
    sed '$s/,$//')

  total_commits=$(printf '%s\n' "$commit_rows" | awk 'length($0) { n++ } END { print n + 0 }')
  files_modified=$(printf '%s\n' "$changed_files" | awk 'length($0) { n++ } END { print n + 0 }')
  authors=$(printf '%s\n' "$commit_rows" | \
    awk -F'\t' 'length($0) && !seen[$2]++ { n++ } END { print n + 0 }')

  cat > "$output_file" << EOF
{
  "analysis_date": "$analysis_date",
  "period": "$period_json",
  "commits": [
$commits_json
  ],
  "files_changed": [
$files_json
  ],
  "statistics": {
    "total_commits": $total_commits,
    "files_modified": $files_modified,
    "authors": $authors
  }
}
EOF
}

# Export BMAD correlation data
# Arguments: $1 - directory to search for *.storyimpl.md files (default: .)
#            $2 - optional output path (default: bmad_correlation.json)
# Outputs:   writes {"story_correlations": [...]} to the output file with one
#            object per story: its path, the number of last week's commits
#            mentioning its base name, and its modification time (epoch)
export_bmad_correlation_json() {
  local stories_dir="${1:-.}"
  local output_file="${2:-bmad_correlation.json}"
  local story story_name story_json commits last_modified
  local first_entry=1

  {
    echo '{"story_correlations": ['

    while IFS= read -r story; do
      story_name=$(basename "$story" .storyimpl.md)
      commits=$(git log --since="1 week ago" --grep="$story_name" --oneline | grep -c '')
      # GNU form first, BSD/macOS form second: GNU stat reads -f as "file
      # system mode" and prints multi-line file-system details to stdout
      # before failing, which would end up inside the JSON string.
      last_modified=$(stat -c "%Y" "$story" 2>/dev/null || stat -f "%m" "$story" 2>/dev/null)
      story_json=$(printf '%s' "$story" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g')

      # Each object is closed when the next one starts (with a comma) or
      # after the loop (without), so no trailing comma has to be edited out
      # of the file afterwards.
      if [ "$first_entry" -eq 1 ]; then
        first_entry=0
      else
        printf '%s\n' "  },"
      fi
      printf '%s\n' "  {"
      printf '%s\n' "    \"story\": \"$story_json\","
      printf '%s\n' "    \"recent_commits\": $commits,"
      printf '%s\n' "    \"last_modified\": \"$last_modified\""
    done < <(find "$stories_dir" -name "*.storyimpl.md" | LC_ALL=C sort)

    if [ "$first_entry" -eq 0 ]; then
      printf '%s\n' "  }"
    fi
    echo ']}'
  } > "$output_file"
}
```

## Usage Examples

### Daily Standup Preparation

```bash
# NOTE(review): these examples call a ./git-analysis.sh wrapper with
# sub-commands; that script is not defined in this document, so the exact
# behaviour of each sub-command should be confirmed against it.

# Comprehensive daily analysis
./git-analysis.sh daily-standup

# Individual developer analysis
./git-analysis.sh developer-activity "john.doe@company.com"

# Story correlation analysis
./git-analysis.sh story-correlation "authentication.storyimpl.md"
```

### Sync Operation Support

```bash
# NOTE(review): these examples call a ./git-analysis.sh wrapper with
# sub-commands; that script is not defined in this document, so the exact
# behaviour of each sub-command should be confirmed against it.

# Detect sync issues
./git-analysis.sh detect-divergence

# Generate sync recommendations
./git-analysis.sh sync-recommendations

# Validate sync completion
./git-analysis.sh validate-sync
```

This comprehensive git analysis toolkit provides the foundation for intelligent standup preparation, sync operation support, and continuous development insight generation that bridges code reality with project planning and tracking systems.
