All files confluenceReader.ts

90.16% Statements 55/61
65.38% Branches 17/26
100% Functions 4/4
91.22% Lines 52/57

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 1671x 1x       1x   1x           16x 16x 16x   16x 1x     15x 1x     14x 1x       13x   13x               10x 1x     9x 9x   9x       8x 8x     2x 2x     2x 2x     1x 1x           1x       8x       8x             4x 4x 4x     4x     4x     4x               1x     1x 1x     1x   1x   3x   3x                     3x                 5x     5x       5x             5x       5x     5x           5x    
import axios from 'axios';
import dotenv from 'dotenv';
import * as url from 'url';
 
// Load environment variables via dotenv at module import time, so that
// process.env is populated before any ConfluenceReader is constructed
// (its constructor reads the CONFLUENCE_* variables from process.env).
dotenv.config();
 
/**
 * Reads Confluence pages through the REST content API and returns their body
 * as plain text.
 *
 * Configuration is taken from the environment:
 * - `CONFLUENCE_URL`       – base URL of the Confluence instance
 * - `CONFLUENCE_USERNAME`  – user name used for HTTP Basic auth
 * - `CONFLUENCE_API_TOKEN` – API token used as the Basic auth password
 */
export class ConfluenceReader {
  private readonly baseUrl: string;
  private readonly apiToken: string;
  private readonly username: string;

  /**
   * @throws Error if any of the three required environment variables is
   *         missing or empty.
   */
  constructor() {
    // `||` (not `??`) is deliberate: an empty string counts as "not set".
    const baseUrl = process.env.CONFLUENCE_URL || '';
    this.apiToken = process.env.CONFLUENCE_API_TOKEN || '';
    this.username = process.env.CONFLUENCE_USERNAME || '';

    if (!baseUrl) {
      throw new Error('Confluence URL is required. Set CONFLUENCE_URL in your environment variables.');
    }

    if (!this.apiToken) {
      throw new Error('Confluence API token is required. Set CONFLUENCE_API_TOKEN in your environment variables.');
    }

    if (!this.username) {
      throw new Error('Confluence username is required. Set CONFLUENCE_USERNAME in your environment variables.');
    }

    // Remove every trailing slash so that path concatenation below never
    // produces a double slash.
    this.baseUrl = baseUrl.replace(/\/+$/, '');

    console.log(`[ConfluenceReader] Initialized with base URL: ${this.baseUrl}`);
  }

  /**
   * Extract the page ID from a Confluence URL, or return the input unchanged
   * (trimmed) if it already is a numeric page ID.
   *
   * Recognised URL shapes, tried in this order:
   * 1. `.../pages/123456/...` (this also covers `.../spaces/KEY/pages/123456`)
   * 2. `.../view/123456`
   * 3. `...?pageId=123456`
   *
   * @param pageUrlOrId Numeric page ID or absolute page URL.
   * @returns The page ID as a string.
   * @throws Error if the input is neither numeric nor a parseable URL, or if
   *         no page ID can be found in the URL.
   */
  private extractPageId(pageUrlOrId: string): string {
    const candidate = pageUrlOrId.trim();

    // If it's just a number, assume it's already a page ID.
    if (/^\d+$/.test(candidate)) {
      return candidate;
    }

    // Only the URL constructor can throw here, so keep the try block minimal.
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(candidate);
    } catch {
      throw new Error(`Invalid Confluence URL format: ${pageUrlOrId}`);
    }

    const pageId =
      parsedUrl.pathname.match(/\/pages\/(\d+)/)?.[1] ??
      parsedUrl.pathname.match(/\/view\/(\d+)/)?.[1] ??
      parsedUrl.searchParams.get('pageId');

    // Covers both "nothing matched" and an empty `pageId=` query parameter.
    if (!pageId) {
      throw new Error(`Could not extract page ID from URL: ${pageUrlOrId}`);
    }

    return pageId;
  }

  /**
   * Fetch content from a Confluence page by URL or ID.
   *
   * @param pageUrlOrId Numeric page ID or absolute page URL.
   * @returns The page body (storage format) converted to plain text.
   * @throws Error with a `Confluence API error: <status> - ...` message when
   *         the server answers with a non-2xx status, a "No response
   *         received" message when the request got no answer, and a
   *         `Failed to fetch Confluence page: ...` message for anything else
   *         (including an unparseable `pageUrlOrId`).
   */
  async fetchPageContent(pageUrlOrId: string): Promise<string> {
    try {
      const pageId = this.extractPageId(pageUrlOrId);
      console.log(`[ConfluenceReader] Fetching page with ID: ${pageId}`);

      // A page ID taken from a `?pageId=` query parameter is not guaranteed
      // to be numeric, so escape it before placing it in the request path.
      const apiUrl = `${this.baseUrl}/rest/api/content/${encodeURIComponent(pageId)}?expand=body.storage`;

      // Create Basic Auth token from username and API token
      const authToken = Buffer.from(`${this.username}:${this.apiToken}`).toString('base64');

      const response = await axios.get(apiUrl, {
        headers: {
          'Authorization': `Basic ${authToken}`,
          'Content-Type': 'application/json'
        }
      });

      // Fail with a descriptive message instead of a TypeError when the
      // response does not carry the expanded storage body.
      const htmlContent: unknown = response.data?.body?.storage?.value;
      if (typeof htmlContent !== 'string') {
        throw new Error('Confluence response did not include body.storage content');
      }

      const title = response.data.title;
      console.log(`[ConfluenceReader] Successfully fetched page: "${title}"`);

      return this.extractTextFromHtml(htmlContent);
    } catch (error) {
      console.error('[ConfluenceReader] Error fetching page content:', error);

      if (axios.isAxiosError(error)) {
        if (error.response) {
          // The request was made and the server responded with a status code outside of 2xx
          throw new Error(`Confluence API error: ${error.response.status} - ${error.response.data?.message || 'Unknown error'}`);
        } else if (error.request) {
          // The request was made but no response was received
          throw new Error('No response received from Confluence. Please check your network connection and Confluence URL.');
        }
      }

      // Generic error handling
      throw new Error(`Failed to fetch Confluence page: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Extract readable text content from HTML.
   * Very simple regex-based implementation - can be replaced with a proper
   * HTML parser if needed.
   *
   * @param html HTML markup to convert.
   * @returns Plain text with at most one blank line between blocks and no
   *          leading or trailing whitespace.
   */
  private extractTextFromHtml(html: string): string {
    return html
      // 1. Remove script and style blocks, including their contents.
      .replace(/<script[^>]*>.*?<\/script>/gis, '')
      .replace(/<style[^>]*>.*?<\/style>/gis, '')
      // 2. Turn line breaks and the end of block elements into newlines.
      .replace(/<br\s*\/?>/gi, '\n')
      .replace(/<\/(p|div|h[1-6]|li|blockquote)>/gi, '\n\n')
      // 3. Strip remaining tags. This must happen BEFORE entity decoding,
      //    otherwise escaped text such as "&lt;b&gt;" would be decoded to
      //    "<b>" and then removed as if it were markup.
      .replace(/<[^>]*>/g, '')
      // 4. Decode common entities. "&amp;" goes last so that "&amp;lt;"
      //    yields the literal text "&lt;" rather than "<".
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#39;/g, "'")
      .replace(/&amp;/g, '&')
      // 5. Clean up excessive whitespace.
      .replace(/[ \t]+/g, ' ')      // Consolidate spaces/tabs
      .replace(/\n[ \t]+/g, '\n')   // Remove leading space after newline
      .replace(/[ \t]+\n/g, '\n')   // Remove trailing space before newline
      .replace(/\n{3,}/g, '\n\n')   // Reduce multiple newlines to max 2
      .trim();
  }
}