All files / modules confluenceReader.ts

81.25% Statements 65/80
55.55% Branches 20/36
100% Functions 4/4
81.33% Lines 61/75

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 1971x 1x       1x   1x           14x 14x 14x   14x 1x     13x 1x     12x 1x       11x   11x               9x 1x     8x 8x   8x       7x 7x     3x 3x     3x 3x     2x 2x           1x       7x   1x     6x             2x 2x 2x     2x     2x     2x               1x     1x 1x     1x   1x   1x     1x                                                   1x     1x                       6x   4x     4x       4x     4x     4x     4x                 4x 4x 4x 4x 4x 4x   4x    
import axios from 'axios';
import dotenv from 'dotenv';
import * as url from 'url';
 
// Load environment variables via dotenv at module import time.
// The ConfluenceReader constructor below reads CONFLUENCE_URL,
// CONFLUENCE_API_TOKEN and CONFLUENCE_USERNAME from process.env, so this
// call has to run before any instance is created.
dotenv.config();
 
/**
 * Reads a Confluence page through the REST content API and returns its body
 * as plain text.
 *
 * Connection settings are taken from the environment variables
 * CONFLUENCE_URL, CONFLUENCE_API_TOKEN and CONFLUENCE_USERNAME.
 */
export class ConfluenceReader {
  private baseUrl: string;
  private apiToken: string;
  private username: string;

  /**
   * Reads the connection settings from `process.env` and normalizes the base URL.
   *
   * @throws Error if any of the three required environment variables is
   *   missing or empty.
   */
  constructor() {
    this.baseUrl = process.env.CONFLUENCE_URL || '';
    this.apiToken = process.env.CONFLUENCE_API_TOKEN || '';
    this.username = process.env.CONFLUENCE_USERNAME || '';

    if (!this.baseUrl) {
      throw new Error('Confluence URL is required. Set CONFLUENCE_URL in your environment variables.');
    }

    if (!this.apiToken) {
      throw new Error('Confluence API token is required. Set CONFLUENCE_API_TOKEN in your environment variables.');
    }

    if (!this.username) {
      throw new Error('Confluence username is required. Set CONFLUENCE_USERNAME in your environment variables.');
    }

    // Remove a trailing slash so API paths can be appended with a single '/'.
    this.baseUrl = this.baseUrl.replace(/\/$/, '');

    console.log(`[ConfluenceReader] Initialized with base URL: ${this.baseUrl}`);
  }

  /**
   * Extract the page ID from a Confluence URL, or return the input if it is
   * already a bare numeric ID.
   *
   * Supported URL shapes, checked in this order (most specific first):
   *   - .../spaces/SPACE/pages/123456
   *   - .../pages/123456[/Page+Title]
   *   - .../view/123456
   *   - ...?pageId=123456
   *
   * @param pageUrlOrId Full page URL or numeric page ID; surrounding
   *   whitespace is ignored.
   * @returns The page ID, always a string of digits.
   * @throws Error if the input is neither numeric nor a parseable URL, or if
   *   the URL contains no numeric page ID.
   */
  private extractPageId(pageUrlOrId: string): string {
    const input = pageUrlOrId.trim();

    // A bare number is taken to be a page ID already.
    if (/^\d+$/.test(input)) {
      return input;
    }

    let parsedUrl: URL;
    try {
      parsedUrl = new URL(input);
    } catch (error) {
      // Only URL parsing can fail here; report the original input.
      throw new Error(`Invalid Confluence URL format: ${pageUrlOrId}`);
    }

    // Order matters: the space-qualified form must win over the generic one.
    const pathPatterns = [
      /\/spaces\/[^/]+\/pages\/(\d+)/,
      /\/pages\/(\d+)/,
      /\/view\/(\d+)/,
    ];
    for (const pattern of pathPatterns) {
      const idFromPath = pattern.exec(parsedUrl.pathname)?.[1];
      if (idFromPath) {
        return idFromPath;
      }
    }

    // The query-string value is free-form, so require digits before it is
    // allowed to become part of the REST API path.
    const idFromQuery = parsedUrl.searchParams.get('pageId');
    if (idFromQuery && /^\d+$/.test(idFromQuery)) {
      return idFromQuery;
    }

    throw new Error(`Could not extract page ID from URL: ${pageUrlOrId}`);
  }

  /**
   * Fetch content from a Confluence page by URL or ID.
   *
   * Requests `/rest/api/content/{id}?expand=body.storage` with HTTP Basic
   * authentication and converts the returned storage-format markup to text.
   *
   * @param pageUrlOrId Full page URL or numeric page ID.
   * @returns The page body as plain text.
   * @throws Error describing the failure: an API error status, a missing
   *   response, a request setup problem, or any other error raised while
   *   resolving the page ID or processing the response.
   */
  async fetchPageContent(pageUrlOrId: string): Promise<string> {
    try {
      const pageId = this.extractPageId(pageUrlOrId);
      console.log(`[ConfluenceReader] Fetching page with ID: ${pageId}`);

      const apiUrl = `${this.baseUrl}/rest/api/content/${pageId}?expand=body.storage`;

      // Basic auth credential: base64("username:apiToken").
      const authToken = Buffer.from(`${this.username}:${this.apiToken}`).toString('base64');

      const response = await axios.get(apiUrl, {
        headers: {
          'Authorization': `Basic ${authToken}`,
          'Content-Type': 'application/json'
        }
      });

      const htmlContent = response.data.body.storage.value;

      // The title is only used for the log line.
      const title = response.data.title;
      console.log(`[ConfluenceReader] Successfully fetched page: "${title}"`);

      return this.extractTextFromHtml(htmlContent);
    } catch (error) {
      console.error('[ConfluenceReader] Error fetching page content:', error);
      throw this.toFetchError(error);
    }
  }

  /**
   * Translate anything caught while fetching a page into an Error with a
   * descriptive message. Axios errors are checked first so that HTTP and
   * network failures get specific wording.
   */
  private toFetchError(error: unknown): Error {
    if (axios.isAxiosError(error)) {
      if (error.response) {
        // The server answered with an error status; try to surface its message.
        const data = error.response.data;
        let message = 'Unknown server error';
        if (data) {
          if (typeof data === 'string') {
            message = data;
          } else if (data.message && typeof data.message === 'string') {
            message = data.message;
          } else {
            try {
              message = JSON.stringify(data);
            } catch (jsonError) {
              // Deliberate best effort: keep the generic message if the
              // payload cannot be serialized.
            }
          }
        }
        return new Error(`Confluence API error: ${error.response.status} - ${error.response.statusText || 'Status Text Unavailable'}. Message: ${message}`);
      }

      if (error.request) {
        // The request was sent but no response arrived.
        return new Error('No response received from Confluence. Please check your network connection and Confluence URL.');
      }

      // Neither response nor request is present: the request was never sent.
      return new Error(`Axios setup error: ${error.message}`);
    }

    if (error instanceof Error) {
      return new Error(`Failed to fetch Confluence page: ${error.message}`);
    }

    // Something that is not an Error instance was thrown.
    return new Error(`Failed to fetch Confluence page: ${String(error)}`);
  }

  /**
   * Extract readable text content from HTML.
   *
   * Script and style blocks are dropped, `<br>` and closing block-level tags
   * become line breaks, remaining tags are stripped, character references are
   * decoded, and whitespace is normalized to at most one blank line between
   * paragraphs.
   *
   * @param html Markup to convert; an empty value yields an empty string.
   * @returns The trimmed plain-text rendering.
   */
  private extractTextFromHtml(html: string): string {
    if (!html) return '';

    let text = html;

    // 1. Remove script and style blocks, including their contents.
    text = text
      .replace(/<script[^>]*>.*?<\/script>/gis, '')
      .replace(/<style[^>]*>.*?<\/style>/gis, '');

    // 2. Line break elements become newlines.
    text = text.replace(/<br\s*\/?>/gi, '\n');

    // 3. Closing block elements become paragraph breaks; the leading space
    //    keeps adjacent words from being joined.
    text = text.replace(/<\/(p|div|h[1-6]|li|blockquote|ul|ol|table|tr|td|th)>/gi, ' \n\n');

    // 4. Strip every remaining tag.
    text = text.replace(/<[^>]*>/g, '');

    // 5. Decode character references in a single pass. Decoding "&amp;" in
    //    the same pass as the others means "&amp;lt;" yields the literal
    //    text "&lt;" rather than being decoded a second time into "<".
    const namedEntities: Record<string, string> = {
      nbsp: ' ',
      amp: '&',
      lt: '<',
      gt: '>',
      quot: '"',
      apos: "'",
    };
    text = text.replace(
      /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(nbsp|amp|lt|gt|quot|apos));/g,
      (match: string, decimal?: string, hex?: string, name?: string): string => {
        if (name !== undefined) {
          return namedEntities[name] ?? match;
        }
        const codePoint = decimal !== undefined ? parseInt(decimal, 10) : parseInt(hex ?? '', 16);
        // Leave references outside the Unicode range untouched instead of
        // letting String.fromCodePoint throw.
        return codePoint > 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
      }
    );

    // 6. Clean up excessive whitespace and newlines.
    text = text.replace(/\r\n/g, '\n');      // Normalize line endings
    text = text.replace(/[ \t]+/g, ' ');     // Consolidate spaces/tabs
    text = text.replace(/ \n/g, '\n');       // Remove space before newline
    text = text.replace(/\n /g, '\n');       // Remove space after newline
    text = text.replace(/\n{3,}/g, '\n\n');  // Reduce multiple newlines to max 2

    return text.trim();
  }
}