All files confluenceReader.ts

90.16% Statements 55/61
65.38% Branches 17/26
100% Functions 4/4
91.22% Lines 52/57
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167 1x
1x
 
 
 
1x
 
1x
 
 
 
 
 
16x
16x
16x
 
16x
1x
 
 
15x
1x
 
 
14x
1x
 
 
 
13x
 
13x
 
 
 
 
 
 
 
10x
1x
 
 
9x
9x
 
9x
 
 
 
8x
8x
 
 
2x
2x
 
 
2x
2x
 
 
1x
1x
 
 
 
 
 
1x
 
 
 
8x
 
 
 
8x
 
 
 
 
 
 
4x
4x
4x
 
 
4x
 
 
4x
 
 
4x
 
 
 
 
 
 
 
1x
 
 
1x
1x
 
 
1x
 
1x
 
3x
 
3x
 
 
 
 
 
 
 
 
 
 
3x
 
 
 
 
 
 
 
 
5x
 
 
5x
 
 
 
5x
 
 
 
 
 
 
5x
 
 
 
5x
 
 
5x
 
 
 
 
 
5x
 
  import axios from 'axios';
import dotenv from 'dotenv';
import * as url from 'url';
 
// Load variables from a local .env file (if one exists) into process.env at
// import time, so the CONFLUENCE_* settings read by the ConfluenceReader
// constructor below are available before any instance is created.
dotenv.config();
 
/**
 * Reads Confluence pages through the REST content API and returns their body
 * as plain text.
 *
 * Connection settings come from the environment:
 * `CONFLUENCE_URL`, `CONFLUENCE_USERNAME` and `CONFLUENCE_API_TOKEN`.
 */
export class ConfluenceReader {
  private readonly baseUrl: string;
  private readonly apiToken: string;
  private readonly username: string;

  /**
   * @throws Error when any of the three required environment variables is
   *   missing or empty.
   */
  constructor() {
    // `||` (not `??`) is deliberate: an empty string counts as "not set".
    const baseUrl = process.env.CONFLUENCE_URL || '';
    this.apiToken = process.env.CONFLUENCE_API_TOKEN || '';
    this.username = process.env.CONFLUENCE_USERNAME || '';

    if (!baseUrl) {
      throw new Error('Confluence URL is required. Set CONFLUENCE_URL in your environment variables.');
    }

    if (!this.apiToken) {
      throw new Error('Confluence API token is required. Set CONFLUENCE_API_TOKEN in your environment variables.');
    }

    if (!this.username) {
      throw new Error('Confluence username is required. Set CONFLUENCE_USERNAME in your environment variables.');
    }

    // Drop every trailing slash so `${baseUrl}/rest/...` never contains `//`.
    this.baseUrl = baseUrl.replace(/\/+$/, '');

    console.log(`[ConfluenceReader] Initialized with base URL: ${this.baseUrl}`);
  }

  /**
   * Extract the numeric page ID from a Confluence URL, or return the input
   * unchanged (minus surrounding whitespace) when it already is a numeric ID.
   *
   * Recognised URL shapes, in priority order:
   *  1. `.../pages/123456/...` (this also covers `.../spaces/KEY/pages/123456`)
   *  2. `.../view/123456`
   *  3. `...?pageId=123456`
   *
   * @param pageUrlOrId - Page URL or bare numeric page ID.
   * @returns The page ID as a string of digits.
   * @throws Error if the input is not a parseable URL, or if no numeric page
   *   ID can be found in it.
   */
  private extractPageId(pageUrlOrId: string): string {
    const input = pageUrlOrId.trim();

    // A bare number is already a page ID.
    if (/^\d+$/.test(input)) {
      return input;
    }

    // Only URL parsing can legitimately throw here, so only that is guarded.
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(input);
    } catch {
      throw new Error(`Invalid Confluence URL format: ${pageUrlOrId}`);
    }

    // The query parameter ends up in the REST path, so accept digits only;
    // anything else (e.g. `../x`) is treated as "no ID found".
    const pageIdParam = parsedUrl.searchParams.get('pageId');
    const numericParam = pageIdParam !== null && /^\d+$/.test(pageIdParam) ? pageIdParam : undefined;

    const pageId =
      parsedUrl.pathname.match(/\/pages\/(\d+)/)?.[1] ??
      parsedUrl.pathname.match(/\/view\/(\d+)/)?.[1] ??
      numericParam;

    if (!pageId) {
      throw new Error(`Could not extract page ID from URL: ${pageUrlOrId}`);
    }

    return pageId;
  }

  /**
   * Fetch a Confluence page by URL or ID and return its body as plain text.
   *
   * @param pageUrlOrId - Page URL or bare numeric page ID.
   * @returns The page's storage-format body converted to readable text.
   * @throws Error describing the failure: an HTTP error status from
   *   Confluence, no response received, an unusable page reference, or a
   *   response that lacks `body.storage.value`.
   */
  async fetchPageContent(pageUrlOrId: string): Promise<string> {
    try {
      const pageId = this.extractPageId(pageUrlOrId);
      console.log(`[ConfluenceReader] Fetching page with ID: ${pageId}`);

      // Content endpoint, expanded so the storage-format body is included.
      const apiUrl = `${this.baseUrl}/rest/api/content/${pageId}?expand=body.storage`;

      // Basic auth credential: base64("username:apiToken").
      const authToken = Buffer.from(`${this.username}:${this.apiToken}`).toString('base64');

      const response = await axios.get(apiUrl, {
        headers: {
          'Authorization': `Basic ${authToken}`,
          'Content-Type': 'application/json'
        }
      });

      // Fail with a clear message instead of a TypeError deep inside the
      // text extraction when the expected payload is absent.
      const htmlContent: unknown = response.data?.body?.storage?.value;
      if (typeof htmlContent !== 'string') {
        throw new Error('Confluence response did not include body.storage.value');
      }

      const title = response.data.title;
      console.log(`[ConfluenceReader] Successfully fetched page: "${title}"`);

      return this.extractTextFromHtml(htmlContent);
    } catch (error: unknown) {
      // Log only the message: a full Axios error object carries the request
      // config, including the Authorization header with the encoded credentials.
      const reason = error instanceof Error ? error.message : String(error);
      console.error(`[ConfluenceReader] Error fetching page content: ${reason}`);

      if (axios.isAxiosError(error)) {
        if (error.response) {
          // The server answered with a status code outside of 2xx.
          throw new Error(`Confluence API error: ${error.response.status} - ${error.response.data?.message || 'Unknown error'}`);
        } else if (error.request) {
          // The request was sent but no response came back.
          throw new Error('No response received from Confluence. Please check your network connection and Confluence URL.');
        }
      }

      // Anything else, including errors raised while resolving the page ID.
      throw new Error(`Failed to fetch Confluence page: ${reason}`);
    }
  }

  /**
   * Convert an HTML fragment to readable plain text.
   *
   * This is a deliberately simple regex-based conversion, not a full HTML
   * parser. Order matters: markup is removed first and entities are decoded
   * afterwards, so escaped text such as `&lt;div&gt;` survives as literal
   * `<div>` instead of being stripped as if it were a tag.
   *
   * @param html - HTML fragment to convert.
   * @returns Text with at most one blank line between blocks and no leading
   *   or trailing whitespace.
   */
  private extractTextFromHtml(html: string): string {
    let text = html;

    // 1. Remove script and style blocks, including their contents.
    text = text.replace(/<script[^>]*>.*?<\/script>/gis, '')
              .replace(/<style[^>]*>.*?<\/style>/gis, '');

    // 2. Turn line breaks and the end of block elements into newlines.
    text = text.replace(/<br\s*\/?>/gi, '\n')
              .replace(/<\/(p|div|h[1-6]|li|blockquote)>/gi, '\n\n');

    // 3. Strip all remaining tags.
    text = text.replace(/<[^>]*>/g, '');

    // 4. Decode common entities. `&amp;` goes last so that `&amp;lt;`
    //    becomes the literal text `&lt;` rather than being decoded twice.
    text = text.replace(/&nbsp;/g, ' ')
              .replace(/&lt;/g, '<')
              .replace(/&gt;/g, '>')
              .replace(/&quot;/g, '"')
              .replace(/&#39;|&apos;/g, "'")
              .replace(/&amp;/g, '&');

    // 5. Normalise whitespace.
    text = text.replace(/[ \t]+/g, ' ')       // Collapse runs of spaces/tabs
              .replace(/\n[ \t]+/g, '\n')     // No leading space on a line
              .replace(/[ \t]+\n/g, '\n')     // No trailing space on a line
              .replace(/\n{3,}/g, '\n\n')     // At most one blank line
              .trim();

    return text;
  }
}