All files / modules confluenceReader.ts

81.25% Statements 65/80
55.55% Branches 20/36
100% Functions 4/4
81.33% Lines 61/75
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197 1x
1x
 
 
 
1x
 
1x
 
 
 
 
 
14x
14x
14x
 
14x
1x
 
 
13x
1x
 
 
12x
1x
 
 
 
11x
 
11x
 
 
 
 
 
 
 
9x
1x
 
 
8x
8x
 
8x
 
 
 
7x
7x
 
 
3x
3x
 
 
3x
3x
 
 
2x
2x
 
 
 
 
 
1x
 
 
 
7x
 
1x
 
 
6x
 
 
 
 
 
 
2x
2x
2x
 
 
2x
 
 
2x
 
 
2x
 
 
 
 
 
 
 
1x
 
 
1x
1x
 
 
1x
 
1x
 
1x
 
 
1x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
 
 
1x
 
 
 
 
 
 
 
 
 
 
 
6x
 
4x
 
 
4x
 
 
 
4x
 
 
4x
 
 
4x
 
 
4x
 
 
 
 
 
 
 
 
4x
4x
4x
4x
4x
4x
 
4x
 
  import axios from 'axios';
import dotenv from 'dotenv';
import * as url from 'url';
 
// Load environment variables
dotenv.config();
 
/**
 * Reads Confluence pages through the REST content endpoint and converts the
 * page's storage-format body into plain text.
 *
 * Configuration is read from the environment variables `CONFLUENCE_URL`,
 * `CONFLUENCE_API_TOKEN` and `CONFLUENCE_USERNAME`.
 */
export class ConfluenceReader {
  private readonly baseUrl: string;
  private readonly apiToken: string;
  private readonly username: string;

  /**
   * @throws Error when any of the three required environment variables is
   *   missing or empty.
   */
  constructor() {
    const baseUrl = process.env.CONFLUENCE_URL || '';
    const apiToken = process.env.CONFLUENCE_API_TOKEN || '';
    const username = process.env.CONFLUENCE_USERNAME || '';

    if (!baseUrl) {
      throw new Error('Confluence URL is required. Set CONFLUENCE_URL in your environment variables.');
    }

    if (!apiToken) {
      throw new Error('Confluence API token is required. Set CONFLUENCE_API_TOKEN in your environment variables.');
    }

    if (!username) {
      throw new Error('Confluence username is required. Set CONFLUENCE_USERNAME in your environment variables.');
    }

    // Strip every trailing slash so "<base>/rest/..." never contains "//".
    this.baseUrl = baseUrl.replace(/\/+$/, '');
    this.apiToken = apiToken;
    this.username = username;

    console.log(`[ConfluenceReader] Initialized with base URL: ${this.baseUrl}`);
  }

  /**
   * Extract the numeric page ID from a Confluence URL, or return the input
   * unchanged (minus surrounding whitespace) when it is already numeric.
   *
   * Recognised URL shapes, tried in this order:
   *   - `.../spaces/SPACE/pages/123456`
   *   - `.../pages/123456[/Page+Title]`
   *   - `.../view/123456`
   *   - `...?pageId=123456`
   *
   * @param pageUrlOrId A page URL or a bare numeric page ID.
   * @returns The page ID as a string of digits.
   * @throws Error if the input is neither numeric nor a parseable URL, or if
   *   no numeric page ID can be found in the URL.
   */
  private extractPageId(pageUrlOrId: string): string {
    const candidate = pageUrlOrId.trim();

    // A bare number is already a page ID.
    if (/^\d+$/.test(candidate)) {
      return candidate;
    }

    let parsedUrl: URL;
    try {
      parsedUrl = new URL(candidate);
    } catch {
      // Only URL parsing can fail here.
      throw new Error(`Invalid Confluence URL format: ${pageUrlOrId}`);
    }

    // Order matters: more specific path shapes are checked first.
    const pathPatterns = [
      /\/spaces\/[^/]+\/pages\/(\d+)/,
      /\/pages\/(\d+)/,
      /\/view\/(\d+)/,
    ];
    for (const pattern of pathPatterns) {
      const match = parsedUrl.pathname.match(pattern);
      if (match) {
        return match[1];
      }
    }

    // The query parameter is interpolated into the REST path later, so only a
    // purely numeric value is accepted.
    const pageIdParam = parsedUrl.searchParams.get('pageId');
    if (pageIdParam && /^\d+$/.test(pageIdParam)) {
      return pageIdParam;
    }

    // Use the original input in the error message for clarity.
    throw new Error(`Could not extract page ID from URL: ${pageUrlOrId}`);
  }

  /**
   * Fetch a Confluence page by URL or ID and return its body as plain text.
   *
   * @param pageUrlOrId A page URL or a bare numeric page ID.
   * @returns The page's `body.storage` content with markup removed.
   * @throws Error describing the failure: an HTTP error status, no response
   *   received, a request setup problem, or any other error (including an
   *   unusable `pageUrlOrId`), the latter prefixed with
   *   "Failed to fetch Confluence page:".
   */
  async fetchPageContent(pageUrlOrId: string): Promise<string> {
    try {
      const pageId = this.extractPageId(pageUrlOrId);
      console.log(`[ConfluenceReader] Fetching page with ID: ${pageId}`);

      // Construct API URL for the content endpoint
      const apiUrl = `${this.baseUrl}/rest/api/content/${pageId}?expand=body.storage`;

      // Create Basic Auth token from username and API token
      const authToken = Buffer.from(`${this.username}:${this.apiToken}`).toString('base64');

      const response = await axios.get(apiUrl, {
        headers: {
          'Authorization': `Basic ${authToken}`,
          'Content-Type': 'application/json'
        }
      });

      // Fail with a descriptive message instead of an opaque TypeError when
      // the response does not have the expected shape.
      const storage = response.data?.body?.storage;
      if (!storage) {
        throw new Error('Response did not include body.storage content');
      }

      console.log(`[ConfluenceReader] Successfully fetched page: "${response.data.title}"`);

      return this.extractTextFromHtml(storage.value);
    } catch (error: unknown) {
      // Log a summary only: the raw Axios error object carries the request
      // config, including the Authorization header built above.
      console.error(
        '[ConfluenceReader] Error fetching page content:',
        error instanceof Error ? error.message : String(error)
      );

      // Check for Axios specific errors first
      if (axios.isAxiosError(error)) {
        if (error.response) {
          // Server responded with an error status; try to extract a meaningful message.
          let message = 'Unknown server error';
          const data = error.response.data;
          if (data) {
            if (typeof data === 'string') {
              message = data;
            } else if (data.message && typeof data.message === 'string') {
              message = data.message;
            } else {
              try {
                message = JSON.stringify(data);
              } catch {
                // Payload is not serialisable; keep the default message.
              }
            }
          }
          throw new Error(`Confluence API error: ${error.response.status} - ${error.response.statusText || 'Status Text Unavailable'}. Message: ${message}`);
        } else if (error.request) {
          // Request made, no response received (network error)
          throw new Error('No response received from Confluence. Please check your network connection and Confluence URL.');
        } else {
          // Error setting up the request (neither response nor request is present)
          throw new Error(`Axios setup error: ${error.message}`);
        }
      } else if (error instanceof Error) {
        // Non-Axios errors, e.g. page ID extraction or response shape problems
        throw new Error(`Failed to fetch Confluence page: ${error.message}`);
      } else {
        // The thrown value is not an Error instance
        throw new Error(`Failed to fetch Confluence page: ${String(error)}`);
      }
    }
  }

  /**
   * Extract readable text content from HTML.
   *
   * Script and style blocks are dropped, `<br>` and closing block-level tags
   * become line breaks, remaining tags are stripped, a small set of entities
   * is decoded, and whitespace is normalised.
   *
   * @param html Markup to convert; a falsy value yields an empty string.
   * @returns Trimmed plain text with at most one blank line between blocks.
   */
  private extractTextFromHtml(html: string): string {
    if (!html) return '';

    let text = html;

    // 1. Remove script and style blocks first
    text = text.replace(/<script[^>]*>.*?<\/script>/gis, '')
               .replace(/<style[^>]*>.*?<\/style>/gis, '');

    // 2. Replace line break elements with newlines
    text = text.replace(/<br\s*\/?>/gi, '\n');

    // 3. Replace closing block elements with a blank line (the leading space
    //    prevents adjacent words from being joined)
    text = text.replace(/<\/(p|div|h[1-6]|li|blockquote|ul|ol|table|tr|td|th)>/gi, ' \n\n');

    // 4. Strip remaining HTML tags
    text = text.replace(/<[^>]*>/g, '');

    // 5. Decode HTML entities
    text = this.decodeHtmlEntities(text);

    // 6. Clean up excessive whitespace and newlines
    text = text.replace(/\r\n/g, '\n');     // Normalize line endings
    text = text.replace(/[ \t]+/g, ' ');    // Consolidate spaces/tabs
    text = text.replace(/ \n/g, '\n');      // Remove space before newline
    text = text.replace(/\n /g, '\n');      // Remove space after newline
    text = text.replace(/\n{3,}/g, '\n\n'); // Reduce multiple newlines to max 2

    return text.trim();
  }

  /**
   * Decode the named entities nbsp, amp, lt, gt, quot and apos plus decimal
   * and hexadecimal character references.
   *
   * Decoding happens in a single pass so that already-decoded output is never
   * decoded again (`&amp;lt;` yields the literal text `&lt;`, not `<`).
   * Unrecognised or out-of-range references are left untouched.
   */
  private decodeHtmlEntities(text: string): string {
    const namedEntities: Record<string, string> = {
      nbsp: ' ',
      amp: '&',
      lt: '<',
      gt: '>',
      quot: '"',
      apos: "'",
    };

    return text.replace(
      /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(nbsp|amp|lt|gt|quot|apos));/g,
      (entity: string, decimal?: string, hex?: string, name?: string): string => {
        if (name !== undefined) {
          return namedEntities[name] ?? entity;
        }

        const codePoint = decimal !== undefined ? parseInt(decimal, 10) : parseInt(hex ?? '', 16);
        if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
          return entity;
        }

        // Treat a numeric non-breaking space the same way as &nbsp;.
        return codePoint === 0xa0 ? ' ' : String.fromCodePoint(codePoint);
      }
    );
  }
}