import axios from 'axios';
import dotenv from 'dotenv';
import * as url from 'url';
// Load environment variables
dotenv.config();
/**
 * Reads Confluence pages through the REST content endpoint and converts the
 * page's storage-format body into plain text.
 *
 * Configuration is taken from the environment when an instance is created:
 * `CONFLUENCE_URL`, `CONFLUENCE_API_TOKEN` and `CONFLUENCE_USERNAME`.
 */
export class ConfluenceReader {
  private baseUrl: string;
  private apiToken: string;
  private username: string;

  /**
   * Reads and validates the connection settings from the environment.
   *
   * @throws Error if any of the three required variables is unset or empty.
   */
  constructor() {
    // `||` is deliberate: an empty string must be treated the same as "unset".
    this.baseUrl = process.env.CONFLUENCE_URL || '';
    this.apiToken = process.env.CONFLUENCE_API_TOKEN || '';
    this.username = process.env.CONFLUENCE_USERNAME || '';

    if (!this.baseUrl) {
      throw new Error('Confluence URL is required. Set CONFLUENCE_URL in your environment variables.');
    }
    if (!this.apiToken) {
      throw new Error('Confluence API token is required. Set CONFLUENCE_API_TOKEN in your environment variables.');
    }
    if (!this.username) {
      throw new Error('Confluence username is required. Set CONFLUENCE_USERNAME in your environment variables.');
    }

    // Strip every trailing slash so that path concatenation never yields "//".
    this.baseUrl = this.baseUrl.replace(/\/+$/, '');
    console.log(`[ConfluenceReader] Initialized with base URL: ${this.baseUrl}`);
  }

  /**
   * Extract the page ID from a Confluence URL, or return the input unchanged
   * when it already is a numeric ID.
   *
   * Recognized URL shapes, checked in this order:
   *   1. `.../spaces/SPACE/pages/123456`
   *   2. `.../pages/123456[/Page+Title]`
   *   3. `.../view/123456`
   *   4. `...?pageId=123456`
   *
   * @param pageUrlOrId A numeric page ID or an absolute page URL.
   * @returns The page ID as a string of digits.
   * @throws Error if the input is neither numeric nor a parseable URL, or if
   *   no numeric page ID can be found in the URL.
   */
  private extractPageId(pageUrlOrId: string): string {
    // A bare run of digits is already a page ID.
    if (/^\d+$/.test(pageUrlOrId)) {
      return pageUrlOrId;
    }

    let parsedUrl: URL;
    try {
      parsedUrl = new URL(pageUrlOrId);
    } catch (parseError) {
      // Only URL parsing can fail here.
      throw new Error(`Invalid Confluence URL format: ${pageUrlOrId}`);
    }

    // Order matters: the more specific path shape has to win over the generic one.
    const pathPatterns = [
      /\/spaces\/[^/]+\/pages\/(\d+)/,
      /\/pages\/(\d+)/,
      /\/view\/(\d+)/,
    ];
    for (const pattern of pathPatterns) {
      const match = parsedUrl.pathname.match(pattern);
      if (match && match[1]) {
        return match[1];
      }
    }

    // The query parameter is caller-controlled text that ends up in the REST
    // path, so it is accepted only when it consists purely of digits.
    const pageIdParam = parsedUrl.searchParams.get('pageId');
    if (pageIdParam && /^\d+$/.test(pageIdParam)) {
      return pageIdParam;
    }

    throw new Error(`Could not extract page ID from URL: ${pageUrlOrId}`);
  }

  /**
   * Fetch a Confluence page by URL or ID and return its content as plain text.
   *
   * @param pageUrlOrId A numeric page ID or an absolute page URL.
   * @returns The text extracted from the page's `body.storage` HTML.
   * @throws Error describing the failure: an HTTP error status, a missing
   *   response, a request setup problem, an unusable page reference, or a
   *   response that lacks `body.storage`.
   */
  async fetchPageContent(pageUrlOrId: string): Promise<string> {
    try {
      const pageId = this.extractPageId(pageUrlOrId);
      console.log(`[ConfluenceReader] Fetching page with ID: ${pageId}`);

      const apiUrl = `${this.baseUrl}/rest/api/content/${pageId}?expand=body.storage`;
      // Basic auth credentials are "username:apiToken", base64-encoded.
      const authToken = Buffer.from(`${this.username}:${this.apiToken}`).toString('base64');

      const response = await axios.get(apiUrl, {
        headers: {
          'Authorization': `Basic ${authToken}`,
          'Content-Type': 'application/json'
        }
      });

      // Fail with a readable message instead of a TypeError on an unexpected payload.
      const storage = response.data && response.data.body && response.data.body.storage;
      if (!storage) {
        throw new Error('Confluence response did not include body.storage content.');
      }

      console.log(`[ConfluenceReader] Successfully fetched page: "${response.data.title}"`);
      return this.extractTextFromHtml(storage.value);
    } catch (error) {
      // Log the message only: a full Axios error carries the request config,
      // which includes the Authorization header.
      const reason = error instanceof Error ? error.message : String(error);
      console.error(`[ConfluenceReader] Error fetching page content: ${reason}`);
      throw this.toFetchError(error);
    }
  }

  /** Translate anything caught while fetching into an Error with a user-facing message. */
  private toFetchError(error: unknown): Error {
    if (axios.isAxiosError(error)) {
      if (error.response) {
        // The server answered with an error status.
        const { status, statusText, data } = error.response;
        const message = this.describeErrorBody(data);
        return new Error(`Confluence API error: ${status} - ${statusText || 'Status Text Unavailable'}. Message: ${message}`);
      }
      if (error.request) {
        // The request went out but nothing came back.
        return new Error('No response received from Confluence. Please check your network connection and Confluence URL.');
      }
      // The request could not even be set up.
      return new Error(`Axios setup error: ${error.message}`);
    }
    if (error instanceof Error) {
      return new Error(`Failed to fetch Confluence page: ${error.message}`);
    }
    // Something other than an Error instance was thrown.
    return new Error(`Failed to fetch Confluence page: ${String(error)}`);
  }

  /** Produce a readable message from the body of an error response. */
  private describeErrorBody(data: unknown): string {
    const fallback = 'Unknown server error';
    if (!data) {
      return fallback;
    }
    if (typeof data === 'string') {
      return data;
    }
    if (typeof data === 'object') {
      const message = (data as { message?: unknown }).message;
      if (typeof message === 'string' && message) {
        return message;
      }
    }
    try {
      // JSON.stringify returns undefined for values it cannot represent.
      const serialized = JSON.stringify(data);
      return typeof serialized === 'string' ? serialized : fallback;
    } catch (jsonError) {
      // For example a circular structure.
      return fallback;
    }
  }

  /**
   * Extract readable text content from HTML.
   *
   * @param html Markup to convert; an empty value yields an empty string.
   * @returns The text with tags removed, common entities decoded, and
   *   whitespace collapsed to at most one blank line between blocks.
   */
  private extractTextFromHtml(html: string): string {
    if (!html) return '';

    // 1. Drop script and style blocks together with their content.
    let text = html
      .replace(/<script[^>]*>.*?<\/script>/gis, '')
      .replace(/<style[^>]*>.*?<\/style>/gis, '');

    // 2. Line break elements become newlines.
    text = text.replace(/<br\s*\/?>/gi, '\n');

    // 3. Closing block elements become paragraph breaks; the leading space
    //    keeps adjacent words from being joined.
    text = text.replace(/<\/(p|div|h[1-6]|li|blockquote|ul|ol|table|tr|td|th)>/gi, ' \n\n');

    // 4. Strip all remaining tags.
    text = text.replace(/<[^>]*>/g, '');

    // 5. Decode common entities. `&amp;` goes last so that an escaped entity
    //    such as `&amp;lt;` becomes the literal text `&lt;`, not `<`.
    text = text
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&(?:#39|#x27|apos);/gi, "'")
      .replace(/&amp;/g, '&');

    // 6. Normalize whitespace.
    text = text.replace(/\r\n/g, '\n'); // Normalize line endings
    text = text.replace(/[ \t]+/g, ' '); // Consolidate spaces/tabs
    text = text.replace(/ \n/g, '\n'); // Remove space before newline
    text = text.replace(/\n /g, '\n'); // Remove space after newline
    text = text.replace(/\n{3,}/g, '\n\n'); // At most one blank line in a row

    return text.trim();
  }
}