import axios from 'axios';
import dotenv from 'dotenv';
import * as url from 'url';
// Load environment variables at module import time, so that the CONFLUENCE_*
// values read from process.env by the ConfluenceReader constructor are
// available before any instance is created.
dotenv.config();
/**
 * Minimal read-only client for the Confluence REST API.
 *
 * Configuration is taken from the environment when the instance is created:
 * `CONFLUENCE_URL`, `CONFLUENCE_API_TOKEN` and `CONFLUENCE_USERNAME` are all
 * required, and the constructor throws if any of them is missing or empty.
 */
export class ConfluenceReader {
  /** Entities decoded by {@link extractTextFromHtml}, keyed by their full `&...;` spelling. */
  private static readonly HTML_ENTITIES: Readonly<Record<string, string>> = {
    '&nbsp;': ' ',
    '&amp;': '&',
    '&lt;': '<',
    '&gt;': '>',
    '&quot;': '"',
    '&apos;': "'",
    '&#39;': "'",
  };

  private readonly baseUrl: string;
  private readonly apiToken: string;
  private readonly username: string;

  /**
   * @throws Error when any of the three required environment variables is unset or empty.
   */
  constructor() {
    const baseUrl = process.env.CONFLUENCE_URL || '';
    this.apiToken = process.env.CONFLUENCE_API_TOKEN || '';
    this.username = process.env.CONFLUENCE_USERNAME || '';

    if (!baseUrl) {
      throw new Error('Confluence URL is required. Set CONFLUENCE_URL in your environment variables.');
    }
    if (!this.apiToken) {
      throw new Error('Confluence API token is required. Set CONFLUENCE_API_TOKEN in your environment variables.');
    }
    if (!this.username) {
      throw new Error('Confluence username is required. Set CONFLUENCE_USERNAME in your environment variables.');
    }

    // Drop trailing slashes so request paths can be appended with a single "/".
    this.baseUrl = baseUrl.replace(/\/+$/, '');
    console.log(`[ConfluenceReader] Initialized with base URL: ${this.baseUrl}`);
  }

  /**
   * Extract the numeric page ID from a Confluence URL, or return the input
   * unchanged (minus surrounding whitespace) when it already is a numeric ID.
   *
   * Recognised URL shapes, tried in this order:
   *  - `.../pages/123456/Page+Title` (this also covers `.../spaces/KEY/pages/123456`)
   *  - `.../view/123456`
   *  - `...?pageId=123456`
   *
   * @param pageUrlOrId A numeric page ID or an absolute page URL.
   * @returns The page ID as a string of digits.
   * @throws Error if the input is not a parseable URL, or no numeric ID can be found in it.
   */
  private extractPageId(pageUrlOrId: string): string {
    const input = pageUrlOrId.trim();

    // A bare number is taken to be a page ID already.
    if (/^\d+$/.test(input)) {
      return input;
    }

    // Only the URL constructor can throw here, so keep the try block that narrow.
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(input);
    } catch {
      throw new Error(`Invalid Confluence URL format: ${pageUrlOrId}`);
    }

    const { pathname, searchParams } = parsedUrl;
    const queryId = searchParams.get('pageId');

    const pageId =
      pathname.match(/\/pages\/(\d+)/)?.[1] ??
      pathname.match(/\/view\/(\d+)/)?.[1] ??
      // The query value ends up in the request path, so accept digits only,
      // matching what the path-based formats already require.
      (queryId !== null && /^\d+$/.test(queryId) ? queryId : undefined);

    if (!pageId) {
      throw new Error(`Could not extract page ID from URL: ${pageUrlOrId}`);
    }
    return pageId;
  }

  /**
   * Fetch a Confluence page by URL or ID and return its content as plain text.
   *
   * The page is requested from `/rest/api/content/{id}?expand=body.storage`
   * using HTTP Basic authentication built from the configured username and
   * API token; the returned storage-format markup is then reduced to text.
   *
   * @param pageUrlOrId A numeric page ID or an absolute page URL.
   * @returns The page body with markup removed.
   * @throws Error describing the failure: an HTTP error status, no response
   *   received, an unusable page reference, or an unexpected response shape.
   */
  async fetchPageContent(pageUrlOrId: string): Promise<string> {
    try {
      const pageId = this.extractPageId(pageUrlOrId);
      console.log(`[ConfluenceReader] Fetching page with ID: ${pageId}`);

      const apiUrl = `${this.baseUrl}/rest/api/content/${pageId}?expand=body.storage`;
      const authToken = Buffer.from(`${this.username}:${this.apiToken}`).toString('base64');

      const response = await axios.get(apiUrl, {
        headers: {
          'Authorization': `Basic ${authToken}`,
          'Content-Type': 'application/json'
        }
      });

      const htmlContent: unknown = response.data?.body?.storage?.value;
      if (typeof htmlContent !== 'string') {
        throw new Error('Confluence response did not include body.storage content.');
      }

      const title = response.data?.title;
      console.log(`[ConfluenceReader] Successfully fetched page: "${title}"`);

      return this.extractTextFromHtml(htmlContent);
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      // Log only the message: an Axios error object carries the request
      // config, including the Authorization header, which must not reach logs.
      console.error('[ConfluenceReader] Error fetching page content:', detail);

      if (axios.isAxiosError(error)) {
        if (error.response) {
          // The server answered with a status code outside of 2xx.
          throw new Error(`Confluence API error: ${error.response.status} - ${error.response.data?.message || 'Unknown error'}`);
        } else if (error.request) {
          // The request was sent but no response came back.
          throw new Error('No response received from Confluence. Please check your network connection and Confluence URL.');
        }
      }

      // Anything else: bad page reference, unexpected payload, request setup failure.
      throw new Error(`Failed to fetch Confluence page: ${detail}`);
    }
  }

  /**
   * Reduce an HTML fragment to readable plain text.
   *
   * This is a deliberately simple regex-based implementation, not a full HTML
   * parser. Steps: drop `<script>`/`<style>` blocks, turn `<br>` and closing
   * block tags into newlines, strip the remaining tags, decode a small set of
   * entities, then normalise whitespace.
   *
   * @param html Markup to convert.
   * @returns Trimmed text with at most one blank line between paragraphs.
   */
  private extractTextFromHtml(html: string): string {
    let text = html;

    // 1. Remove script and style blocks together with their contents.
    text = text
      .replace(/<script[^>]*>.*?<\/script>/gis, '')
      .replace(/<style[^>]*>.*?<\/style>/gis, '');

    // 2. Line breaks become a newline; closing block elements a blank line.
    text = text
      .replace(/<br\s*\/?>/gi, '\n')
      .replace(/<\/(p|div|h[1-6]|li|blockquote)>/gi, '\n\n');

    // 3. Strip the remaining tags.
    text = text.replace(/<[^>]*>/g, '');

    // 4. Decode entities only after tags are gone, so escaped text such as
    //    "&lt;div&gt;" survives as literal "<div>" instead of being stripped.
    //    A single pass also keeps "&amp;lt;" from being decoded twice.
    text = text.replace(
      /&(?:nbsp|amp|lt|gt|quot|apos|#39);/g,
      (entity) => ConfluenceReader.HTML_ENTITIES[entity] ?? entity
    );

    // 5. Clean up excessive whitespace.
    return text
      .replace(/[ \t]+/g, ' ') // Consolidate spaces/tabs
      .replace(/\n[ \t]+/g, '\n') // Remove leading space after newline
      .replace(/[ \t]+\n/g, '\n') // Remove trailing space before newline
      .replace(/\n{3,}/g, '\n\n') // Reduce multiple newlines to max 2
      .trim();
  }
}