{"version":3,"file":"confluence.cjs","names":["BaseDocumentLoader","Document"],"sources":["../../../src/document_loaders/web/confluence.ts"],"sourcesContent":["import { htmlToText } from \"html-to-text\";\nimport { Document } from \"@langchain/core/documents\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n\n/**\n * Interface representing the parameters for configuring the\n * ConfluencePagesLoader.\n */\nexport interface ConfluencePagesLoaderParams {\n  baseUrl: string;\n  spaceKey: string;\n  username?: string;\n  accessToken?: string;\n  personalAccessToken?: string;\n  limit?: number;\n  expand?: string;\n  maxRetries?: number;\n}\n\n/**\n * Interface representing a Confluence page.\n */\nexport interface ConfluencePage {\n  id: string;\n  title: string;\n  type: string;\n  body: {\n    storage: {\n      value: string;\n    };\n  };\n  status: string;\n  version?: {\n    number: number;\n    when: string;\n    by: {\n      displayName: string;\n    };\n  };\n}\n\n/**\n * Interface representing the response from the Confluence API.\n */\nexport interface ConfluenceAPIResponse {\n  size: number;\n  results: ConfluencePage[];\n}\n\n/**\n * Class representing a document loader for loading pages from Confluence.\n * @example\n * ```typescript\n * const loader = new ConfluencePagesLoader({\n *   baseUrl: \"https:\n *   spaceKey: \"~EXAMPLE362906de5d343d49dcdbae5dEXAMPLE\",\n *   username: \"your-username\",\n *   accessToken: \"your-access-token\",\n * });\n * const documents = await loader.load();\n * console.log(documents);\n * ```\n */\nexport class ConfluencePagesLoader extends BaseDocumentLoader {\n  public readonly baseUrl: string;\n\n  public readonly spaceKey: string;\n\n  public readonly username?: string;\n\n  public readonly accessToken?: string;\n\n  public readonly limit: number;\n\n  public readonly maxRetries: number;\n\n  /**\n   * expand parameter for confluence rest api\n   * description can be found at https://developer.atlassian.com/server/confluence/expansions-in-the-rest-api/\n   */\n  public readonly expand?: string;\n\n  public readonly personalAccessToken?: string;\n\n  constructor({\n    baseUrl,\n    spaceKey,\n    username,\n    accessToken,\n    limit = 25,\n    expand = \"body.storage,version\",\n    personalAccessToken,\n    maxRetries = 5,\n  }: ConfluencePagesLoaderParams) {\n    super();\n    this.baseUrl = baseUrl;\n    this.spaceKey = spaceKey;\n    this.username = username;\n    this.accessToken = accessToken;\n    this.limit = limit;\n    this.expand = expand;\n    this.personalAccessToken = personalAccessToken;\n    this.maxRetries = maxRetries;\n  }\n\n  /**\n   * Returns the authorization header for the request.\n   * @returns The authorization header as a string, or undefined if no credentials were provided.\n   */\n  private get authorizationHeader(): string | undefined {\n    if (this.personalAccessToken) {\n      return `Bearer ${this.personalAccessToken}`;\n    } else if (this.username && this.accessToken) {\n      const authToken = Buffer.from(\n        `${this.username}:${this.accessToken}`\n      ).toString(\"base64\");\n      return `Basic ${authToken}`;\n    }\n\n    return undefined;\n  }\n\n  /**\n   * Fetches all the pages in the specified space and converts each page to\n   * a Document instance.\n   * @param options the extra options of the load function\n   * @param options.limit The limit parameter to overwrite the size to fetch pages.\n   * @param options.start The start parameter to set inital offset to fetch pages.\n   * @returns Promise resolving to an array of Document instances.\n   */\n  public async load(options?: {\n    start?: number;\n    limit?: number;\n  }): Promise<Document[]> {\n    try {\n      const pages = await this.fetchAllPagesInSpace(\n        options?.start,\n        options?.limit\n      );\n      return pages.map((page) => this.createDocumentFromPage(page));\n    } catch (error) {\n      console.error(\"Error:\", error);\n      return [];\n    }\n  }\n\n  /**\n   * Fetches data from the Confluence API using the provided URL.\n   * @param url The URL to fetch data from.\n   * @returns Promise resolving to the JSON response from the API.\n   */\n  protected async fetchConfluenceData(\n    url: string\n  ): Promise<ConfluenceAPIResponse> {\n    let retryCounter = 0;\n    while (true) {\n      retryCounter += 1;\n      try {\n        const initialHeaders: HeadersInit = {\n          \"Content-Type\": \"application/json\",\n          Accept: \"application/json\",\n        };\n\n        const authHeader = this.authorizationHeader;\n        if (authHeader) {\n          initialHeaders.Authorization = authHeader;\n        }\n\n        const response = await fetch(url, {\n          headers: initialHeaders,\n        });\n\n        if (!response.ok) {\n          throw new Error(\n            `Failed to fetch ${url} from Confluence: ${response.status}. Retrying...`\n          );\n        }\n\n        return await response.json();\n      } catch (error) {\n        if (retryCounter >= this.maxRetries)\n          throw new Error(\n            `Failed to fetch ${url} from Confluence (retry: ${retryCounter}): ${error}`\n          );\n      }\n    }\n  }\n\n  /**\n   * Recursively fetches all the pages in the specified space.\n   * @param start The start parameter to paginate through the results.\n   * @returns Promise resolving to an array of ConfluencePage objects.\n   */\n  private async fetchAllPagesInSpace(\n    start = 0,\n    limit = this.limit\n  ): Promise<ConfluencePage[]> {\n    const url = `${this.baseUrl}/rest/api/content?spaceKey=${this.spaceKey}&limit=${limit}&start=${start}&expand=${this.expand}`;\n    const data = await this.fetchConfluenceData(url);\n\n    if (data.size === 0) {\n      return [];\n    }\n\n    const nextPageStart = start + data.size;\n    const nextPageResults = await this.fetchAllPagesInSpace(\n      nextPageStart,\n      limit\n    );\n\n    return data.results.concat(nextPageResults);\n  }\n\n  /**\n   * Creates a Document instance from a ConfluencePage object.\n   * @param page The ConfluencePage object to convert.\n   * @returns A Document instance.\n   */\n  private createDocumentFromPage(page: ConfluencePage): Document {\n    const htmlContent = page.body.storage.value;\n\n    // Handle both self-closing and regular macros for attachments and view-file\n    const htmlWithoutOtherMacros = htmlContent.replace(\n      /<ac:structured-macro\\s+ac:name=\"(attachments|view-file)\"[^>]*(?:\\/?>|>.*?<\\/ac:structured-macro>)/gs,\n      \"[ATTACHMENT]\"\n    );\n\n    // Extract and preserve code blocks with unique placeholders\n    const codeBlocks: { language: string; code: string }[] = [];\n    const htmlWithPlaceholders = htmlWithoutOtherMacros.replace(\n      /<ac:structured-macro.*?<ac:parameter ac:name=\"language\">(.*?)<\\/ac:parameter>.*?<ac:plain-text-body><!\\[CDATA\\[([\\s\\S]*?)\\]\\]><\\/ac:plain-text-body><\\/ac:structured-macro>/g,\n      (_, language, code) => {\n        const placeholder = `CODE_BLOCK_${codeBlocks.length}`;\n        codeBlocks.push({ language, code: code.trim() });\n        return `\\n${placeholder}\\n`;\n      }\n    );\n\n    // Convert the HTML content to plain text\n    let plainTextContent = htmlToText(htmlWithPlaceholders, {\n      wordwrap: false,\n      preserveNewlines: true,\n    });\n\n    // Reinsert code blocks with proper markdown formatting\n    codeBlocks.forEach(({ language, code }, index) => {\n      const placeholder = `CODE_BLOCK_${index}`;\n      plainTextContent = plainTextContent.replace(\n        placeholder,\n        `\\`\\`\\`${language}\\n${code}\\n\\`\\`\\``\n      );\n    });\n\n    // Remove empty lines\n    const textWithoutEmptyLines = plainTextContent.replace(/^\\s*[\\r\\n]/gm, \"\");\n\n    // Rest of the method remains the same...\n    return new Document({\n      pageContent: textWithoutEmptyLines,\n      metadata: {\n        id: page.id,\n        status: page.status,\n        title: page.title,\n        type: page.type,\n        url: `${this.baseUrl}/spaces/${this.spaceKey}/pages/${page.id}`,\n        version: page.version?.number,\n        updated_by: page.version?.by?.displayName,\n        updated_at: page.version?.when,\n      },\n    });\n  }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;AA+DA,IAAa,wBAAb,cAA2CA,sCAAAA,mBAAmB;CAC5D;CAEA;CAEA;CAEA;CAEA;CAEA;;;;;CAMA;CAEA;CAEA,YAAY,EACV,SACA,UACA,UACA,aACA,QAAQ,IACR,SAAS,wBACT,qBACA,aAAa,KACiB;AAC9B,SAAO;AACP,OAAK,UAAU;AACf,OAAK,WAAW;AAChB,OAAK,WAAW;AAChB,OAAK,cAAc;AACnB,OAAK,QAAQ;AACb,OAAK,SAAS;AACd,OAAK,sBAAsB;AAC3B,OAAK,aAAa;;;;;;CAOpB,IAAY,sBAA0C;AACpD,MAAI,KAAK,oBACP,QAAO,UAAU,KAAK;WACb,KAAK,YAAY,KAAK,YAI/B,QAAO,SAHW,OAAO,KACvB,GAAG,KAAK,SAAS,GAAG,KAAK,cAC1B,CAAC,SAAS,SAAS;;;;;;;;;;CAexB,MAAa,KAAK,SAGM;AACtB,MAAI;AAKF,WAJc,MAAM,KAAK,qBACvB,SAAS,OACT,SAAS,MACV,EACY,KAAK,SAAS,KAAK,uBAAuB,KAAK,CAAC;WACtD,OAAO;AACd,WAAQ,MAAM,UAAU,MAAM;AAC9B,UAAO,EAAE;;;;;;;;CASb,MAAgB,oBACd,KACgC;EAChC,IAAI,eAAe;AACnB,SAAO,MAAM;AACX,mBAAgB;AAChB,OAAI;IACF,MAAM,iBAA8B;KAClC,gBAAgB;KAChB,QAAQ;KACT;IAED,MAAM,aAAa,KAAK;AACxB,QAAI,WACF,gBAAe,gBAAgB;IAGjC,MAAM,WAAW,MAAM,MAAM,KAAK,EAChC,SAAS,gBACV,CAAC;AAEF,QAAI,CAAC,SAAS,GACZ,OAAM,IAAI,MACR,mBAAmB,IAAI,oBAAoB,SAAS,OAAO,eAC5D;AAGH,WAAO,MAAM,SAAS,MAAM;YACrB,OAAO;AACd,QAAI,gBAAgB,KAAK,WACvB,OAAM,IAAI,MACR,mBAAmB,IAAI,2BAA2B,aAAa,KAAK,QACrE;;;;;;;;;CAUT,MAAc,qBACZ,QAAQ,GACR,QAAQ,KAAK,OACc;EAC3B,MAAM,MAAM,GAAG,KAAK,QAAQ,6BAA6B,KAAK,SAAS,SAAS,MAAM,SAAS,MAAM,UAAU,KAAK;EACpH,MAAM,OAAO,MAAM,KAAK,oBAAoB,IAAI;AAEhD,MAAI,KAAK,SAAS,EAChB,QAAO,EAAE;EAGX,MAAM,gBAAgB,QAAQ,KAAK;EACnC,MAAM,kBAAkB,MAAM,KAAK,qBACjC,eACA,MACD;AAED,SAAO,KAAK,QAAQ,OAAO,gBAAgB;;;;;;;CAQ7C,uBAA+B,MAAgC;EAI7D,MAAM,yBAHc,KAAK,KAAK,QAAQ,MAGK,QACzC,uGACA,eACD;EAGD,MAAM,aAAmD,EAAE;EAW3D,IAAI,oBAAA,GAAA,aAAA,YAVyB,uBAAuB,QAClD,iLACC,GAAG,UAAU,SAAS;GACrB,MAAM,cAAc,cAAc,WAAW;AAC7C,cAAW,KAAK;IAAE;IAAU,MAAM,KAAK,MAAM;IAAE,CAAC;AAChD,UAAO,KAAK,YAAY;IAE3B,EAGuD;GACtD,UAAU;GACV,kBAAkB;GACnB,CAAC;AAGF,aAAW,SAAS,EAAE,UAAU,QAAQ,UAAU;GAChD,MAAM,cAAc,cAAc;AAClC,sBAAmB,iBAAiB,QAClC,aACA,SAAS,SAAS,IAAI,KAAK,UAC5B;IACD;AAMF,SAAO,IAAIC,0BAAAA,SAAS;GAClB,aAJ4B,iBAAiB,QAAQ,gBAAgB,GAAG;GAKxE,UAAU;IACR,IAAI,KAAK;IACT,QAAQ,KAAK;IACb,OAAO,KAAK;IACZ,MAAM,KAAK;IACX,KAAK,GAAG,KAAK,QAAQ,UAAU,KAAK,SAAS,SAAS,KAAK;IAC3D,SAAS,KAAK,SAAS;IACvB,YAAY,KAAK,SAAS,IAAI;IAC9B,YAAY,KAAK,SAAS;IAC3B;GACF,CAAC"}