{"version":3,"file":"epub.cjs","names":["BaseDocumentLoader","Document"],"sources":["../../../src/document_loaders/fs/epub.ts"],"sourcesContent":["import type { EPub } from \"epub2\";\nimport { Document } from \"@langchain/core/documents\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n\n/**\n * A class that extends the `BaseDocumentLoader` class. It represents a\n * document loader that loads documents from EPUB files.\n */\nexport class EPubLoader extends BaseDocumentLoader {\n  private splitChapters: boolean;\n\n  constructor(\n    public filePath: string,\n    { splitChapters = true } = {}\n  ) {\n    super();\n    this.splitChapters = splitChapters;\n  }\n\n  /**\n   * A protected method that takes an EPUB object as a parameter and returns\n   * a promise that resolves to an array of objects representing the content\n   * and metadata of each chapter.\n   * @param epub The EPUB object to parse.\n   * @returns A promise that resolves to an array of objects representing the content and metadata of each chapter.\n   */\n  protected async parse(\n    epub: EPub\n  ): Promise<{ pageContent: string; metadata?: object }[]> {\n    const { htmlToText } = await HtmlToTextImport();\n    const chapters = await Promise.all(\n      epub.flow.map(async (chapter) => {\n        if (!chapter.id) return null as never;\n        const html: string = await epub.getChapterRawAsync(chapter.id);\n        if (!html) return null as never;\n        return {\n          html,\n          title: chapter.title,\n        };\n      })\n    );\n    return chapters.filter(Boolean).map((chapter) => ({\n      pageContent: htmlToText(chapter.html),\n      metadata: {\n        ...(chapter.title && { chapter: chapter.title }),\n      },\n    }));\n  }\n\n  /**\n   * A method that loads the EPUB file and returns a promise that resolves\n   * to an array of `Document` instances.\n   * @returns A promise that resolves to an array of `Document` instances.\n   */\n  public async load(): Promise<Document[]> {\n    const { EPub } = await EpubImport();\n    const epub = await EPub.createAsync(this.filePath);\n\n    const parsed = await this.parse(epub);\n    const metadata = { source: this.filePath };\n\n    if (parsed.length === 0) return [];\n\n    return this.splitChapters\n      ? parsed.map(\n          (chapter) =>\n            new Document({\n              pageContent: chapter.pageContent,\n              metadata: {\n                ...metadata,\n                ...chapter.metadata,\n              },\n            })\n        )\n      : [\n          new Document({\n            pageContent: parsed\n              .map((chapter) => chapter.pageContent)\n              .join(\"\\n\\n\"),\n            metadata,\n          }),\n        ];\n  }\n}\n\nasync function EpubImport() {\n  const { EPub } = await import(\"epub2\").catch(() => {\n    throw new Error(\n      \"Failed to load epub2. Please install it with eg. `npm install epub2`.\"\n    );\n  });\n  return { EPub };\n}\n\nasync function HtmlToTextImport() {\n  const { htmlToText } = await import(\"html-to-text\").catch(() => {\n    throw new Error(\n      \"Failed to load html-to-text. Please install it with eg. `npm install html-to-text`.\"\n    );\n  });\n  return { htmlToText };\n}\n"],"mappings":";;;;;;;;;;AAQA,IAAa,aAAb,cAAgCA,sCAAAA,mBAAmB;CACjD;CAEA,YACE,UACA,EAAE,gBAAgB,SAAS,EAAE,EAC7B;AACA,SAAO;AAHA,OAAA,WAAA;AAIP,OAAK,gBAAgB;;;;;;;;;CAUvB,MAAgB,MACd,MACuD;EACvD,MAAM,EAAE,eAAe,MAAM,kBAAkB;AAY/C,UAXiB,MAAM,QAAQ,IAC7B,KAAK,KAAK,IAAI,OAAO,YAAY;AAC/B,OAAI,CAAC,QAAQ,GAAI,QAAO;GACxB,MAAM,OAAe,MAAM,KAAK,mBAAmB,QAAQ,GAAG;AAC9D,OAAI,CAAC,KAAM,QAAO;AAClB,UAAO;IACL;IACA,OAAO,QAAQ;IAChB;IACD,CACH,EACe,OAAO,QAAQ,CAAC,KAAK,aAAa;GAChD,aAAa,WAAW,QAAQ,KAAK;GACrC,UAAU,EACR,GAAI,QAAQ,SAAS,EAAE,SAAS,QAAQ,OAAO,EAChD;GACF,EAAE;;;;;;;CAQL,MAAa,OAA4B;EACvC,MAAM,EAAE,SAAS,MAAM,YAAY;EACnC,MAAM,OAAO,MAAM,KAAK,YAAY,KAAK,SAAS;EAElD,MAAM,SAAS,MAAM,KAAK,MAAM,KAAK;EACrC,MAAM,WAAW,EAAE,QAAQ,KAAK,UAAU;AAE1C,MAAI,OAAO,WAAW,EAAG,QAAO,EAAE;AAElC,SAAO,KAAK,gBACR,OAAO,KACJ,YACC,IAAIC,0BAAAA,SAAS;GACX,aAAa,QAAQ;GACrB,UAAU;IACR,GAAG;IACH,GAAG,QAAQ;IACZ;GACF,CAAC,CACL,GACD,CACE,IAAIA,0BAAAA,SAAS;GACX,aAAa,OACV,KAAK,YAAY,QAAQ,YAAY,CACrC,KAAK,OAAO;GACf;GACD,CAAC,CACH;;;AAIT,eAAe,aAAa;CAC1B,MAAM,EAAE,SAAS,MAAM,OAAO,SAAS,YAAY;AACjD,QAAM,IAAI,MACR,wEACD;GACD;AACF,QAAO,EAAE,MAAM;;AAGjB,eAAe,mBAAmB;CAChC,MAAM,EAAE,eAAe,MAAM,OAAO,gBAAgB,YAAY;AAC9D,QAAM,IAAI,MACR,sFACD;GACD;AACF,QAAO,EAAE,YAAY"}