{"version":3,"file":"docx.cjs","names":["BufferLoader","Document"],"sources":["../../../src/document_loaders/fs/docx.ts"],"sourcesContent":["import { Document } from \"@langchain/core/documents\";\nimport { BufferLoader } from \"@langchain/classic/document_loaders/fs/buffer\";\n\ntype DocxLoaderOptions = {\n  type: \"docx\" | \"doc\";\n};\n/**\n * A class that extends the `BufferLoader` class. It represents a document\n * loader that loads documents from DOCX files.\n * It has a constructor that takes a `filePathOrBlob` parameter representing the path to the word\n * file or a Blob object, and an optional `options` parameter of type\n * `DocxLoaderOptions`\n */\nexport class DocxLoader extends BufferLoader {\n  protected options: DocxLoaderOptions = { type: \"docx\" };\n\n  constructor(filePathOrBlob: string | Blob, options?: DocxLoaderOptions) {\n    super(filePathOrBlob);\n    if (options) {\n      this.options = {\n        ...options,\n      };\n    }\n  }\n\n  /**\n   * A method that takes a `raw` buffer and `metadata` as parameters and\n   * returns a promise that resolves to an array of `Document` instances. It\n   * uses the `extractRawText` function from the `mammoth` module or\n   * `extract` method from the `word-extractor` module to extract\n   * the raw text content from the buffer. If the extracted text content is\n   * empty, it returns an empty array. Otherwise, it creates a new\n   * `Document` instance with the extracted text content and the provided\n   * metadata, and returns it as an array.\n   * @param raw The raw buffer from which to extract text content.\n   * @param metadata The metadata to be associated with the created `Document` instance.\n   * @returns A promise that resolves to an array of `Document` instances.\n   */\n  public async parse(\n    raw: Buffer,\n    metadata: Document[\"metadata\"]\n  ): Promise<Document[]> {\n    if (this.options.type === \"doc\") {\n      return this.parseDoc(raw, metadata);\n    }\n    return this.parseDocx(raw, metadata);\n  }\n\n  /**\n   * A private method that takes a `raw` buffer and `metadata` as parameters and\n   * returns a promise that resolves to an array of `Document` instances. It\n   * uses the `extractRawText` function from the `mammoth` module to extract\n   * the raw text content from the buffer. If the extracted text content is\n   * empty, it returns an empty array. Otherwise, it creates a new\n   * `Document` instance with the extracted text content and the provided\n   * metadata, and returns it as an array.\n   * @param raw The raw buffer from which to extract text content.\n   * @param metadata The metadata to be associated with the created `Document` instance.\n   * @returns A promise that resolves to an array of `Document` instances.\n   */\n  private async parseDocx(\n    raw: Buffer,\n    metadata: Document[\"metadata\"]\n  ): Promise<Document[]> {\n    if (this.options.type === \"doc\") {\n      return this.parseDoc(raw, metadata);\n    }\n    const { extractRawText } = await DocxLoaderImports();\n    const docx = await extractRawText({\n      buffer: raw,\n    });\n\n    if (!docx.value) return [];\n\n    return [\n      new Document({\n        pageContent: docx.value,\n        metadata,\n      }),\n    ];\n  }\n\n  /**\n   * A private method that takes a `raw` buffer and `metadata` as parameters and\n   * returns a promise that resolves to an array of `Document` instances. It\n   * uses the `extract` method from the `word-extractor` module to extract\n   * the raw text content from the buffer. If the extracted text content is\n   * empty, it returns an empty array. Otherwise, it creates a new\n   * `Document` instance with the extracted text content and the provided\n   * metadata, and returns it as an array.\n   * @param raw The raw buffer from which to extract text content.\n   * @param metadata The metadata to be associated with the created `Document` instance.\n   * @returns A promise that resolves to an array of `Document` instances.\n   */\n  private async parseDoc(\n    raw: Buffer,\n    metadata: Document[\"metadata\"]\n  ): Promise<Document[]> {\n    const WordExtractor = await DocLoaderImports();\n    const extractor = new WordExtractor();\n    const doc = await extractor.extract(raw);\n    return [\n      new Document({\n        pageContent: doc.getBody(),\n        metadata,\n      }),\n    ];\n  }\n}\n\nasync function DocxLoaderImports() {\n  try {\n    const { extractRawText } = await import(\"mammoth\");\n    return { extractRawText };\n  } catch (e) {\n    console.error(e);\n    throw new Error(\n      \"Failed to load mammoth. Please install it with eg. `npm install mammoth`.\"\n    );\n  }\n}\n\nasync function DocLoaderImports() {\n  try {\n    const WordExtractor = await import(\"word-extractor\");\n    return WordExtractor.default;\n  } catch (e) {\n    console.error(e);\n    throw new Error(\n      \"Failed to load word-extractor. Please install it with eg. `npm install word-extractor`.\"\n    );\n  }\n}\n"],"mappings":";;;;;;;;;;;;;AAaA,IAAa,aAAb,cAAgCA,8CAAAA,aAAa;CAC3C,UAAuC,EAAE,MAAM,QAAQ;CAEvD,YAAY,gBAA+B,SAA6B;AACtE,QAAM,eAAe;AACrB,MAAI,QACF,MAAK,UAAU,EACb,GAAG,SACJ;;;;;;;;;;;;;;;CAiBL,MAAa,MACX,KACA,UACqB;AACrB,MAAI,KAAK,QAAQ,SAAS,MACxB,QAAO,KAAK,SAAS,KAAK,SAAS;AAErC,SAAO,KAAK,UAAU,KAAK,SAAS;;;;;;;;;;;;;;CAetC,MAAc,UACZ,KACA,UACqB;AACrB,MAAI,KAAK,QAAQ,SAAS,MACxB,QAAO,KAAK,SAAS,KAAK,SAAS;EAErC,MAAM,EAAE,mBAAmB,MAAM,mBAAmB;EACpD,MAAM,OAAO,MAAM,eAAe,EAChC,QAAQ,KACT,CAAC;AAEF,MAAI,CAAC,KAAK,MAAO,QAAO,EAAE;AAE1B,SAAO,CACL,IAAIC,0BAAAA,SAAS;GACX,aAAa,KAAK;GAClB;GACD,CAAC,CACH;;;;;;;;;;;;;;CAeH,MAAc,SACZ,KACA,UACqB;AAIrB,SAAO,CACL,IAAIA,0BAAAA,SAAS;GACX,cAHQ,MADM,KADI,OAAM,kBAAkB,IACT,CACT,QAAQ,IAAI,EAGnB,SAAS;GAC1B;GACD,CAAC,CACH;;;AAIL,eAAe,oBAAoB;AACjC,KAAI;EACF,MAAM,EAAE,mBAAmB,MAAM,OAAO;AACxC,SAAO,EAAE,gBAAgB;UAClB,GAAG;AACV,UAAQ,MAAM,EAAE;AAChB,QAAM,IAAI,MACR,4EACD;;;AAIL,eAAe,mBAAmB;AAChC,KAAI;AAEF,UADsB,MAAM,OAAO,mBACd;UACd,GAAG;AACV,UAAQ,MAAM,EAAE;AAChB,QAAM,IAAI,MACR,0FACD"}