{"version":3,"file":"html_to_text.cjs","names":["MappingDocumentTransformer","Document"],"sources":["../../src/document_transformers/html_to_text.ts"],"sourcesContent":["import { htmlToText, type HtmlToTextOptions } from \"html-to-text\";\nimport {\n  MappingDocumentTransformer,\n  Document,\n} from \"@langchain/core/documents\";\n\n/**\n * A transformer that converts HTML content to plain text.\n * @example\n * ```typescript\n * const loader = new CheerioWebBaseLoader(\"https://example.com/some-page\");\n * const docs = await loader.load();\n *\n * const splitter = new RecursiveCharacterTextSplitter({\n *  maxCharacterCount: 1000,\n * });\n * const transformer = new HtmlToTextTransformer();\n *\n * // The sequence of text splitting followed by HTML to text transformation\n * const sequence = splitter.pipe(transformer);\n *\n * // Processing the loaded documents through the sequence\n * const newDocuments = await sequence.invoke(docs);\n *\n * console.log(newDocuments);\n * ```\n */\nexport class HtmlToTextTransformer extends MappingDocumentTransformer {\n  static lc_name() {\n    return \"HtmlToTextTransformer\";\n  }\n\n  constructor(protected options: HtmlToTextOptions = {}) {\n    super(options);\n  }\n\n  async _transformDocument(document: Document): Promise<Document> {\n    const extractedContent = htmlToText(document.pageContent, this.options);\n    return new Document({\n      pageContent: extractedContent,\n      metadata: { ...document.metadata },\n    });\n  }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;AA2BA,IAAa,wBAAb,cAA2CA,0BAAAA,2BAA2B;CACpE,OAAO,UAAU;AACf,SAAO;;CAGT,YAAY,UAAuC,EAAE,EAAE;AACrD,QAAM,QAAQ;AADM,OAAA,UAAA;;CAItB,MAAM,mBAAmB,UAAuC;AAE9D,SAAO,IAAIC,0BAAAA,SAAS;GAClB,cAAA,GAAA,aAAA,YAFkC,SAAS,aAAa,KAAK,QAAQ;GAGrE,UAAU,EAAE,GAAG,SAAS,UAAU;GACnC,CAAC"}