{"version":3,"file":"mozilla_readability.cjs","names":["MappingDocumentTransformer","Document","Readability","JSDOM"],"sources":["../../src/document_transformers/mozilla_readability.ts"],"sourcesContent":["import { Readability } from \"@mozilla/readability\";\nimport { JSDOM } from \"jsdom\";\nimport type { Options } from \"mozilla-readability\";\nimport {\n  MappingDocumentTransformer,\n  Document,\n} from \"@langchain/core/documents\";\n\n/**\n * A transformer that uses the Mozilla Readability library to extract the\n * main content from a web page.\n * @example\n * ```typescript\n * const loader = new HTMLWebBaseLoader(\"https://example.com/article\");\n * const docs = await loader.load();\n *\n * const splitter = new RecursiveCharacterTextSplitter({\n *  maxCharacterCount: 5000,\n * });\n * const transformer = new MozillaReadabilityTransformer();\n *\n * // The sequence processes the loaded documents through the splitter and then the transformer.\n * const sequence = transformer.pipe(splitter);\n *\n * // Invoke the sequence to transform the documents into a more readable format.\n * const newDocuments = await sequence.invoke(docs);\n *\n * console.log(newDocuments);\n * ```\n */\nexport class MozillaReadabilityTransformer extends MappingDocumentTransformer {\n  static lc_name() {\n    return \"MozillaReadabilityTransformer\";\n  }\n\n  constructor(protected options: Options = {}) {\n    super(options);\n  }\n\n  async _transformDocument(document: Document): Promise<Document> {\n    const doc = new JSDOM(document.pageContent);\n\n    const readability = new Readability(doc.window.document, this.options);\n\n    const result = readability.parse();\n\n    return new Document({\n      pageContent: result?.textContent ?? \"\",\n      metadata: {\n        ...document.metadata,\n      },\n    });\n  }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA8BA,IAAa,gCAAb,cAAmDA,0BAAAA,2BAA2B;CAC5E,OAAO,UAAU;AACf,SAAO;;CAGT,YAAY,UAA6B,EAAE,EAAE;AAC3C,QAAM,QAAQ;AADM,OAAA,UAAA;;CAItB,MAAM,mBAAmB,UAAuC;AAO9D,SAAO,IAAIC,0BAAAA,SAAS;GAClB,aALkB,IAAIC,qBAAAA,YAFZ,IAAIC,MAAAA,MAAM,SAAS,YAAY,CAEH,OAAO,UAAU,KAAK,QAAQ,CAE3C,OAAO,EAGX,eAAe;GACpC,UAAU,EACR,GAAG,SAAS,UACb;GACF,CAAC"}