{"version":3,"file":"hn.cjs","names":["CheerioWebBaseLoader","Document"],"sources":["../../../src/document_loaders/web/hn.ts"],"sourcesContent":["import type { CheerioAPI } from \"cheerio\";\nimport { Document } from \"@langchain/core/documents\";\nimport { CheerioWebBaseLoader } from \"./cheerio.js\";\n\n/**\n * A class that extends the CheerioWebBaseLoader class. It represents a\n * loader for loading web pages from the Hacker News website.\n */\nexport class HNLoader extends CheerioWebBaseLoader {\n  constructor(public webPath: string) {\n    super(webPath);\n  }\n\n  /**\n   * An asynchronous method that loads the web page. If the webPath includes\n   * \"item\", it calls the loadComments() method to load the comments from\n   * the web page. Otherwise, it calls the loadResults() method to load the\n   * results from the web page.\n   * @returns A Promise that resolves to an array of Document instances.\n   */\n  public async load(): Promise<Document[]> {\n    const $ = await this.scrape();\n    if (this.webPath.includes(\"item\")) {\n      return this.loadComments($);\n    }\n    return this.loadResults($);\n  }\n\n  /**\n   * A private method that loads the comments from the web page. It selects\n   * the elements with the class \"athing comtr\" using the $ function\n   * provided by Cheerio. It also extracts the title of the web page from\n   * the element with the id \"pagespace\". It creates Document instances for\n   * each comment, with the comment text as the page content and the source\n   * and title as metadata.\n   * @param $ A CheerioAPI instance.\n   * @returns An array of Document instances.\n   */\n  private loadComments($: CheerioAPI): Document[] {\n    const comments = $(\"tr[class='athing comtr']\");\n    const title = $(\"tr[id='pagespace']\").attr(\"title\");\n    const documents: Document[] = [];\n    comments.each((_index, comment) => {\n      const text = $(comment).text().trim();\n      const metadata = { source: this.webPath, title };\n      documents.push(new Document({ pageContent: text, metadata }));\n    });\n    return documents;\n  }\n\n  /**\n   * A private method that loads the results from the web page. It selects\n   * the elements with the class \"athing\" using the $ function provided by\n   * Cheerio. It extracts the ranking, link, title, and other metadata from\n   * each result item. It creates Document instances for each result item,\n   * with the title as the page content and the source, title, link, and\n   * ranking as metadata.\n   * @param $ A CheerioAPI instance.\n   * @returns An array of Document instances.\n   */\n  private loadResults($: CheerioAPI): Document[] {\n    const items = $(\"tr[class='athing']\");\n    const documents: Document[] = [];\n    items.each((_index, item) => {\n      const ranking = $(item).find(\"span[class='rank']\").text();\n      const link = $(item).find(\"span[class='titleline'] a\").attr(\"href\");\n      const title = $(item).find(\"span[class='titleline']\").text().trim();\n      const metadata = {\n        source: this.webPath,\n        title,\n        link,\n        ranking,\n      };\n      documents.push(new Document({ pageContent: title, metadata }));\n    });\n    return documents;\n  }\n}\n"],"mappings":";;;;;;;;;;AAQA,IAAa,WAAb,cAA8BA,qCAAAA,qBAAqB;CACjD,YAAY,SAAwB;AAClC,QAAM,QAAQ;AADG,OAAA,UAAA;;;;;;;;;CAWnB,MAAa,OAA4B;EACvC,MAAM,IAAI,MAAM,KAAK,QAAQ;AAC7B,MAAI,KAAK,QAAQ,SAAS,OAAO,CAC/B,QAAO,KAAK,aAAa,EAAE;AAE7B,SAAO,KAAK,YAAY,EAAE;;;;;;;;;;;;CAa5B,aAAqB,GAA2B;EAC9C,MAAM,WAAW,EAAE,2BAA2B;EAC9C,MAAM,QAAQ,EAAE,qBAAqB,CAAC,KAAK,QAAQ;EACnD,MAAM,YAAwB,EAAE;AAChC,WAAS,MAAM,QAAQ,YAAY;GACjC,MAAM,OAAO,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM;GACrC,MAAM,WAAW;IAAE,QAAQ,KAAK;IAAS;IAAO;AAChD,aAAU,KAAK,IAAIC,0BAAAA,SAAS;IAAE,aAAa;IAAM;IAAU,CAAC,CAAC;IAC7D;AACF,SAAO;;;;;;;;;;;;CAaT,YAAoB,GAA2B;EAC7C,MAAM,QAAQ,EAAE,qBAAqB;EACrC,MAAM,YAAwB,EAAE;AAChC,QAAM,MAAM,QAAQ,SAAS;GAC3B,MAAM,UAAU,EAAE,KAAK,CAAC,KAAK,qBAAqB,CAAC,MAAM;GACzD,MAAM,OAAO,EAAE,KAAK,CAAC,KAAK,4BAA4B,CAAC,KAAK,OAAO;GACnE,MAAM,QAAQ,EAAE,KAAK,CAAC,KAAK,0BAA0B,CAAC,MAAM,CAAC,MAAM;GACnE,MAAM,WAAW;IACf,QAAQ,KAAK;IACb;IACA;IACA;IACD;AACD,aAAU,KAAK,IAAIA,0BAAAA,SAAS;IAAE,aAAa;IAAO;IAAU,CAAC,CAAC;IAC9D;AACF,SAAO"}