{"version":3,"file":"playwright.cjs","names":["BaseDocumentLoader","Document"],"sources":["../../../src/document_loaders/web/playwright.ts"],"sourcesContent":["import type { LaunchOptions, Page, Browser, Response } from \"playwright\";\n\nimport { Document } from \"@langchain/core/documents\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\nimport type { DocumentLoader } from \"@langchain/core/document_loaders/base\";\n\nexport { Page, Browser, Response };\n\nexport type PlaywrightGotoOptions = {\n  referer?: string;\n  timeout?: number;\n  waitUntil?: \"load\" | \"domcontentloaded\" | \"networkidle\" | \"commit\";\n};\n\n/**\n * Type representing a function for evaluating JavaScript code on a web\n * page using Playwright. Takes a Page, Browser, and Response object as\n * parameters and returns a Promise that resolves to a string.\n */\nexport type PlaywrightEvaluate = (\n  page: Page,\n  browser: Browser,\n  response: Response | null\n) => Promise<string>;\n\nexport type PlaywrightWebBaseLoaderOptions = {\n  launchOptions?: LaunchOptions;\n  gotoOptions?: PlaywrightGotoOptions;\n  evaluate?: PlaywrightEvaluate;\n};\n\n/**\n * Class representing a document loader for scraping web pages using\n * Playwright. Extends the BaseDocumentLoader class and implements the\n * DocumentLoader interface.\n */\nexport class PlaywrightWebBaseLoader\n  extends BaseDocumentLoader\n  implements DocumentLoader\n{\n  options: PlaywrightWebBaseLoaderOptions | undefined;\n\n  constructor(\n    public webPath: string,\n    options?: PlaywrightWebBaseLoaderOptions\n  ) {\n    super();\n    this.options = options ?? undefined;\n  }\n\n  static async _scrape(\n    url: string,\n    options?: PlaywrightWebBaseLoaderOptions\n  ): Promise<string> {\n    const { chromium } = await PlaywrightWebBaseLoader.imports();\n\n    const browser = await chromium.launch({\n      headless: true,\n      ...options?.launchOptions,\n    });\n    const page = await browser.newPage();\n\n    const response = await page.goto(url, {\n      timeout: 180000,\n      waitUntil: \"domcontentloaded\",\n      ...options?.gotoOptions,\n    });\n    const bodyHTML = options?.evaluate\n      ? await options?.evaluate(page, browser, response)\n      : await page.content();\n\n    await browser.close();\n\n    return bodyHTML;\n  }\n\n  /**\n   * Method that calls the _scrape method to perform the scraping of the web\n   * page specified by the webPath property. Returns a Promise that resolves\n   * to the scraped HTML content of the web page.\n   * @returns Promise that resolves to the scraped HTML content of the web page.\n   */\n  async scrape(): Promise<string> {\n    return PlaywrightWebBaseLoader._scrape(this.webPath, this.options);\n  }\n\n  /**\n   * Method that calls the scrape method and returns the scraped HTML\n   * content as a Document object. Returns a Promise that resolves to an\n   * array of Document objects.\n   * @returns Promise that resolves to an array of Document objects.\n   */\n  async load(): Promise<Document[]> {\n    const text = await this.scrape();\n\n    const metadata = { source: this.webPath };\n    return [new Document({ pageContent: text, metadata })];\n  }\n\n  /**\n   * Static method that imports the necessary Playwright modules. Returns a\n   * Promise that resolves to an object containing the imported modules.\n   * @returns Promise that resolves to an object containing the imported modules.\n   */\n  static async imports(): Promise<{\n    chromium: typeof import(\"playwright\").chromium;\n  }> {\n    try {\n      const { chromium } = await import(\"playwright\");\n\n      return { chromium };\n    } catch (e) {\n      console.error(e);\n      throw new Error(\n        \"Please install playwright as a dependency with, e.g. `pnpm install playwright`\"\n      );\n    }\n  }\n}\n"],"mappings":";;;;;;;;;;;AAoCA,IAAa,0BAAb,MAAa,gCACHA,sCAAAA,mBAEV;CACE;CAEA,YACE,SACA,SACA;AACA,SAAO;AAHA,OAAA,UAAA;AAIP,OAAK,UAAU,WAAW,KAAA;;CAG5B,aAAa,QACX,KACA,SACiB;EACjB,MAAM,EAAE,aAAa,MAAM,wBAAwB,SAAS;EAE5D,MAAM,UAAU,MAAM,SAAS,OAAO;GACpC,UAAU;GACV,GAAG,SAAS;GACb,CAAC;EACF,MAAM,OAAO,MAAM,QAAQ,SAAS;EAEpC,MAAM,WAAW,MAAM,KAAK,KAAK,KAAK;GACpC,SAAS;GACT,WAAW;GACX,GAAG,SAAS;GACb,CAAC;EACF,MAAM,WAAW,SAAS,WACtB,MAAM,SAAS,SAAS,MAAM,SAAS,SAAS,GAChD,MAAM,KAAK,SAAS;AAExB,QAAM,QAAQ,OAAO;AAErB,SAAO;;;;;;;;CAST,MAAM,SAA0B;AAC9B,SAAO,wBAAwB,QAAQ,KAAK,SAAS,KAAK,QAAQ;;;;;;;;CASpE,MAAM,OAA4B;AAIhC,SAAO,CAAC,IAAIC,0BAAAA,SAAS;GAAE,aAHV,MAAM,KAAK,QAAQ;GAGU,UADzB,EAAE,QAAQ,KAAK,SAAS;GACW,CAAC,CAAC;;;;;;;CAQxD,aAAa,UAEV;AACD,MAAI;GACF,MAAM,EAAE,aAAa,MAAM,OAAO;AAElC,UAAO,EAAE,UAAU;WACZ,GAAG;AACV,WAAQ,MAAM,EAAE;AAChB,SAAM,IAAI,MACR,iFACD"}