{"version":3,"file":"spider.cjs","names":["BaseDocumentLoader","Spider","Document"],"sources":["../../../src/document_loaders/web/spider.ts"],"sourcesContent":["import { Spider } from \"@spider-cloud/spider-client\";\nimport { Document, type DocumentInterface } from \"@langchain/core/documents\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\n\n/**\n * Interface representing the parameters for the Spider loader. It\n * includes properties such as the URL to scrape or crawl and the API key.\n */\ninterface SpiderLoaderParameters {\n  /**\n   * URL to scrape or crawl\n   */\n  url: string;\n\n  /**\n   * API key for Spider. If not provided, the default value is the value of the SPIDER_API_KEY environment variable.\n   */\n  apiKey?: string;\n\n  /**\n   * Mode of operation. Can be either \"crawl\" or \"scrape\". If not provided, the default value is \"scrape\".\n   */\n  mode?: \"crawl\" | \"scrape\";\n  params?: Record<string, unknown>;\n}\ninterface SpiderDocument {\n  content: string;\n  metadata: Record<string, unknown>;\n}\n\n/**\n * Class representing a document loader for loading data from\n * Spider (spider.cloud). It extends the BaseDocumentLoader class.\n * @example\n * ```typescript\n * const loader = new SpiderLoader({\n *   url: \"{url}\",\n *   apiKey: \"{apiKey}\",\n *   mode: \"crawl\"\n * });\n * const docs = await loader.load();\n * ```\n */\nexport class SpiderLoader extends BaseDocumentLoader {\n  private apiKey: string;\n\n  private url: string;\n\n  private mode: \"crawl\" | \"scrape\";\n\n  private params?: Record<string, unknown>;\n\n  constructor(loaderParams: SpiderLoaderParameters) {\n    super();\n    const {\n      apiKey = getEnvironmentVariable(\"SPIDER_API_KEY\"),\n      url,\n      mode = \"scrape\",\n      params,\n    } = loaderParams;\n    if (!apiKey) {\n      throw new Error(\n        \"Spider API key not set. You can set it as SPIDER_API_KEY in your .env file, or pass it to Spider.\"\n      );\n    }\n\n    this.apiKey = apiKey;\n    this.url = url;\n    this.mode = mode;\n    this.params = params || { metadata: true, return_format: \"markdown\" };\n  }\n\n  /**\n   * Loads the data from the Spider.\n   * @returns An array of Documents representing the retrieved data.\n   * @throws An error if the data could not be loaded.\n   */\n  public async load(): Promise<DocumentInterface[]> {\n    const app = new Spider({ apiKey: this.apiKey });\n    let spiderDocs: SpiderDocument[];\n\n    if (this.mode === \"scrape\") {\n      const response = await app.scrapeUrl(this.url, this.params);\n      if (response.error) {\n        throw new Error(\n          `Spider: Failed to scrape URL. Error: ${response.error}`\n        );\n      }\n      spiderDocs = response as SpiderDocument[];\n    } else if (this.mode === \"crawl\") {\n      const response = await app.crawlUrl(this.url, this.params);\n      if (response.error) {\n        throw new Error(\n          `Spider: Failed to crawl URL. Error: ${response.error}`\n        );\n      }\n      spiderDocs = response as SpiderDocument[];\n    } else {\n      throw new Error(\n        `Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape'.`\n      );\n    }\n\n    return spiderDocs.map(\n      (doc) =>\n        new Document({\n          pageContent: doc.content || \"\",\n          metadata: doc.metadata || {},\n        })\n    );\n  }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;AA4CA,IAAa,eAAb,cAAkCA,sCAAAA,mBAAmB;CACnD;CAEA;CAEA;CAEA;CAEA,YAAY,cAAsC;AAChD,SAAO;EACP,MAAM,EACJ,UAAA,GAAA,0BAAA,wBAAgC,iBAAiB,EACjD,KACA,OAAO,UACP,WACE;AACJ,MAAI,CAAC,OACH,OAAM,IAAI,MACR,oGACD;AAGH,OAAK,SAAS;AACd,OAAK,MAAM;AACX,OAAK,OAAO;AACZ,OAAK,SAAS,UAAU;GAAE,UAAU;GAAM,eAAe;GAAY;;;;;;;CAQvE,MAAa,OAAqC;EAChD,MAAM,MAAM,IAAIC,4BAAAA,OAAO,EAAE,QAAQ,KAAK,QAAQ,CAAC;EAC/C,IAAI;AAEJ,MAAI,KAAK,SAAS,UAAU;GAC1B,MAAM,WAAW,MAAM,IAAI,UAAU,KAAK,KAAK,KAAK,OAAO;AAC3D,OAAI,SAAS,MACX,OAAM,IAAI,MACR,wCAAwC,SAAS,QAClD;AAEH,gBAAa;aACJ,KAAK,SAAS,SAAS;GAChC,MAAM,WAAW,MAAM,IAAI,SAAS,KAAK,KAAK,KAAK,OAAO;AAC1D,OAAI,SAAS,MACX,OAAM,IAAI,MACR,uCAAuC,SAAS,QACjD;AAEH,gBAAa;QAEb,OAAM,IAAI,MACR,sBAAsB,KAAK,KAAK,uCACjC;AAGH,SAAO,WAAW,KACf,QACC,IAAIC,0BAAAA,SAAS;GACX,aAAa,IAAI,WAAW;GAC5B,UAAU,IAAI,YAAY,EAAE;GAC7B,CAAC,CACL"}