{"version":3,"file":"apify_dataset.cjs","names":["BaseDocumentLoader","AsyncCaller","ApifyClient"],"sources":["../../../src/document_loaders/web/apify_dataset.ts"],"sourcesContent":["/* oxlint-disable typescript/no-explicit-any */\n\nimport {\n  ActorCallOptions,\n  ApifyClient,\n  ApifyClientOptions,\n  TaskCallOptions,\n} from \"apify-client\";\n\nimport { Document } from \"@langchain/core/documents\";\nimport {\n  AsyncCaller,\n  AsyncCallerParams,\n} from \"@langchain/core/utils/async_caller\";\nimport { getEnvironmentVariable } from \"@langchain/core/utils/env\";\nimport {\n  BaseDocumentLoader,\n  DocumentLoader,\n} from \"@langchain/core/document_loaders/base\";\n\n/**\n * A type that represents a function that takes a single object (an Apify\n * dataset item) and converts it to an instance of the Document class.\n *\n * Change function signature to only be asynchronous for simplicity in v0.1.0\n * https://github.com/langchain-ai/langchainjs/pull/3262\n */\nexport type ApifyDatasetMappingFunction<Metadata extends Record<string, any>> =\n  (\n    item: Record<string | number, unknown>\n  ) =>\n    | Document<Metadata>\n    | Array<Document<Metadata>>\n    | Promise<Document<Metadata> | Array<Document<Metadata>>>;\n\nexport interface ApifyDatasetLoaderConfig<\n  Metadata extends Record<string, any>,\n> extends AsyncCallerParams {\n  datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;\n  clientOptions?: ApifyClientOptions;\n}\n\n/**\n * A class that extends the BaseDocumentLoader and implements the\n * DocumentLoader interface. It represents a document loader that loads\n * documents from an Apify dataset.\n * @example\n * ```typescript\n * const loader = new ApifyDatasetLoader(\"your-dataset-id\", {\n *   datasetMappingFunction: (item) =>\n *     new Document({\n *       pageContent: item.text || \"\",\n *       metadata: { source: item.url },\n *     }),\n *   clientOptions: {\n *     token: \"your-apify-token\",\n *   },\n * });\n *\n * const docs = await loader.load();\n *\n * const chain = new RetrievalQAChain();\n * const res = await chain.invoke({ query: \"What is LangChain?\" });\n *\n * console.log(res.text);\n * console.log(res.sourceDocuments.map((d) => d.metadata.source));\n * ```\n */\nexport class ApifyDatasetLoader<Metadata extends Record<string, any>>\n  extends BaseDocumentLoader\n  implements DocumentLoader\n{\n  protected apifyClient: ApifyClient;\n\n  protected datasetId: string;\n\n  protected datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;\n\n  protected caller: AsyncCaller;\n\n  constructor(datasetId: string, config: ApifyDatasetLoaderConfig<Metadata>) {\n    super();\n    const { clientOptions, datasetMappingFunction, ...asyncCallerParams } =\n      config;\n    this.apifyClient = ApifyDatasetLoader._getApifyClient(clientOptions);\n    this.datasetId = datasetId;\n    this.datasetMappingFunction = datasetMappingFunction;\n    this.caller = new AsyncCaller(asyncCallerParams);\n  }\n\n  /**\n   * Creates an instance of the ApifyClient class with the provided clientOptions.\n   * Adds a User-Agent header to the request config for langchainjs attribution.\n   * @param clientOptions\n   * @private\n   */\n  private static _getApifyClient(\n    clientOptions?: ApifyClientOptions\n  ): ApifyClient {\n    const token = ApifyDatasetLoader._getApifyApiToken(clientOptions);\n    const updatedClientOptions = {\n      ...clientOptions,\n      token,\n      requestInterceptors: [\n        ...(clientOptions?.requestInterceptors ?? []),\n        ApifyDatasetLoader._addUserAgent,\n      ],\n    };\n    return new ApifyClient({ ...updatedClientOptions, token });\n  }\n\n  private static _getApifyApiToken(config?: { token?: string }) {\n    return config?.token ?? getEnvironmentVariable(\"APIFY_API_TOKEN\");\n  }\n\n  /**\n   * Adds a User-Agent header to the request config.\n   * @param config\n   * @private\n   */\n  private static _addUserAgent(config: any): any {\n    const updatedConfig = { ...config };\n    updatedConfig.headers ??= {};\n    updatedConfig.headers[\"User-Agent\"] = `${\n      updatedConfig.headers[\"User-Agent\"] ?? \"\"\n    }; Origin/langchainjs`;\n    return updatedConfig;\n  }\n\n  /**\n   * Retrieves the dataset items from the Apify platform and applies the\n   * datasetMappingFunction to each item to create an array of Document\n   * instances.\n   * @returns An array of Document instances.\n   */\n  async load(): Promise<Document<Metadata>[]> {\n    const dataset = await this.apifyClient\n      .dataset(this.datasetId)\n      .listItems({ clean: true });\n\n    const documentList = await Promise.all(\n      dataset.items.map((item) =>\n        this.caller.call(async () => this.datasetMappingFunction(item))\n      )\n    );\n\n    return documentList.flat();\n  }\n\n  /**\n   * Create an ApifyDatasetLoader by calling an Actor on the Apify platform and waiting for its results to be ready.\n   * @param actorId The ID or name of the Actor on the Apify platform.\n   * @param input The input object of the Actor that you're trying to run.\n   * @param config Options specifying settings for the Actor run.\n   * @param config.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.\n   * @returns An instance of `ApifyDatasetLoader` with the results from the Actor run.\n   */\n  static async fromActorCall<Metadata extends Record<string, any>>(\n    actorId: string,\n    input: Record<string | number, unknown>,\n    config: {\n      callOptions?: ActorCallOptions;\n      clientOptions?: ApifyClientOptions;\n      datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;\n    }\n  ): Promise<ApifyDatasetLoader<Metadata>> {\n    const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(\n      config.clientOptions\n    );\n    const apifyClient = ApifyDatasetLoader._getApifyClient(\n      config.clientOptions\n    );\n    const actorCall = await apifyClient\n      .actor(actorId)\n      .call(input, config.callOptions ?? {});\n\n    return new ApifyDatasetLoader(actorCall.defaultDatasetId, {\n      datasetMappingFunction: config.datasetMappingFunction,\n      clientOptions: { ...config.clientOptions, token: apifyApiToken },\n    });\n  }\n\n  /**\n   * Create an ApifyDatasetLoader by calling a saved Actor task on the Apify platform and waiting for its results to be ready.\n   * @param taskId The ID or name of the task on the Apify platform.\n   * @param input The input object of the task that you're trying to run. Overrides the task's saved input.\n   * @param config Options specifying settings for the task run.\n   * @param config.callOptions Options specifying settings for the task run.\n   * @param config.clientOptions Options specifying settings for the Apify client.\n   * @param config.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.\n   * @returns An instance of `ApifyDatasetLoader` with the results from the task's run.\n   */\n  static async fromActorTaskCall<Metadata extends Record<string, any>>(\n    taskId: string,\n    input: Record<string | number, unknown>,\n    config: {\n      callOptions?: TaskCallOptions;\n      clientOptions?: ApifyClientOptions;\n      datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;\n    }\n  ): Promise<ApifyDatasetLoader<Metadata>> {\n    const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(\n      config.clientOptions\n    );\n    const apifyClient = ApifyDatasetLoader._getApifyClient(\n      config.clientOptions\n    );\n    const taskCall = await apifyClient\n      .task(taskId)\n      .call(input, config.callOptions ?? {});\n\n    return new ApifyDatasetLoader(taskCall.defaultDatasetId, {\n      datasetMappingFunction: config.datasetMappingFunction,\n      clientOptions: { ...config.clientOptions, token: apifyApiToken },\n    });\n  }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoEA,IAAa,qBAAb,MAAa,2BACHA,sCAAAA,mBAEV;CACE;CAEA;CAEA;CAEA;CAEA,YAAY,WAAmB,QAA4C;AACzE,SAAO;EACP,MAAM,EAAE,eAAe,wBAAwB,GAAG,sBAChD;AACF,OAAK,cAAc,mBAAmB,gBAAgB,cAAc;AACpE,OAAK,YAAY;AACjB,OAAK,yBAAyB;AAC9B,OAAK,SAAS,IAAIC,mCAAAA,YAAY,kBAAkB;;;;;;;;CASlD,OAAe,gBACb,eACa;EACb,MAAM,QAAQ,mBAAmB,kBAAkB,cAAc;AASjE,SAAO,IAAIC,aAAAA,YAAY;GAPrB,GAAG;GACH;GACA,qBAAqB,CACnB,GAAI,eAAe,uBAAuB,EAAE,EAC5C,mBAAmB,cACpB;GAE+C;GAAO,CAAC;;CAG5D,OAAe,kBAAkB,QAA6B;AAC5D,SAAO,QAAQ,UAAA,GAAA,0BAAA,wBAAgC,kBAAkB;;;;;;;CAQnE,OAAe,cAAc,QAAkB;EAC7C,MAAM,gBAAgB,EAAE,GAAG,QAAQ;AACnC,gBAAc,YAAY,EAAE;AAC5B,gBAAc,QAAQ,gBAAgB,GACpC,cAAc,QAAQ,iBAAiB,GACxC;AACD,SAAO;;;;;;;;CAST,MAAM,OAAsC;EAC1C,MAAM,UAAU,MAAM,KAAK,YACxB,QAAQ,KAAK,UAAU,CACvB,UAAU,EAAE,OAAO,MAAM,CAAC;AAQ7B,UANqB,MAAM,QAAQ,IACjC,QAAQ,MAAM,KAAK,SACjB,KAAK,OAAO,KAAK,YAAY,KAAK,uBAAuB,KAAK,CAAC,CAChE,CACF,EAEmB,MAAM;;;;;;;;;;CAW5B,aAAa,cACX,SACA,OACA,QAKuC;EACvC,MAAM,gBAAgB,mBAAmB,kBACvC,OAAO,cACR;AAQD,SAAO,IAAI,oBAJO,MAHE,mBAAmB,gBACrC,OAAO,cACR,CAEE,MAAM,QAAQ,CACd,KAAK,OAAO,OAAO,eAAe,EAAE,CAAC,EAEA,kBAAkB;GACxD,wBAAwB,OAAO;GAC/B,eAAe;IAAE,GAAG,OAAO;IAAe,OAAO;IAAe;GACjE,CAAC;;;;;;;;;;;;CAaJ,aAAa,kBACX,QACA,OACA,QAKuC;EACvC,MAAM,gBAAgB,mBAAmB,kBACvC,OAAO,cACR;AAQD,SAAO,IAAI,oBAJM,MAHG,mBAAmB,gBACrC,OAAO,cACR,CAEE,KAAK,OAAO,CACZ,KAAK,OAAO,OAAO,eAAe,EAAE,CAAC,EAED,kBAAkB;GACvD,wBAAwB,OAAO;GAC/B,eAAe;IAAE,GAAG,OAAO;IAAe,OAAO;IAAe;GACjE,CAAC"}