"use strict";
// CommonJS interop preamble: mark the module as a transpiled ES module and
// pre-declare the named export before the dependencies are loaded.
Object.defineProperty(exports, "__esModule", { value: true });
exports.ProxyConfiguration = void 0;
const tslib_1 = require("tslib");
const ow_1 = tslib_1.__importDefault(require("ow"));
const log_1 = tslib_1.__importDefault(require("@apify/log"));
/**
 * Decodes a percent-encoded credential taken from a parsed proxy URL.
 *
 * The `username` and `password` getters of `URL` return the percent-encoded form
 * (e.g. `p%40ss` for `p@ss`), so they have to be decoded before being handed out as credentials.
 * A malformed escape sequence makes `decodeURIComponent` throw; in that case the raw value
 * is returned unchanged rather than failing the whole lookup.
 * @param value Percent-encoded username or password.
 * @return The decoded value, or `value` itself when it cannot be decoded.
 */
function decodeProxyCredential(value) {
    try {
        return decodeURIComponent(value);
    }
    catch (err) {
        // Deliberate best-effort: keep the raw credential when it is not valid percent-encoding.
        return value;
    }
}
/**
 * Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
 * your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
 * them to use the selected proxies for all connections. You can get information about the currently used proxy by inspecting
 * the {@apilink ProxyInfo} property in your crawler's page function. There, you can inspect the proxy's URL and other attributes.
 *
 * If you want to use your own proxies, use the {@apilink ProxyConfigurationOptions.proxyUrls} option. Your list of proxy URLs will
 * be rotated by the configuration if this option is provided.
 *
 * **Example usage:**
 *
 * ```javascript
 *
 * const proxyConfiguration = new ProxyConfiguration({
 *   proxyUrls: ['...', '...'],
 * });
 *
 * const crawler = new CheerioCrawler({
 *   // ...
 *   proxyConfiguration,
 *   requestHandler({ proxyInfo }) {
 *      const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
 *   }
 * })
 *
 * ```
 * @category Scaling
 */
class ProxyConfiguration {
    /**
     * Creates a {@apilink ProxyConfiguration} instance based on the provided options. Proxy servers are used to prevent target websites from
     * blocking your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
     * them to use the selected proxies for all connections.
     *
     * ```javascript
     * const proxyConfiguration = new ProxyConfiguration({
     *     proxyUrls: ['http://user:pass@proxy-1.com', 'http://user:pass@proxy-2.com'],
     * });
     *
     * const crawler = new CheerioCrawler({
     *   // ...
     *   proxyConfiguration,
     *   requestHandler({ proxyInfo }) {
     *       const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
     *   }
     * })
     *
     * ```
     * @param [options]
     * @param [options.proxyUrls] Non-empty array of proxy URL strings that will be rotated.
     * @param [options.newUrlFunction] Custom function producing a proxy URL for a given session ID.
     *   Cannot be combined with `proxyUrls`.
     * @param [options.validateRequired] When truthy, the constructor throws unless one of
     *   `proxyUrls` or `newUrlFunction` is provided.
     * @throws {Error} When both `proxyUrls` and `newUrlFunction` are given, or when neither is
     *   given and `validateRequired` is truthy. Shape validation errors come from `ow`.
     */
    constructor(options = {}) {
        // Fields are created with define semantics (enumerable, configurable, writable),
        // in the same order as the original compiled class fields.
        const defineField = (name, value) => {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        };
        // Initialised to false; not read anywhere else in this class.
        defineField('isManInTheMiddle', false);
        // Monotonic counter; the URL actually used is `proxyUrls[counter % proxyUrls.length]`.
        defineField('nextCustomUrlIndex', 0);
        defineField('proxyUrls', void 0);
        // Maps a session ID to the proxy URL that was first assigned to it.
        defineField('usedProxyUrls', new Map());
        defineField('newUrlFunction', void 0);
        defineField('log', log_1.default.child({ prefix: 'ProxyConfiguration' }));
        // `validateRequired` is stripped before validation because `exactShape` rejects unknown keys.
        const { validateRequired, ...rest } = options;
        (0, ow_1.default)(rest, ow_1.default.object.exactShape({
            proxyUrls: ow_1.default.optional.array.nonEmpty.ofType(ow_1.default.string.url),
            newUrlFunction: ow_1.default.optional.function,
        }));
        const { proxyUrls, newUrlFunction } = options;
        if (proxyUrls && newUrlFunction)
            this._throwCannotCombineCustomMethods();
        if (!proxyUrls && !newUrlFunction && validateRequired)
            this._throwNoOptionsProvided();
        this.proxyUrls = proxyUrls;
        this.newUrlFunction = newUrlFunction;
    }
    /**
     * This function creates a new {@apilink ProxyInfo} info object.
     * It is used by CheerioCrawler and PuppeteerCrawler to generate proxy URLs and also to allow the user to inspect
     * the currently used proxy via the requestHandler parameter `proxyInfo`.
     * Use it if you want to work with a rich representation of a proxy URL.
     * If you need the URL string only, use {@apilink ProxyConfiguration.newUrl}.
     * @param [sessionId]
     *  Represents the identifier of user {@apilink Session} that can be managed by the {@apilink SessionPool} or
     *  you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
     *  When the provided sessionId is a number, it's converted to a string. Property sessionId of
     *  {@apilink ProxyInfo} is always returned as a type string.
     *
     *  All the HTTP requests going through the proxy with the same session identifier
     *  will use the same target proxy server (i.e. the same IP address).
     *  The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`
     *  (this constraint is not validated by this class).
     * @return Represents information about used proxy and its configuration. `username` and `password`
     *  are percent-decoded; `url` is returned exactly as configured; `port` is the string reported by
     *  the parsed URL.
     * @throws {Error} When no proxy source is configured or the custom `newUrlFunction` fails.
     * @throws {TypeError} When a configured proxy URL cannot be parsed by `URL`.
     */
    async newProxyInfo(sessionId) {
        const normalizedSessionId = typeof sessionId === 'number' ? `${sessionId}` : sessionId;
        const url = await this.newUrl(normalizedSessionId);
        const { username, password, port, hostname } = new URL(url);
        return {
            sessionId: normalizedSessionId,
            url,
            // URL getters expose credentials percent-encoded; consumers need the real values.
            username: decodeProxyCredential(username),
            password: decodeProxyCredential(password),
            hostname,
            port,
        };
    }
    /**
     * Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
     * @param [sessionId]
     *  Represents the identifier of user {@apilink Session} that can be managed by the {@apilink SessionPool} or
     *  you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
     *  When the provided sessionId is a number, it's converted to a string.
     *
     *  All the HTTP requests going through the proxy with the same session identifier
     *  will use the same target proxy server (i.e. the same IP address).
     *  The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`
     *  (this constraint is not validated by this class).
     * @return A string with a proxy URL, including authentication credentials and port number.
     *  For example, `http://bob:password123@proxy.example.com:8000`
     * @throws {Error} When no proxy source is configured or the custom `newUrlFunction` fails.
     */
    async newUrl(sessionId) {
        const normalizedSessionId = typeof sessionId === 'number' ? `${sessionId}` : sessionId;
        if (this.newUrlFunction) {
            return this._callNewUrlFunction(normalizedSessionId);
        }
        return this._handleCustomUrl(normalizedSessionId);
    }
    /**
     * Handles custom url rotation with session.
     *
     * Without a session ID the next URL in the rotation is returned. With a session ID, the URL
     * assigned on the first call for that ID is remembered and returned on every later call.
     * @param [sessionId] Session identifier; any falsy value means "no session".
     * @return The selected proxy URL.
     * @throws {Error} When `proxyUrls` is missing or empty, instead of failing on `undefined.length`.
     */
    _handleCustomUrl(sessionId) {
        if (!this.proxyUrls || this.proxyUrls.length === 0)
            this._throwNoOptionsProvided();
        const takeNextUrl = () => this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
        if (!sessionId) {
            return takeNextUrl();
        }
        if (this.usedProxyUrls.has(sessionId)) {
            return this.usedProxyUrls.get(sessionId);
        }
        const assignedUrl = takeNextUrl();
        this.usedProxyUrls.set(sessionId, assignedUrl);
        return assignedUrl;
    }
    /**
     * Calls the custom newUrlFunction and checks format of its return value.
     * @param [sessionId] Session identifier forwarded to `newUrlFunction`.
     * @return The value returned by `newUrlFunction` once it parses as a URL.
     * @throws {Error} When `newUrlFunction` throws/rejects or its result is not a parseable URL.
     */
    async _callNewUrlFunction(sessionId) {
        try {
            const proxyUrl = await this.newUrlFunction(sessionId);
            new URL(proxyUrl); // eslint-disable-line no-new
            return proxyUrl;
        }
        catch (err) {
            return this._throwNewUrlFunctionInvalid(err);
        }
    }
    /**
     * Throws the "invalid newUrlFunction result" error, keeping the original failure as `cause`.
     * @param err Whatever was thrown while calling `newUrlFunction` or parsing its result.
     */
    _throwNewUrlFunctionInvalid(err) {
        // Non-Error throwables (strings, undefined, ...) have no `.message`; stringify them instead.
        const reason = err != null && err.message !== undefined ? err.message : String(err);
        throw new Error(`The provided newUrlFunction did not return a valid URL.\nCause: ${reason}`, { cause: err });
    }
    /** Throws when both `proxyUrls` and `newUrlFunction` were supplied. */
    _throwCannotCombineCustomMethods() {
        throw new Error('Cannot combine custom proxies "options.proxyUrls" with custom generating function "options.newUrlFunction".');
    }
    /** Throws when neither `proxyUrls` nor `newUrlFunction` is available. */
    _throwNoOptionsProvided() {
        throw new Error('One of "options.proxyUrls" or "options.newUrlFunction" needs to be provided.');
    }
}
// Publish the class as the module's named CommonJS export.
exports.ProxyConfiguration = ProxyConfiguration;
//# sourceMappingURL=proxy_configuration.js.map