"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ProxyConfiguration = void 0;
const tslib_1 = require("tslib");
const ow_1 = tslib_1.__importDefault(require("ow"));
const log_1 = tslib_1.__importDefault(require("@apify/log"));
/**
 * Configures connection to a proxy server with the provided options. Proxy servers are used to prevent target websites from blocking
 * your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
 * them to use the selected proxies for all connections. You can get information about the currently used proxy by inspecting
 * the {@apilink ProxyInfo} property in your crawler's page function. There, you can inspect the proxy's URL and other attributes.
 *
 * If you want to use your own proxies, use the {@apilink ProxyConfigurationOptions.proxyUrls} option. Your list of proxy URLs will
 * be rotated by the configuration if this option is provided.
 *
 * **Example usage:**
 *
 * ```javascript
 *
 * const proxyConfiguration = new ProxyConfiguration({
 *     proxyUrls: ['...', '...'],
 * });
 *
 * const crawler = new CheerioCrawler({
 *     // ...
 *     proxyConfiguration,
 *     requestHandler({ proxyInfo }) {
 *         const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
 *     }
 * })
 *
 * ```
 * @category Scaling
 */
class ProxyConfiguration {
    /**
     * Creates a {@apilink ProxyConfiguration} instance based on the provided options. Proxy servers are used to prevent target websites from
     * blocking your crawlers based on IP address rate limits or blacklists. Setting proxy configuration in your crawlers automatically configures
     * them to use the selected proxies for all connections.
     *
     * ```javascript
     * const proxyConfiguration = new ProxyConfiguration({
     *     proxyUrls: ['http://user:pass@proxy-1.com', 'http://user:pass@proxy-2.com'],
     * });
     *
     * const crawler = new CheerioCrawler({
     *     // ...
     *     proxyConfiguration,
     *     requestHandler({ proxyInfo }) {
     *         const usedProxyUrl = proxyInfo.url; // Getting the proxy URL
     *     }
     * })
     *
     * ```
     *
     * @param [options]
     *   `proxyUrls` (non-empty array of URL strings) and `newUrlFunction` (function) are validated
     *   and are mutually exclusive. When `validateRequired` is truthy, one of the two must be given.
     * @throws If both `proxyUrls` and `newUrlFunction` are provided, if neither is provided while
     *   `validateRequired` is truthy, or if the options fail shape validation.
     */
    constructor(options = {}) {
        // Every field is created as an own, enumerable, configurable, writable property.
        const defineField = (name, value) => Object.defineProperty(this, name, {
            enumerable: true,
            configurable: true,
            writable: true,
            value,
        });
        defineField('isManInTheMiddle', false);
        // Monotonic counter; taken modulo `proxyUrls.length` to rotate through the list.
        defineField('nextCustomUrlIndex', 0);
        defineField('proxyUrls', undefined);
        // sessionId -> proxy URL already handed out for that session.
        defineField('usedProxyUrls', new Map());
        defineField('newUrlFunction', undefined);
        defineField('log', log_1.default.child({ prefix: 'ProxyConfiguration' }));
        // `validateRequired` is stripped before validation because `exactShape` rejects unknown keys.
        const { validateRequired, ...rest } = options;
        (0, ow_1.default)(rest, ow_1.default.object.exactShape({
            proxyUrls: ow_1.default.optional.array.nonEmpty.ofType(ow_1.default.string.url),
            newUrlFunction: ow_1.default.optional.function,
        }));
        const { proxyUrls, newUrlFunction } = options;
        if (proxyUrls && newUrlFunction) {
            this._throwCannotCombineCustomMethods();
        }
        if (!proxyUrls && !newUrlFunction && validateRequired) {
            this._throwNoOptionsProvided();
        }
        this.proxyUrls = proxyUrls;
        this.newUrlFunction = newUrlFunction;
    }

    /**
     * This function creates a new {@apilink ProxyInfo} info object.
     * It is used by CheerioCrawler and PuppeteerCrawler to generate proxy URLs and also to allow the user to inspect
     * the currently used proxy via the requestHandler parameter `proxyInfo`.
     * Use it if you want to work with a rich representation of a proxy URL.
     * If you need the URL string only, use {@apilink ProxyConfiguration.newUrl}.
     * @param [sessionId]
     *  Represents the identifier of user {@apilink Session} that can be managed by the {@apilink SessionPool} or
     *  you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
     *  When the provided sessionId is a number, it's converted to a string. Property sessionId of
     *  {@apilink ProxyInfo} is always returned as a type string.
     *
     *  All the HTTP requests going through the proxy with the same session identifier
     *  will use the same target proxy server (i.e. the same IP address).
     *  The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
     *  (This method does not validate the identifier itself.)
     * @return Represents information about used proxy and its configuration:
     *  `sessionId`, `url`, and the `username`, `password`, `hostname` and `port` parsed from the URL.
     */
    async newProxyInfo(sessionId) {
        if (typeof sessionId === 'number') {
            sessionId = `${sessionId}`;
        }
        const url = await this.newUrl(sessionId);
        const { username, password, port, hostname } = new URL(url);
        return {
            sessionId,
            url,
            username,
            password,
            hostname,
            port,
        };
    }

    /**
     * Returns a new proxy URL based on provided configuration options and the `sessionId` parameter.
     * When a `newUrlFunction` was configured it is used; otherwise the URL is taken from `proxyUrls`.
     * @param [sessionId]
     *  Represents the identifier of user {@apilink Session} that can be managed by the {@apilink SessionPool} or
     *  you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier.
     *  When the provided sessionId is a number, it's converted to a string.
     *
     *  All the HTTP requests going through the proxy with the same session identifier
     *  will use the same target proxy server (i.e. the same IP address).
     *  The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.
     *  (This method does not validate the identifier itself.)
     * @return A string with a proxy URL, including authentication credentials and port number.
     *  For example, `http://bob:password123@proxy.example.com:8000`
     * @throws If `newUrlFunction` throws or returns something that is not a valid URL, or if neither
     *  `proxyUrls` nor `newUrlFunction` is configured.
     */
    async newUrl(sessionId) {
        if (typeof sessionId === 'number') {
            sessionId = `${sessionId}`;
        }
        if (this.newUrlFunction) {
            return this._callNewUrlFunction(sessionId);
        }
        return this._handleCustomUrl(sessionId);
    }

    /**
     * Handles custom url rotation with session.
     *
     * Without a `sessionId`, the next URL in `proxyUrls` is returned in round-robin order.
     * With a `sessionId`, the URL previously assigned to that session is reused; a session seen
     * for the first time gets the next URL in the rotation and the assignment is remembered.
     * @throws If no `proxyUrls` are configured.
     */
    _handleCustomUrl(sessionId) {
        // Fail with an actionable message instead of a TypeError on `undefined.length`.
        if (!this.proxyUrls) {
            this._throwNoOptionsProvided();
        }
        if (sessionId && this.usedProxyUrls.has(sessionId)) {
            return this.usedProxyUrls.get(sessionId);
        }
        const customUrlToUse = this.proxyUrls[this.nextCustomUrlIndex++ % this.proxyUrls.length];
        if (sessionId) {
            this.usedProxyUrls.set(sessionId, customUrlToUse);
        }
        return customUrlToUse;
    }

    /**
     * Calls the custom newUrlFunction and checks format of its return value.
     * Any error thrown by the function, or by parsing its result as a URL, is wrapped
     * by {@apilink ProxyConfiguration._throwNewUrlFunctionInvalid}.
     */
    async _callNewUrlFunction(sessionId) {
        try {
            const proxyUrl = await this.newUrlFunction(sessionId);
            new URL(proxyUrl); // eslint-disable-line no-new
            return proxyUrl;
        }
        catch (err) {
            this._throwNewUrlFunctionInvalid(err);
        }
    }

    /**
     * Throws the "invalid newUrlFunction result" error. The original error is attached as `cause`.
     * @param err The value that was thrown; values without a `message` are stringified.
     */
    _throwNewUrlFunctionInvalid(err) {
        const hasMessage = err !== null && err !== undefined && err.message !== undefined;
        const reason = hasMessage ? err.message : String(err);
        throw new Error(`The provided newUrlFunction did not return a valid URL.\nCause: ${reason}`, { cause: err });
    }

    /**
     * Throws the error reported when both `proxyUrls` and `newUrlFunction` are supplied.
     */
    _throwCannotCombineCustomMethods() {
        throw new Error('Cannot combine custom proxies "options.proxyUrls" with custom generating function "options.newUrlFunction".');
    }

    /**
     * Throws the error reported when neither `proxyUrls` nor `newUrlFunction` is available.
     */
    _throwNoOptionsProvided() {
        throw new Error('One of "options.proxyUrls" or "options.newUrlFunction" needs to be provided.');
    }
}
exports.ProxyConfiguration = ProxyConfiguration;
//# sourceMappingURL=proxy_configuration.js.map