// Listing copied from the UNPKG file viewer (42.1 kB, JavaScript, raw view).

1'use strict';
2
3var debug = require('debug')('urllib');
4var path = require('path');
5var dns = require('dns');
6var http = require('http');
7var https = require('https');
8var urlutil = require('url');
9var URL = urlutil.URL;
10var util = require('util');
11var qs = require('qs');
12var ip = require('ip');
13var querystring = require('querystring');
14var zlib = require('zlib');
15var ua = require('default-user-agent');
16var digestAuthHeader = require('digest-header');
17var ms = require('humanize-ms');
18var statuses = require('statuses');
19var contentTypeParser = require('content-type');
20var first = require('ee-first');
21var pump = require('pump');
22var utility = require('utility');
23var FormStream = require('formstream');
24var detectProxyAgent = require('./detect_proxy_agent');
25
// Lazily resolved modules. `_Promise` is filled in by `exports.request` the
// first time it is called without a callback; `_iconv` is presumably loaded
// on demand elsewhere in this file (not visible from here).
var _Promise;
var _iconv;

var pkg = require('../package.json');

var USER_AGENT = exports.USER_AGENT = ua('node-urllib', pkg.version);
// Always pass the radix so the major version is parsed as a base-10 number.
var NODE_MAJOR_VERSION = parseInt(process.versions.node.split('.')[0], 10);

// Default agents: change Agent.maxSockets to 1000.
exports.agent = new http.Agent();
exports.agent.maxSockets = 1000;

exports.httpsAgent = new https.Agent();
exports.httpsAgent.maxSockets = 1000;

var LONG_STACK_DELIMITER = '\n --------------------\n';

/**
 * The default request timeout (in milliseconds).
 * @type {Number}
 * @const
 */
exports.TIMEOUT = ms('5s');

/**
 * The default `[connectTimeout, responseTimeout]` pair (in milliseconds),
 * used when `args.timeout` is not given.
 * @type {Array}
 */
exports.TIMEOUTS = [ms('5s'), ms('5s')];

// Request id counter; it is reset to 0 once it reaches MAX_VALUE.
var REQUEST_ID = 0;
var MAX_VALUE = Math.pow(2, 31) - 10;
var isNode010 = /^v0\.10\.\d+$/.test(process.version);
var isNode012 = /^v0\.12\.\d+$/.test(process.version);

/**
 * Supported `dataType` values whose response body is decoded automatically.
 * @type {Array}
 */
var TEXT_DATA_TYPES = [
  'json',
  'text'
];

// Matches a leading `http://` or `https://` (case-insensitive).
var PROTO_RE = /^https?:\/\//i;

// Keep-Alive: timeout=5, max=100
var KEEP_ALIVE_RE = /^timeout=(\d+)/i;

// Property names used to keep per-socket request/response counters.
var SOCKET_REQUEST_COUNT = '_URLLIB_SOCKET_REQUEST_COUNT';
var SOCKET_RESPONSE_COUNT = '_URLLIB_SOCKET_RESPONSE_COUNT';
74
75/**
76 * Handle all http request, both http and https support well.
77 *
78 * @example
79 *
80 * ```js
81 * // GET https://nodejs.org
82 * urllib.request('https://nodejs.org', function(err, data, res) {});
83 * // POST https://nodejs.org
84 * var args = { type: 'post', data: { foo: 'bar' } };
85 * urllib.request('https://nodejs.org', args, function(err, data, res) {});
86 * ```
87 *
88 * @param {String|Object} url: the request full URL.
89 * @param {Object} [args]: optional
90 * - {Object} [data]: request data, will auto be query stringify.
91 * - {Boolean} [dataAsQueryString]: force convert `data` to query string.
92 * - {String|Buffer} [content]: optional, if set content, `data` will ignore.
93 * - {ReadStream} [stream]: read stream to sent.
94 * - {WriteStream} [writeStream]: writable stream to save response data.
95 * If you use this, callback's data should be null.
96 * We will just `pipe(ws, {end: true})`.
97 * - {consumeWriteStream} [true]: consume the writeStream, invoke the callback after writeStream close.
98 * - {Array<ReadStream|Buffer|String>|Object|ReadStream|Buffer|String} [files]: optional,
99 * The files will send with `multipart/form-data` format, base on `formstream`.
100 * If `method` not set, will use `POST` method by default.
101 * - {String} [method]: optional, could be GET | POST | DELETE | PUT, default is GET
102 * - {String} [contentType]: optional, request data type, could be `json`, default is undefined
103 * - {String} [dataType]: optional, response data type, could be `text` or `json`, default is buffer
104 * - {Boolean|Function} [fixJSONCtlChars]: optional, fix the control characters (U+0000 through U+001F)
105 * before JSON parse response. Default is `false`.
106 * `fixJSONCtlChars` can be a function, will pass data to the first argument. e.g.: `data = fixJSONCtlChars(data)`
107 * - {Object} [headers]: optional, request headers
108 * - {Boolean} [keepHeaderCase]: optional, by default will convert header keys to lowercase
109 * - {Number|Array} [timeout]: request timeout(in milliseconds), default is `exports.TIMEOUTS containing connect timeout and response timeout`
110 * - {Agent} [agent]: optional, http agent. Set `false` if you does not use agent.
111 * - {Agent} [httpsAgent]: optional, https agent. Set `false` if you does not use agent.
112 * - {String} [auth]: Basic authentication i.e. 'user:password' to compute an Authorization header.
113 * - {String} [digestAuth]: Digest authentication i.e. 'user:password' to compute an Authorization header.
114 * - {String|Buffer|Array} [ca]: An array of strings or Buffers of trusted certificates.
115 * If this is omitted several well known "root" CAs will be used, like VeriSign.
116 * These are used to authorize connections.
117 * Notes: This is necessary only if the server uses the self-signed certificate
118 * - {Boolean} [rejectUnauthorized]: If true, the server certificate is verified against the list of supplied CAs.
119 * An 'error' event is emitted if verification fails. Default: true.
120 * - {String|Buffer} [pfx]: A string or Buffer containing the private key,
121 * certificate and CA certs of the server in PFX or PKCS12 format.
122 * - {String|Buffer} [key]: A string or Buffer containing the private key of the client in PEM format.
123 * Notes: This is necessary only if using the client certificate authentication
124 * - {String|Buffer} [cert]: A string or Buffer containing the certificate key of the client in PEM format.
125 * Notes: This is necessary only if using the client certificate authentication
126 * - {String} [passphrase]: A string of passphrase for the private key or pfx.
127 * - {String} [ciphers]: A string describing the ciphers to use or exclude.
128 * - {String} [secureProtocol]: The SSL method to use, e.g. SSLv3_method to force SSL version 3.
129 * The possible values depend on your installation of OpenSSL and are defined in the constant SSL_METHODS.
130 * - {Boolean} [followRedirect]: Follow HTTP 3xx responses as redirects. defaults to false.
131 * - {Number} [maxRedirects]: The maximum number of redirects to follow, defaults to 10.
132 * - {Function(from, to)} [formatRedirectUrl]: Format the redirect url by your self. Default is `url.resolve(from, to)`
133 * - {Function(options)} [beforeRequest]: Before request hook, you can change every thing here.
134 * - {Boolean} [streaming]: let you get the res object when request connected, default is `false`. alias `customResponse`
135 * - {Boolean} [gzip]: Accept gzip response content and auto decode it, default is `false`.
136 * - {Boolean} [timing]: Enable timing or not, default is `false`.
137 * - {Function} [lookup]: Custom DNS lookup function, default is `dns.lookup`.
138 * Require node >= 4.0.0 and only work on `http` protocol.
139 * - {Boolean} [enableProxy]: optional, enable proxy request. Default is `false`.
140 * - {String|Object} [proxy]: optional proxy agent uri or options. Default is `null`.
141 * - {String} [socketPath]: optional, unix domain socket file path.
142 * - {Function} checkAddress: optional, check request address to protect from SSRF and similar attacks.
143 * @param {Function} [callback]: callback(error, data, res). If missing callback, will return a promise object.
144 * @return {HttpRequest} req object.
145 * @api public
146 */
147exports.request = function request(url, args, callback) {
148 // request(url, callback)
149 if (arguments.length === 2 && typeof args === 'function') {
150 callback = args;
151 args = null;
152 }
153 if (typeof callback === 'function') {
154 return exports.requestWithCallback(url, args, callback);
155 }
156
157 // Promise
158 if (!_Promise) {
159 _Promise = require('any-promise');
160 }
161 return new _Promise(function (resolve, reject) {
162 exports.requestWithCallback(url, args, makeCallback(resolve, reject));
163 });
164};
165
166// alias to curl
167exports.curl = exports.request;
168
/**
 * Adapt a promise's `resolve`/`reject` pair to the `(err, data, res)` callback
 * signature used by `requestWithCallback`.
 *
 * @param {Function} resolve: called with `{ data, status, headers, res }` on success.
 * @param {Function} reject: called with the error when `err` is truthy.
 * @return {Function} callback(err, data, res)
 */
function makeCallback(resolve, reject) {
  return function (err, data, res) {
    if (err) {
      return reject(err);
    }
    // `status` and `headers` are lifted from `res` for convenience; the full
    // `res` object is still passed along.
    resolve({
      data: data,
      status: res.statusCode,
      headers: res.headers,
      res: res
    });
  };
}
182
183// yield urllib.requestThunk(url, args)
184exports.requestThunk = function requestThunk(url, args) {
185 return function (callback) {
186 exports.requestWithCallback(url, args, function (err, data, res) {
187 if (err) {
188 return callback(err);
189 }
190 callback(null, {
191 data: data,
192 status: res.statusCode,
193 headers: res.headers,
194 res: res
195 });
196 });
197 };
198};
199
200function requestWithCallback(url, args, callback) {
201 var req;
202 // requestWithCallback(url, callback)
203 if (!url || (typeof url !== 'string' && typeof url !== 'object')) {
204 var msg = util.format('expect request url to be a string or a http request options, but got %j', url);
205 throw new Error(msg);
206 }
207
208 if (arguments.length === 2 && typeof args === 'function') {
209 callback = args;
210 args = null;
211 }
212
213 args = args || {};
214 if (REQUEST_ID >= MAX_VALUE) {
215 REQUEST_ID = 0;
216 }
217 var reqId = ++REQUEST_ID;
218
219 args.requestUrls = args.requestUrls || [];
220
221 args.timeout = args.timeout || exports.TIMEOUTS;
222 args.maxRedirects = args.maxRedirects || 10;
223 args.streaming = args.streaming || args.customResponse;
224 var requestStartTime = Date.now();
225 var parsedUrl;
226
227 if (typeof url === 'string') {
228 if (!PROTO_RE.test(url)) {
229 // Support `request('www.server.com')`
230 url = 'http://' + url;
231 }
232 if (URL) {
233 parsedUrl = urlutil.parse(new URL(url).href);
234 } else {
235 parsedUrl = urlutil.parse(url);
236 }
237 } else {
238 parsedUrl = url;
239 }
240
241 var reqMeta = {
242 requestId: reqId,
243 url: parsedUrl.href,
244 args: args,
245 ctx: args.ctx,
246 };
247 if (args.emitter) {
248 args.emitter.emit('request', reqMeta);
249 }
250
251 var method = (args.type || args.method || parsedUrl.method || 'GET').toUpperCase();
252 var port = parsedUrl.port || 80;
253 var httplib = http;
254 var agent = getAgent(args.agent, exports.agent);
255 var fixJSONCtlChars = args.fixJSONCtlChars;
256
257 if (parsedUrl.protocol === 'https:') {
258 httplib = https;
259 agent = getAgent(args.httpsAgent, exports.httpsAgent);
260
261 if (!parsedUrl.port) {
262 port = 443;
263 }
264 }
265
266 // request through proxy tunnel
267 var proxyTunnelAgent = detectProxyAgent(parsedUrl, args);
268 if (proxyTunnelAgent) {
269 agent = proxyTunnelAgent;
270 }
271
272 var lookup = args.lookup;
273 // check address to protect from SSRF and similar attacks
274 if (args.checkAddress) {
275 var _lookup = lookup || dns.lookup;
276 lookup = function(host, dnsopts, callback) {
277 _lookup(host, dnsopts, function emitLookup(err, ip, family) {
278 // add check address logic in custom dns lookup
279 if (!err && !args.checkAddress(ip, family)) {
280 err = new Error('illegal address');
281 err.name = 'IllegalAddressError';
282 err.hostname = host;
283 err.ip = ip;
284 err.family = family;
285 }
286 callback(err, ip, family);
287 });
288 };
289 }
290
291 var requestSize = 0;
292 var options = {
293 host: parsedUrl.hostname || parsedUrl.host || 'localhost',
294 path: parsedUrl.path || '/',
295 method: method,
296 port: port,
297 agent: agent,
298 headers: {},
299 // default is dns.lookup
300 // https://github.com/nodejs/node/blob/master/lib/net.js#L986
301 // custom dnslookup require node >= 4.0.0 (for http), node >=8 (for https)
302 // https://github.com/nodejs/node/blob/archived-io.js-v0.12/lib/net.js#L952
303 lookup: lookup,
304 };
305
306 var originHeaderKeys = {};
307 if (args.headers) {
308 // only allow enumerable and ownProperty value of args.headers
309 var names = utility.getOwnEnumerables(args.headers, true);
310 for (var i = 0; i < names.length; i++) {
311 var name = names[i];
312 var key = name.toLowerCase();
313 if (key !== name) {
314 originHeaderKeys[key] = name;
315 }
316 options.headers[key] = args.headers[name];
317 }
318 }
319 if (args.socketPath) {
320 options.socketPath = args.socketPath;
321 }
322
323 var sslNames = [
324 'pfx',
325 'key',
326 'passphrase',
327 'cert',
328 'ca',
329 'ciphers',
330 'rejectUnauthorized',
331 'secureProtocol',
332 'secureOptions',
333 ];
334 for (var i = 0; i < sslNames.length; i++) {
335 var name = sslNames[i];
336 if (args.hasOwnProperty(name)) {
337 options[name] = args[name];
338 }
339 }
340
341 // fix rejectUnauthorized when major version < 12
342 if (NODE_MAJOR_VERSION < 12) {
343 if (options.rejectUnauthorized === false && !options.hasOwnProperty('secureOptions')) {
344 options.secureOptions = require('constants').SSL_OP_NO_TLSv1_2;
345 }
346 }
347
348 var auth = args.auth || parsedUrl.auth;
349 if (auth) {
350 options.auth = auth;
351 }
352
353 var body = null;
354 var dataAsQueryString = false;
355
356 if (args.files) {
357 if (!options.method || options.method === 'GET' || options.method === 'HEAD') {
358 options.method = 'POST';
359 }
360 var files = args.files;
361 var uploadFiles = [];
362 if (Array.isArray(files)) {
363 for (var i = 0; i < files.length; i++) {
364 var field = 'file' + (i === 0 ? '' : i);
365 uploadFiles.push([ field, files[i] ]);
366 }
367 } else {
368 if (Buffer.isBuffer(files) || typeof files.pipe === 'function' || typeof files === 'string') {
369 uploadFiles.push([ 'file', files ]);
370 } else if (typeof files === 'object') {
371 for (var field in files) {
372 uploadFiles.push([ field, files[field] ]);
373 }
374 }
375 }
376 var form = new FormStream();
377 // set normal fields first
378 if (args.data) {
379 for (var fieldName in args.data) {
380 form.field(fieldName, args.data[fieldName]);
381 }
382 }
383
384 for (var i = 0; i < uploadFiles.length; i++) {
385 var item = uploadFiles[i];
386 if (Buffer.isBuffer(item[1])) {
387 form.buffer(item[0], item[1], 'bufferfile' + i);
388 } else if (typeof item[1].pipe === 'function') {
389 var filename = item[1].path || ('streamfile' + i);
390 filename = path.basename(filename);
391 form.stream(item[0], item[1], filename);
392 } else {
393 form.file(item[0], item[1]);
394 }
395 }
396
397 var formHeaders = form.headers();
398 var formHeaderNames = utility.getOwnEnumerables(formHeaders, true);
399 for (var i = 0; i < formHeaderNames.length; i++) {
400 var name = formHeaderNames[i];
401 options.headers[name.toLowerCase()] = formHeaders[name];
402 }
403 debug('set multipart headers: %j, method: %s', formHeaders, options.method);
404 args.stream = form;
405 } else {
406 body = args.content || args.data;
407 dataAsQueryString = method === 'GET' || method === 'HEAD' || args.dataAsQueryString;
408 if (!args.content) {
409 if (body && !(typeof body === 'string' || Buffer.isBuffer(body))) {
410 if (dataAsQueryString) {
411 // read: GET, HEAD, use query string
412 body = args.nestedQuerystring ? qs.stringify(body) : querystring.stringify(body);
413 } else {
414 var contentType = options.headers['content-type'];
415 // auto add application/x-www-form-urlencoded when using urlencode form request
416 if (!contentType) {
417 if (args.contentType === 'json') {
418 contentType = 'application/json';
419 } else {
420 contentType = 'application/x-www-form-urlencoded';
421 }
422 options.headers['content-type'] = contentType;
423 }
424
425 if (parseContentType(contentType).type === 'application/json') {
426 body = JSON.stringify(body);
427 } else {
428 // 'application/x-www-form-urlencoded'
429 body = args.nestedQuerystring ? qs.stringify(body) : querystring.stringify(body);
430 }
431 }
432 }
433 }
434 }
435
436 if (body) {
437 // if it's a GET or HEAD request, data should be sent as query string
438 if (dataAsQueryString) {
439 options.path += (parsedUrl.query ? '&' : '?') + body;
440 body = null;
441 }
442
443 if (body) {
444 var length = body.length;
445 if (!Buffer.isBuffer(body)) {
446 length = Buffer.byteLength(body);
447 }
448 requestSize = length;
449
450 options.headers['content-length'] = length.toString();
451 }
452 }
453
454 if (args.dataType === 'json') {
455 if (!options.headers.accept) {
456 options.headers.accept = 'application/json';
457 }
458 }
459
460 if (typeof args.beforeRequest === 'function') {
461 // you can use this hook to change every thing.
462 args.beforeRequest(options);
463 }
464
465 var connectTimer = null;
466 var responseTimer = null;
467 var __err = null;
468 var connected = false; // socket connected or not
469 var keepAliveSocket = false; // request with keepalive socket
470 var socketHandledRequests = 0; // socket already handled request count
471 var socketHandledResponses = 0; // socket already handled response count
472 var responseSize = 0;
473 var statusCode = -1;
474 var statusMessage = null;
475 var responseAborted = false;
476 var remoteAddress = '';
477 var remotePort = '';
478 var timing = null;
479 if (args.timing) {
480 timing = {
481 // socket assigned
482 queuing: 0,
483 // dns lookup time
484 dnslookup: 0,
485 // socket connected
486 connected: 0,
487 // request sent
488 requestSent: 0,
489 // Time to first byte (TTFB)
490 waiting: 0,
491 contentDownload: 0,
492 };
493 }
494
495 function cancelConnectTimer() {
496 if (connectTimer) {
497 clearTimeout(connectTimer);
498 connectTimer = null;
499 debug('Request#%d connect timer canceled', reqId);
500 }
501 }
502 function cancelResponseTimer() {
503 if (responseTimer) {
504 clearTimeout(responseTimer);
505 responseTimer = null;
506 debug('Request#%d response timer canceled', reqId);
507 }
508 }
509
510 function done(err, data, res) {
511 cancelConnectTimer();
512 cancelResponseTimer();
513 if (!callback) {
514 console.warn('[urllib:warn] [%s] [%s] [worker:%s] %s %s callback twice!!!',
515 Date(), reqId, process.pid, options.method, url);
516 // https://github.com/node-modules/urllib/pull/30
517 if (err) {
518 console.warn('[urllib:warn] [%s] [%s] [worker:%s] %s: %s\nstack: %s',
519 Date(), reqId, process.pid, err.name, err.message, err.stack);
520 }
521 return;
522 }
523 var cb = callback;
524 callback = null;
525 var headers = {};
526 if (res) {
527 statusCode = res.statusCode;
528 statusMessage = res.statusMessage;
529 headers = res.headers;
530 }
531
532 // handle digest auth
533 if (statusCode === 401 && headers['www-authenticate']
534 && !options.headers.authorization && args.digestAuth) {
535 var authenticate = headers['www-authenticate'];
536 if (authenticate.indexOf('Digest ') >= 0) {
537 debug('Request#%d %s: got digest auth header WWW-Authenticate: %s', reqId, url, authenticate);
538 options.headers.authorization = digestAuthHeader(options.method, options.path, authenticate, args.digestAuth);
539 debug('Request#%d %s: auth with digest header: %s', reqId, url, options.headers.authorization);
540 if (res.headers['set-cookie']) {
541 options.headers.cookie = res.headers['set-cookie'].join(';');
542 }
543 args.headers = options.headers;
544 return exports.requestWithCallback(url, args, cb);
545 }
546 }
547
548 var requestUseTime = Date.now() - requestStartTime;
549 if (timing) {
550 timing.contentDownload = requestUseTime;
551 }
552
553 debug('[%sms] done, %s bytes HTTP %s %s %s %s, keepAliveSocket: %s, timing: %j, socketHandledRequests: %s, socketHandledResponses: %s',
554 requestUseTime, responseSize, statusCode, options.method, options.host, options.path,
555 keepAliveSocket, timing, socketHandledRequests, socketHandledResponses);
556
557 var response = {
558 status: statusCode,
559 statusCode: statusCode,
560 statusMessage: statusMessage,
561 headers: headers,
562 size: responseSize,
563 aborted: responseAborted,
564 rt: requestUseTime,
565 keepAliveSocket: keepAliveSocket,
566 data: data,
567 requestUrls: args.requestUrls,
568 timing: timing,
569 remoteAddress: remoteAddress,
570 remotePort: remotePort,
571 socketHandledRequests: socketHandledRequests,
572 socketHandledResponses: socketHandledResponses,
573 };
574
575 if (err) {
576 var agentStatus = '';
577 if (agent && typeof agent.getCurrentStatus === 'function') {
578 // add current agent status to error message for logging and debug
579 agentStatus = ', agent status: ' + JSON.stringify(agent.getCurrentStatus());
580 }
581 err.message += ', ' + options.method + ' ' + url + ' ' + statusCode
582 + ' (connected: ' + connected + ', keepalive socket: ' + keepAliveSocket + agentStatus
583 + ', socketHandledRequests: ' + socketHandledRequests
584 + ', socketHandledResponses: ' + socketHandledResponses + ')'
585 + '\nheaders: ' + JSON.stringify(headers);
586 err.data = data;
587 err.path = options.path;
588 err.status = statusCode;
589 err.headers = headers;
590 err.res = response;
591 addLongStackTrace(err, req);
592 }
593
594 // only support agentkeepalive module for now
595 // agentkeepalive@4: agent.options.freeSocketTimeout
596 // agentkeepalive@3: agent.freeSocketKeepAliveTimeout
597 var freeSocketTimeout = agent && (agent.options && agent.options.freeSocketTimeout || agent.freeSocketKeepAliveTimeout);
598 if (agent && agent.keepAlive && freeSocketTimeout > 0 &&
599 statusCode >= 200 && headers.connection === 'keep-alive' && headers['keep-alive']) {
600 // adjust freeSocketTimeout on the socket
601 var m = KEEP_ALIVE_RE.exec(headers['keep-alive']);
602 if (m) {
603 var seconds = parseInt(m[1]);
604 if (seconds > 0) {
605 // network delay 500ms
606 var serverSocketTimeout = seconds * 1000 - 500;
607 if (serverSocketTimeout < freeSocketTimeout) {
608 // https://github.com/node-modules/agentkeepalive/blob/master/lib/agent.js#L127
609 // agentkeepalive@4
610 var socket = res.socket || (req && req.socket);
611 if (agent.options && agent.options.freeSocketTimeout) {
612 socket.freeSocketTimeout = serverSocketTimeout;
613 } else {
614 socket.freeSocketKeepAliveTimeout = serverSocketTimeout;
615 }
616 }
617 }
618 }
619 }
620
621 cb(err, data, args.streaming ? res : response);
622
623 if (args.emitter) {
624 // keep to use the same reqMeta object on request event before
625 reqMeta.url = parsedUrl.href;
626 reqMeta.socket = req && req.connection;
627 reqMeta.options = options;
628 reqMeta.size = requestSize;
629
630 args.emitter.emit('response', {
631 requestId: reqId,
632 error: err,
633 ctx: args.ctx,
634 req: reqMeta,
635 res: response,
636 });
637 }
638 }
639
640 function handleRedirect(res) {
641 var err = null;
642 if (args.followRedirect && statuses.redirect[res.statusCode]) { // handle redirect
643 args._followRedirectCount = (args._followRedirectCount || 0) + 1;
644 var location = res.headers.location;
645 if (!location) {
646 err = new Error('Got statusCode ' + res.statusCode + ' but cannot resolve next location from headers');
647 err.name = 'FollowRedirectError';
648 } else if (args._followRedirectCount > args.maxRedirects) {
649 err = new Error('Exceeded maxRedirects. Probably stuck in a redirect loop ' + url);
650 err.name = 'MaxRedirectError';
651 } else {
652 var newUrl = args.formatRedirectUrl ? args.formatRedirectUrl(url, location) : urlutil.resolve(url, location);
653 debug('Request#%d %s: `redirected` from %s to %s', reqId, options.path, url, newUrl);
654 // make sure timer stop
655 cancelResponseTimer();
656 // should clean up headers.host on `location: http://other-domain/url`
657 if (options.headers.host && PROTO_RE.test(location)) {
658 options.headers.host = null;
659 args.headers = options.headers;
660 }
661 // avoid done will be execute in the future change.
662 var cb = callback;
663 callback = null;
664 exports.requestWithCallback(newUrl, args, cb);
665 return {
666 redirect: true,
667 error: null
668 };
669 }
670 }
671 return {
672 redirect: false,
673 error: err
674 };
675 }
676
677 // don't set user-agent
678 if (args.headers && (args.headers['User-Agent'] === null || args.headers['user-agent'] === null)) {
679 if (options.headers['user-agent']) {
680 delete options.headers['user-agent'];
681 }
682 } else {
683 // need to set user-agent
684 var hasAgentHeader = options.headers['user-agent'];
685 if (!hasAgentHeader) {
686 options.headers['user-agent'] = USER_AGENT;
687 }
688 }
689
690 if (args.gzip) {
691 var isAcceptEncodingNull = (args.headers && (args.headers['Accept-Encoding'] === null || args.headers['accept-encoding'] === null));
692 if (!isAcceptEncodingNull) {
693 var hasAcceptEncodingHeader = options.headers['accept-encoding'];
694 if (!hasAcceptEncodingHeader) {
695 options.headers['accept-encoding'] = 'gzip, deflate';
696 }
697 }
698 }
699
/**
 * Decode a response body according to the `content-encoding` response header.
 *
 * - empty body or no `content-encoding`: the body is passed through together
 *   with the header value as received.
 * - `gzip` / `deflate` (case-insensitive): the body is unzipped and `cb` is
 *   called WITHOUT an encoding argument, so the caller treats the data as
 *   plain content.
 * - any other encoding: the body is passed through untouched together with
 *   the lower-cased encoding.
 *
 * @param {Object} res: response object, only `res.headers` is read.
 * @param {Buffer} body: raw response body.
 * @param {Function} cb: cb(err, data, encoding)
 */
function decodeContent(res, body, cb) {
  var encoding = res.headers['content-encoding'];
  if (body.length === 0 || !encoding) {
    return cb(null, body, encoding);
  }

  encoding = encoding.toLowerCase();
  switch (encoding) {
    case 'gzip':
    case 'deflate':
      debug('unzip %d length body', body.length);
      zlib.unzip(body, function(err, data) {
        // give generic zlib failures a recognisable name
        if (err && err.name === 'Error') {
          err.name = 'UnzipError';
        }
        cb(err, data);
      });
      break;
    default:
      cb(null, body, encoding);
  }
}
722
723 var writeStream = args.writeStream;
724 var isWriteStreamClose = false;
725
726 debug('Request#%d %s %s with headers %j, options.path: %s',
727 reqId, method, url, options.headers, options.path);
728
729 args.requestUrls.push(parsedUrl.href);
730
731 function onResponse(res) {
732 socketHandledResponses = res.socket[SOCKET_RESPONSE_COUNT] = (res.socket[SOCKET_RESPONSE_COUNT] || 0) + 1;
733 if (timing) {
734 timing.waiting = Date.now() - requestStartTime;
735 }
736 debug('Request#%d %s `req response` event emit: status %d, headers: %j',
737 reqId, url, res.statusCode, res.headers);
738
739 if (args.streaming) {
740 var result = handleRedirect(res);
741 if (result.redirect) {
742 res.resume();
743 return;
744 }
745 if (result.error) {
746 res.resume();
747 return done(result.error, null, res);
748 }
749
750 return done(null, null, res);
751 }
752
753 res.on('error', function () {
754 debug('Request#%d %s: `res error` event emit, total size %d, socket handled %s requests and %s responses',
755 reqId, url, responseSize, socketHandledRequests, socketHandledResponses);
756 });
757
758 res.on('aborted', function () {
759 responseAborted = true;
760 debug('Request#%d %s: `res aborted` event emit, total size %d',
761 reqId, url, responseSize);
762 });
763
764 if (writeStream) {
765 // If there's a writable stream to recieve the response data, just pipe the
766 // response stream to that writable stream and call the callback when it has
767 // finished writing.
768 //
769 // NOTE that when the response stream `res` emits an 'end' event it just
770 // means that it has finished piping data to another stream. In the
771 // meanwhile that writable stream may still writing data to the disk until
772 // it emits a 'close' event.
773 //
774 // That means that we should not apply callback until the 'close' of the
775 // writable stream is emited.
776 //
777 // See also:
778 // - https://github.com/TBEDP/urllib/commit/959ac3365821e0e028c231a5e8efca6af410eabb
779 // - http://nodejs.org/api/stream.html#stream_event_end
780 // - http://nodejs.org/api/stream.html#stream_event_close_1
781 var result = handleRedirect(res);
782 if (result.redirect) {
783 res.resume();
784 return;
785 }
786 if (result.error) {
787 res.resume();
788 // end ths stream first
789 writeStream.end();
790 done(result.error, null, res);
791 return;
792 }
793
794 // you can set consumeWriteStream false that only wait response end
795 if (args.consumeWriteStream === false) {
796 res.on('end', done.bind(null, null, null, res));
797 pump(res, writeStream, function(err) {
798 if (isWriteStreamClose) {
799 return;
800 }
801 isWriteStreamClose = true;
802 debug('Request#%d %s: writeStream close, error: %s', reqId, url, err);
803 });
804 return;
805 }
806
807 // node 0.10, 0.12: only emit res aborted, writeStream close not fired
808 if (isNode010 || isNode012) {
809 first([
810 [ writeStream, 'close' ],
811 [ res, 'aborted' ],
812 ], function(_, stream, event) {
813 debug('Request#%d %s: writeStream or res %s event emitted', reqId, url, event);
814 done(__err || null, null, res);
815 });
816 res.pipe(writeStream);
817 return;
818 }
819
820 debug('Request#%d %s: pump res to writeStream', reqId, url);
821 pump(res, writeStream, function(err) {
822 debug('Request#%d %s: writeStream close event emitted, error: %s, isWriteStreamClose: %s',
823 reqId, url, err, isWriteStreamClose);
824 if (isWriteStreamClose) {
825 return;
826 }
827 isWriteStreamClose = true;
828 done(__err || err, null, res);
829 });
830 return;
831 }
832
833 // Otherwise, just concat those buffers.
834 //
835 // NOTE that the `chunk` is not a String but a Buffer. It means that if
836 // you simply concat two chunk with `+` you're actually converting both
837 // Buffers into Strings before concating them. It'll cause problems when
838 // dealing with multi-byte characters.
839 //
840 // The solution is to store each chunk in an array and concat them with
841 // 'buffer-concat' when all chunks is recieved.
842 //
843 // See also:
844 // http://cnodejs.org/topic/4faf65852e8fb5bc65113403
845
846 var chunks = [];
847
848 res.on('data', function (chunk) {
849 debug('Request#%d %s: `res data` event emit, size %d', reqId, url, chunk.length);
850 responseSize += chunk.length;
851 chunks.push(chunk);
852 });
853
854 var isEmitted = false;
855 function handleResponseCloseAndEnd(event) {
856 debug('Request#%d %s: `res %s` event emit, total size %d, socket handled %s requests and %s responses',
857 reqId, url, event, responseSize, socketHandledRequests, socketHandledResponses);
858 if (isEmitted) {
859 return;
860 }
861 isEmitted = true;
862
863 var body = Buffer.concat(chunks, responseSize);
864 debug('Request#%d %s: _dumped: %s',
865 reqId, url, res._dumped);
866
867 if (__err) {
868 // req.abort() after `res data` event emit.
869 return done(__err, body, res);
870 }
871
872 var result = handleRedirect(res);
873 if (result.error) {
874 return done(result.error, body, res);
875 }
876 if (result.redirect) {
877 return;
878 }
879
880 decodeContent(res, body, function (err, data, encoding) {
881 if (err) {
882 return done(err, body, res);
883 }
884 // if body not decode, dont touch it
885 if (!encoding && TEXT_DATA_TYPES.indexOf(args.dataType) >= 0) {
886 // try to decode charset
887 try {
888 data = decodeBodyByCharset(data, res);
889 } catch (e) {
890 debug('decodeBodyByCharset error: %s', e);
891 // if error, dont touch it
892 return done(null, data, res);
893 }
894
895 if (args.dataType === 'json') {
896 if (responseSize === 0) {
897 data = null;
898 } else {
899 var r = parseJSON(data, fixJSONCtlChars);
900 if (r.error) {
901 err = r.error;
902 } else {
903 data = r.data;
904 }
905 }
906 }
907 }
908
909 if (responseAborted) {
910 // err = new Error('Remote socket was terminated before `response.end()` was called');
911 // err.name = 'RemoteSocketClosedError';
912 debug('Request#%d %s: Remote socket was terminated before `response.end()` was called', reqId, url);
913 }
914
915 done(err, data, res);
916 });
917 }
918
919 // node >= 14 only emit close if req abort
920 res.on('close', function () {
921 handleResponseCloseAndEnd('close');
922 });
923 res.on('end', function () {
924 handleResponseCloseAndEnd('end');
925 });
926 }
927
928 var connectTimeout, responseTimeout;
929 if (Array.isArray(args.timeout)) {
930 connectTimeout = ms(args.timeout[0]);
931 responseTimeout = ms(args.timeout[1]);
932 } else { // set both timeout equal
933 connectTimeout = responseTimeout = ms(args.timeout);
934 }
935 debug('ConnectTimeout: %d, ResponseTimeout: %d', connectTimeout, responseTimeout);
936
// Arm the connect timer. When it fires, the pending request is aborted with a
// `ConnectionTimeoutError`, or a `SocketAssignTimeoutError` when the request
// has not even been assigned a socket yet.
function startConnectTimer() {
  debug('Connect timer ticking, timeout: %d', connectTimeout);
  connectTimer = setTimeout(function () {
    connectTimer = null;
    // -1 means no status code has been recorded so far; mark it as -2
    if (statusCode === -1) {
      statusCode = -2;
    }

    var socketAssigned = Boolean(req.socket);
    var msg = 'Connect timeout for ' + connectTimeout + 'ms';
    if (!socketAssigned) {
      msg += ', working sockets is full';
    }

    var timeoutError = new Error(msg);
    timeoutError.name = socketAssigned ? 'ConnectionTimeoutError' : 'SocketAssignTimeoutError';
    timeoutError.requestId = reqId;
    __err = timeoutError;

    debug('ConnectTimeout: Request#%d %s %s: %s, connected: %s', reqId, url, timeoutError.name, msg, connected);
    abortRequest();
  }, connectTimeout);
}
957
// Arm the response timer. When it fires, the request is aborted with a
// `ResponseTimeoutError` carrying the request id.
function startResposneTimer() {
  debug('Response timer ticking, timeout: %d', responseTimeout);
  responseTimer = setTimeout(function () {
    responseTimer = null;

    var timeoutError = new Error('Response timeout for ' + responseTimeout + 'ms');
    timeoutError.name = 'ResponseTimeoutError';
    timeoutError.requestId = reqId;
    __err = timeoutError;

    debug('ResponseTimeout: Request#%d %s %s: %s, connected: %s',
      reqId, url, timeoutError.name, timeoutError.message, connected);
    abortRequest();
  }, responseTimeout);
}
971
972 if (args.checkAddress) {
973 var hostname = parsedUrl.hostname;
974 // if request hostname is ip, custom lookup wont excute
975 var family = null;
976 if (ip.isV4Format(hostname)) {
977 family = 4;
978 } else if (ip.isV6Format(hostname)) {
979 family = 6;
980 }
981 if (family) {
982 if (!args.checkAddress(hostname, family)) {
983 var err = new Error('illegal address');
984 err.name = 'IllegalAddressError';
985 err.hostname = hostname;
986 err.ip = hostname;
987 err.family = family;
988 return done(err);
989 }
990 }
991 }
992
993 // request headers checker will throw error
994 try {
995 var finalOptions = options;
996
997 // restore origin header key
998 if (args.keepHeaderCase) {
999 var originKeys = Object.keys(originHeaderKeys);
1000 if (originKeys.length) {
1001 var finalHeaders = {};
1002 var names = utility.getOwnEnumerables(options.headers, true);
1003 for (var i = 0; i < names.length; i++) {
1004 var name = names[i];
1005 finalHeaders[originHeaderKeys[name] || name] = options.headers[name];
1006 }
1007
1008 finalOptions = Object.assign({}, options);
1009 finalOptions.headers = finalHeaders;
1010 }
1011 }
1012
1013 req = httplib.request(finalOptions, onResponse);
1014 if (args.trace) {
1015 req._callSite = {};
1016 Error.captureStackTrace(req._callSite, requestWithCallback);
1017 }
1018 } catch (err) {
1019 return done(err);
1020 }
1021
1022 // environment detection: browser or nodejs
1023 if (typeof(window) === 'undefined') {
1024 // start connect timer just after `request` return, and just in nodejs environment
1025 startConnectTimer();
1026 }
1027
// Guard flag: the request must be aborted at most once.
var isRequestAborted = false;

// Abort the in-flight request. Callers set `__err` before calling this.
function abortRequest() {
  if (!isRequestAborted) {
    isRequestAborted = true;

    debug('Request#%d %s abort, connected: %s', reqId, url, connected);
    // aborting won't cause an `error` event when req hasn't been assigned
    // a socket yet, so report the error directly in that case.
    var hasSocket = Boolean(req.socket);
    if (!hasSocket) {
      __err.noSocket = true;
      done(__err);
    }
    req.abort();
  }
}
1043
1044 if (timing) {
1045 // request sent
1046 req.on('finish', function() {
1047 timing.requestSent = Date.now() - requestStartTime;
1048 });
1049 }
1050
1051 req.once('socket', function (socket) {
1052 if (timing) {
1053 // socket queuing time
1054 timing.queuing = Date.now() - requestStartTime;
1055 }
1056
1057 // https://github.com/nodejs/node/blob/master/lib/net.js#L377
1058 // https://github.com/nodejs/node/blob/v0.10.40-release/lib/net.js#L352
1059 // should use socket.socket on 0.10.x
1060 if (isNode010 && socket.socket) {
1061 socket = socket.socket;
1062 }
1063
1064 var orginalSocketTimeout = getSocketTimeout(socket);
1065 if (orginalSocketTimeout && orginalSocketTimeout < responseTimeout) {
1066 // make sure socket live longer than the response timer
1067 var socketTimeout = responseTimeout + 500;
1068 debug('Request#%d socket.timeout(%s) < responseTimeout(%s), reset socket timeout to %s',
1069 reqId, orginalSocketTimeout, responseTimeout, socketTimeout);
1070 socket.setTimeout(socketTimeout);
1071 }
1072
1073 socketHandledRequests = socket[SOCKET_REQUEST_COUNT] = (socket[SOCKET_REQUEST_COUNT] || 0) + 1;
1074 if (socket[SOCKET_RESPONSE_COUNT]) {
1075 socketHandledResponses = socket[SOCKET_RESPONSE_COUNT];
1076 }
1077
1078 var readyState = socket.readyState;
1079 if (readyState === 'opening') {
1080 socket.once('lookup', function(err, ip, addressType) {
1081 debug('Request#%d %s lookup: %s, %s, %s', reqId, url, err, ip, addressType);
1082 if (timing) {
1083 timing.dnslookup = Date.now() - requestStartTime;
1084 }
1085 if (ip) {
1086 remoteAddress = ip;
1087 }
1088 });
1089 socket.once('connect', function() {
1090 if (timing) {
1091 // socket connected
1092 timing.connected = Date.now() - requestStartTime;
1093 }
1094
1095 // cancel socket timer at first and start tick for TTFB
1096 cancelConnectTimer();
1097 startResposneTimer();
1098
1099 debug('Request#%d %s new socket connected', reqId, url);
1100 connected = true;
1101 if (!remoteAddress) {
1102 remoteAddress = socket.remoteAddress;
1103 }
1104 remotePort = socket.remotePort;
1105 });
1106 return;
1107 }
1108
1109 debug('Request#%d %s reuse socket connected, readyState: %s', reqId, url, readyState);
1110 connected = true;
1111 keepAliveSocket = true;
1112 if (!remoteAddress) {
1113 remoteAddress = socket.remoteAddress;
1114 }
1115 remotePort = socket.remotePort;
1116
1117 // reuse socket, timer should be canceled.
1118 cancelConnectTimer();
1119 startResposneTimer();
1120 });
1121
1122 if (writeStream) {
1123 writeStream.once('error', function(err) {
1124 err.message += ' (writeStream "error")';
1125 __err = err;
1126 debug('Request#%d %s `writeStream error` event emit, %s: %s', reqId, url, err.name, err.message);
1127 abortRequest();
1128 });
1129 }
1130
// Guard flag: only the first request error is reported.
var isRequestError = false;

// Handle an error coming from the request (via `pump` or the `error` event).
// Generic errors are renamed depending on whether the socket was connected,
// and a previously recorded `__err` takes precedence when reporting.
function handleRequestError(err) {
  if (!err || isRequestError) {
    return;
  }
  isRequestError = true;

  if (err.name === 'Error') {
    if (connected) {
      err.name = 'ResponseError';
    } else {
      err.name = 'RequestError';
    }
  }
  debug('Request#%d %s `req error` event emit, %s: %s', reqId, url, err.name, err.message);

  var finalError = __err || err;
  done(finalError);
}
1144 if (args.stream) {
1145 debug('Request#%d pump args.stream to req', reqId);
1146 pump(args.stream, req, handleRequestError);
1147 } else {
1148 req.end(body);
1149 }
1150 // when stream already consumed, req's `finish` event is emitted and pump will ignore error after pipe finished
1151 // but if server response timeout later, we will abort the request and emit an error in req
1152 // so we must always manually listen to req's `error` event here to ensure this error is handled
1153 req.on('error', handleRequestError);
1154 req.requestId = reqId;
1155 return req;
1156}
1157
1158exports.requestWithCallback = requestWithCallback;
1159
/**
 * Replacement table for the characters matched by `JSONCtlCharsRE`.
 * Characters with a short JSON escape map to it; every other matched
 * character is turned into a `\uXXXX` escape by `_replaceOneChar`.
 *
 * NOTE: the `"` entry is never reached through `replaceJSONCtlChars`, because
 * `JSONCtlCharsRE` does not match U+0022. It is kept so the table is unchanged.
 */
var JSONCtlCharsMap = {
  '"': '\\"', // \u0022
  '\\': '\\\\', // \u005c
  '\b': '\\b', // \u0008
  '\f': '\\f', // \u000c
  '\n': '\\n', // \u000a
  '\r': '\\r', // \u000d
  '\t': '\\t' // \u0009
};

// control characters U+0000 through U+001F, plus the backslash (U+005C)
var JSONCtlCharsRE = /[\u0000-\u001F\u005C]/g;

/**
 * Escape one character for embedding in JSON text.
 * @param {String} c - a single character
 * @return {String} the short escape from `JSONCtlCharsMap`, or `\uXXXX`
 *   (4 lowercase hex digits) when the table has no entry for `c`
 */
function _replaceOneChar(c) {
  var escaped = JSONCtlCharsMap[c];
  if (escaped) {
    return escaped;
  }
  // zero-pad the code unit to exactly 4 hex digits
  var hex = c.charCodeAt(0).toString(16);
  return '\\u' + ('0000' + hex).slice(-4);
}

/**
 * Escape every control character (and backslash) found in `str`.
 * @param {String} str
 * @return {String}
 */
function replaceJSONCtlChars(str) {
  return str.replace(JSONCtlCharsRE, _replaceOneChar);
}
1178
/**
 * Parse a JSON response body without throwing.
 *
 * @param {String} data - response text to parse
 * @param {Boolean|Function} [fixJSONCtlChars] - when a function, it is called
 *   with `data` and its return value is parsed instead; any other truthy value
 *   runs `replaceJSONCtlChars` on `data` before parsing
 * @return {Object} `{ error, data }`: `data` holds the parsed value on success,
 *   `error` holds the failure (including one thrown by a custom fixer)
 */
function parseJSON(data, fixJSONCtlChars) {
  var result = {
    error: null,
    data: null
  };
  try {
    // the fixer runs inside the try block so that a throwing custom function
    // is reported through `result.error` instead of escaping to the caller
    if (fixJSONCtlChars) {
      if (typeof fixJSONCtlChars === 'function') {
        data = fixJSONCtlChars(data);
      } else {
        // https://github.com/node-modules/urllib/pull/77
        // remove the control characters (U+0000 through U+001F)
        data = replaceJSONCtlChars(data);
      }
    }
    result.data = JSON.parse(data);
  } catch (e) {
    // a custom fixer may throw anything; normalize to an Error object
    var err = e instanceof Error ? e : new Error(String(e));
    if (err.name === 'SyntaxError') {
      err.name = 'JSONResponseFormatError';
    }
    // a custom fixer may also have returned a non-string value
    var text = typeof data === 'string' ? data : String(data);
    if (text.length > 1024) {
      // show 0~512 ... -512~end data
      err.message += ' (data json format: ' +
        JSON.stringify(text.slice(0, 512)) + ' ...skip... ' + JSON.stringify(text.slice(text.length - 512)) + ')';
    } else {
      err.message += ' (data json format: ' + JSON.stringify(text) + ')';
    }
    result.error = err;
  }
  return result;
}
1210
1211
/**
 * Decode a response body using the charset declared in the response's
 * `content-type` header.
 *
 * - no `content-type` header: `data.toString()` is returned
 * - header without a `charset` parameter: `utf-8` is used
 * - charset not supported by `Buffer`: decoding is delegated to `iconv-lite`,
 *   which is required lazily on first use
 *
 * @param {Buffer} data
 * @param {Http(s)Response} res
 * @return {String}
 */
function decodeBodyByCharset(data, res) {
  var contentType = res.headers['content-type'];
  if (!contentType) {
    return data.toString();
  }

  var parsedType = parseContentType(contentType);
  var charset = parsedType.parameters.charset || 'utf-8';

  if (Buffer.isEncoding(charset)) {
    return data.toString(charset);
  }

  // charset is unknown to Buffer: load iconv-lite only when it is needed
  if (!_iconv) {
    _iconv = require('iconv-lite');
  }
  return _iconv.decode(data, charset);
}
1236
/**
 * Choose the agent to use: `defaultAgent` only when `agent` is exactly
 * `undefined`; any other value (including `false` and `null`) is returned as is.
 */
function getAgent(agent, defaultAgent) {
  if (agent === undefined) {
    return defaultAgent;
  }
  return agent;
}
1240
/**
 * Parse a `content-type` header value with `contentTypeParser`.
 * A value that fails to parse is treated as a content type without
 * parameters instead of raising.
 */
function parseContentType(str) {
  var parsed;
  try {
    parsed = contentTypeParser.parse(str);
  } catch (err) {
    // ignore content-type error, treat as default
    parsed = { parameters: {} };
  }
  return parsed;
}
1249
/**
 * Append the call-site stack captured on `req._callSite` to `err.stack`,
 * separated by `LONG_STACK_DELIMITER`. The first line of the call-site stack
 * is dropped. Nothing happens when `err` or `req` is missing, when no string
 * call-site stack is available, or when `err` was already extended
 * (`err._longStack` is set).
 *
 * @param {Error} err - error to extend in place
 * @param {Object} req - request object that may carry `_callSite.stack`
 */
function addLongStackTrace(err, req) {
  if (!err || !req) {
    return;
  }
  var callSiteStack = req._callSite && req._callSite.stack;
  if (!callSiteStack || typeof callSiteStack !== 'string') {
    return;
  }
  if (err._longStack) {
    return;
  }
  var index = callSiteStack.indexOf('\n');
  if (index !== -1) {
    err._longStack = true;
    // skip the first line of the captured stack
    err.stack += LONG_STACK_DELIMITER + callSiteStack.slice(index + 1);
  }
}
1267
// Node 8 sockets don't have a `timeout` attribute, so fall back to `_idleTimeout`.
// https://github.com/nodejs/node/pull/21204/files#diff-e6ef024c3775d787c38487a6309e491dR408
function getSocketTimeout(socket) {
  var timeout = socket.timeout;
  if (timeout) {
    return timeout;
  }
  return socket._idleTimeout;
}