UNPKG

23.4 kBJavaScriptView Raw
1'use strict';
2
3var util = require('util');
4var urlutil = require('url');
5var http = require('http');
6var https = require('https');
7var debug = require('debug')('urllib');
8var ms = require('humanize-ms');
9
// Matches a leading `http://` or `https://` scheme, case-insensitively.
var PROTO_RE = /^https?:\/\//i;

// Upper bound for the request id counter; the counter is reset to 0 once it
// reaches this value.
var MAX_VALUE = Math.pow(2, 31) - 10;

// Monotonic counter used to tag each request with an id.
var REQUEST_ID = 0;

// Declared but never assigned or read in this file.
var _Promise;
15
/**
 * Pick the agent to use for a request.
 *
 * Only `undefined` falls back to the default; any other value (including
 * `null` and `false`) is returned unchanged.
 *
 * @param {*} agent - Agent supplied by the caller, if any.
 * @param {*} defaultAgent - Agent to use when `agent` is `undefined`.
 * @return {*} `agent`, or `defaultAgent` when `agent` is `undefined`.
 */
function getAgent(agent, defaultAgent) {
  if (agent === undefined) {
    return defaultAgent;
  }
  return agent;
}
19
/**
 * Adapt a promise's `resolve` / `reject` pair to an `(err, data, res)`
 * callback.
 *
 * @param {Function} resolve - Called with `{ data, status, headers, res }`
 *   when the callback receives no error.
 * @param {Function} reject - Called with the error when one is given.
 * @return {Function} An `(err, data, res)` callback. It returns whatever
 *   `reject` returns on error and `undefined` on success.
 */
function makeCallback(resolve, reject) {
  return function (err, data, res) {
    if (!err) {
      var result = {
        data: data,
        status: res.statusCode,
        headers: res.headers,
        res: res
      };
      resolve(result);
      return;
    }
    return reject(err);
  };
}
33
// Values of `args.dataType` for which the response body is treated as text.
var TEXT_DATA_TYPES = ['json', 'text'];

// Default `[connectTimeout, responseTimeout]` pair, used when the caller
// passes no `args.timeout`.
// exports.TIMEOUT = ms('5s');
exports.TIMEOUTS = [ms('300s'), ms('300s')];
41
42exports.request = function request(url, args, callback) {
43 // request(url, callback)
44 if (arguments.length === 2 && typeof args === 'function') {
45 callback = args;
46 args = null;
47 }
48 if (typeof callback === 'function') {
49 return exports.requestWithCallback(url, args, callback);
50 }
51
52 return new Promise(function (resolve, reject) {
53 exports.requestWithCallback(url, args, makeCallback(resolve, reject));
54 });
55};
56
57
58exports.requestWithCallback = function requestWithCallback(url, args, callback) {
59 // requestWithCallback(url, callback)
60 if (!url || (typeof url !== 'string' && typeof url !== 'object')) {
61 var msg = util.format('expect request url to be a string or a http request options, but got %j', url);
62 throw new Error(msg);
63 }
64
65 if (arguments.length === 2 && typeof args === 'function') {
66 callback = args;
67 args = null;
68 }
69
70 args = args || {};
71 if (REQUEST_ID >= MAX_VALUE) {
72 REQUEST_ID = 0;
73 }
74 var reqId = ++REQUEST_ID;
75
76 args.requestUrls = args.requestUrls || [];
77
78 var reqMeta = {
79 requestId: reqId,
80 url: url,
81 args: args,
82 ctx: args.ctx,
83 };
84 if (args.emitter) {
85 args.emitter.emit('request', reqMeta);
86 }
87
88 args.timeout = args.timeout || exports.TIMEOUTS;
89 args.maxRedirects = args.maxRedirects || 10;
90 args.streaming = args.streaming || args.customResponse;
91 var requestStartTime = Date.now();
92 var parsedUrl;
93
94 if (typeof url === 'string') {
95 if (!PROTO_RE.test(url)) {
96 // Support `request('www.server.com')`
97 url = 'http://' + url;
98 }
99 parsedUrl = urlutil.parse(url);
100 } else {
101 parsedUrl = url;
102 }
103
104 var method = (args.type || args.method || parsedUrl.method || 'GET').toUpperCase();
105 var port = parsedUrl.port || 80;
106 var httplib = http;
107 var agent = getAgent(args.agent, exports.agent);
108 var fixJSONCtlChars = args.fixJSONCtlChars;
109
110 if (parsedUrl.protocol === 'https:') {
111 httplib = https;
112 agent = getAgent(args.httpsAgent, exports.httpsAgent);
113
114 if (!parsedUrl.port) {
115 port = 443;
116 }
117 }
118
119 // request through proxy tunnel
120 // var proxyTunnelAgent = detectProxyAgent(parsedUrl, args);
121 // if (proxyTunnelAgent) {
122 // agent = proxyTunnelAgent;
123 // }
124
125 var options = {
126 host: parsedUrl.hostname || parsedUrl.host || 'localhost',
127 path: parsedUrl.path || '/',
128 method: method,
129 port: port,
130 agent: agent,
131 headers: args.headers || {},
132 // default is dns.lookup
133 // https://github.com/nodejs/node/blob/master/lib/net.js#L986
134 // custom dnslookup require node >= 4.0.0
135 // https://github.com/nodejs/node/blob/archived-io.js-v0.12/lib/net.js#L952
136 lookup: args.lookup,
137 };
138
139 if (Array.isArray(args.timeout)) {
140 options.requestTimeout = args.timeout[args.timeout.length - 1];
141 } else if (typeof args.timeout !== 'undefined') {
142 options.requestTimeout = args.timeout;
143 }
144
145 var sslNames = [
146 'pfx',
147 'key',
148 'passphrase',
149 'cert',
150 'ca',
151 'ciphers',
152 'rejectUnauthorized',
153 'secureProtocol',
154 'secureOptions',
155 ];
156 for (var i = 0; i < sslNames.length; i++) {
157 var name = sslNames[i];
158 if (args.hasOwnProperty(name)) {
159 options[name] = args[name];
160 }
161 }
162
163 // don't check ssl
164 if (options.rejectUnauthorized === false && !options.hasOwnProperty('secureOptions')) {
165 options.secureOptions = require('constants').SSL_OP_NO_TLSv1_2;
166 }
167
168 var auth = args.auth || parsedUrl.auth;
169 if (auth) {
170 options.auth = auth;
171 }
172
173 var body = args.content || args.data;
174 var dataAsQueryString = method === 'GET' || method === 'HEAD' || args.dataAsQueryString;
175 if (!args.content) {
176 if (body && !(typeof body === 'string' || Buffer.isBuffer(body))) {
177 if (dataAsQueryString) {
178 // read: GET, HEAD, use query string
179 body = args.nestedQuerystring ? qs.stringify(body) : querystring.stringify(body);
180 } else {
181 var contentType = options.headers['Content-Type'] || options.headers['content-type'];
182 // auto add application/x-www-form-urlencoded when using urlencode form request
183 if (!contentType) {
184 if (args.contentType === 'json') {
185 contentType = 'application/json';
186 } else {
187 contentType = 'application/x-www-form-urlencoded';
188 }
189 options.headers['Content-Type'] = contentType;
190 }
191
192 if (parseContentType(contentType).type === 'application/json') {
193 body = JSON.stringify(body);
194 } else {
195 // 'application/x-www-form-urlencoded'
196 body = args.nestedQuerystring ? qs.stringify(body) : querystring.stringify(body);
197 }
198 }
199 }
200 }
201
202 // if it's a GET or HEAD request, data should be sent as query string
203 if (dataAsQueryString && body) {
204 options.path += (parsedUrl.query ? '&' : '?') + body;
205 body = null;
206 }
207
208 var requestSize = 0;
209 if (body) {
210 var length = body.length;
211 if (!Buffer.isBuffer(body)) {
212 length = Buffer.byteLength(body);
213 }
214 requestSize = options.headers['Content-Length'] = length;
215 }
216
217 if (args.dataType === 'json') {
218 options.headers.Accept = 'application/json';
219 }
220
221 if (typeof args.beforeRequest === 'function') {
222 // you can use this hook to change every thing.
223 args.beforeRequest(options);
224 }
225 var connectTimer = null;
226 var responseTimer = null;
227 var __err = null;
228 var connected = false; // socket connected or not
229 var keepAliveSocket = false; // request with keepalive socket
230 var responseSize = 0;
231 var statusCode = -1;
232 var responseAborted = false;
233 var remoteAddress = '';
234 var remotePort = '';
235 var timing = null;
236 if (args.timing) {
237 timing = {
238 // socket assigned
239 queuing: 0,
240 // dns lookup time
241 dnslookup: 0,
242 // socket connected
243 connected: 0,
244 // request sent
245 requestSent: 0,
246 // Time to first byte (TTFB)
247 waiting: 0,
248 contentDownload: 0,
249 };
250 }
251
252 function cancelConnectTimer() {
253 if (connectTimer) {
254 clearTimeout(connectTimer);
255 connectTimer = null;
256 }
257 }
258 function cancelResponseTimer() {
259 if (responseTimer) {
260 clearTimeout(responseTimer);
261 responseTimer = null;
262 }
263 }
264
265 function done(err, data, res) {
266 cancelResponseTimer();
267 if (!callback) {
268 console.warn('[urllib:warn] [%s] [%s] [worker:%s] %s %s callback twice!!!',
269 Date(), reqId, process.pid, options.method, url);
270 // https://github.com/node-modules/urllib/pull/30
271 if (err) {
272 console.warn('[urllib:warn] [%s] [%s] [worker:%s] %s: %s\nstack: %s',
273 Date(), reqId, process.pid, err.name, err.message, err.stack);
274 }
275 return;
276 }
277 var cb = callback;
278 callback = null;
279 var headers = {};
280 if (res) {
281 statusCode = res.statusCode;
282 headers = res.headers;
283 }
284
285 // handle digest auth
286 if (statusCode === 401 && headers['www-authenticate']
287 && (!args.headers || !args.headers.Authorization) && args.digestAuth) {
288 var authenticate = headers['www-authenticate'];
289 if (authenticate.indexOf('Digest ') >= 0) {
290 debug('Request#%d %s: got digest auth header WWW-Authenticate: %s', reqId, url, authenticate);
291 args.headers = args.headers || {};
292 args.headers.Authorization = digestAuthHeader(options.method, options.path, authenticate, args.digestAuth);
293 debug('Request#%d %s: auth with digest header: %s', reqId, url, args.headers.Authorization);
294 if (res.headers['set-cookie']) {
295 args.headers.Cookie = res.headers['set-cookie'].join(';');
296 }
297 return exports.requestWithCallback(url, args, cb);
298 }
299 }
300
301 var requestUseTime = Date.now() - requestStartTime;
302 if (timing) {
303 timing.contentDownload = requestUseTime;
304 }
305
306 debug('[%sms] done, %s bytes HTTP %s %s %s %s, keepAliveSocket: %s, timing: %j',
307 requestUseTime, responseSize, statusCode, options.method, options.host, options.path,
308 keepAliveSocket, timing);
309
310 var response = {
311 status: statusCode,
312 statusCode: statusCode,
313 headers: headers,
314 size: responseSize,
315 aborted: responseAborted,
316 rt: requestUseTime,
317 keepAliveSocket: keepAliveSocket,
318 data: data,
319 requestUrls: args.requestUrls,
320 timing: timing,
321 remoteAddress: remoteAddress,
322 remotePort: remotePort,
323 };
324
325 if (err) {
326 var agentStatus = '';
327 if (agent && typeof agent.getCurrentStatus === 'function') {
328 // add current agent status to error message for logging and debug
329 agentStatus = ', agent status: ' + JSON.stringify(agent.getCurrentStatus());
330 }
331 err.message += ', ' + options.method + ' ' + url + ' ' + statusCode
332 + ' (connected: ' + connected + ', keepalive socket: ' + keepAliveSocket + agentStatus + ')'
333 + '\nheaders: ' + JSON.stringify(headers);
334 err.data = data;
335 err.path = options.path;
336 err.status = statusCode;
337 err.headers = headers;
338 err.res = response;
339 }
340
341 cb(err, data, args.streaming ? res : response);
342
343 if (args.emitter) {
344 // keep to use the same reqMeta object on request event before
345 reqMeta.url = url;
346 reqMeta.socket = req && req.connection;
347 reqMeta.options = options;
348 reqMeta.size = requestSize;
349
350 args.emitter.emit('response', {
351 requestId: reqId,
352 error: err,
353 ctx: args.ctx,
354 req: reqMeta,
355 res: response,
356 });
357 }
358 }
359
360 function handleRedirect(res) {
361 var err = null;
362 if (args.followRedirect && statuses.redirect[res.statusCode]) { // handle redirect
363 args._followRedirectCount = (args._followRedirectCount || 0) + 1;
364 var location = res.headers.location;
365 if (!location) {
366 err = new Error('Got statusCode ' + res.statusCode + ' but cannot resolve next location from headers');
367 err.name = 'FollowRedirectError';
368 } else if (args._followRedirectCount > args.maxRedirects) {
369 err = new Error('Exceeded maxRedirects. Probably stuck in a redirect loop ' + url);
370 err.name = 'MaxRedirectError';
371 } else {
372 var newUrl = args.formatRedirectUrl ? args.formatRedirectUrl(url, location) : urlutil.resolve(url, location);
373 debug('Request#%d %s: `redirected` from %s to %s', reqId, options.path, url, newUrl);
374 // make sure timer stop
375 cancelResponseTimer();
376 // should clean up headers.Host on `location: http://other-domain/url`
377 if (args.headers && args.headers.Host && PROTO_RE.test(location)) {
378 args.headers.Host = null;
379 }
380 // avoid done will be execute in the future change.
381 var cb = callback;
382 callback = null;
383 exports.requestWithCallback(newUrl, args, cb);
384 return {
385 redirect: true,
386 error: null
387 };
388 }
389 }
390 return {
391 redirect: false,
392 error: err
393 };
394 }
395
396
397 if (args.gzip) {
398 if (!options.headers['Accept-Encoding'] && !options.headers['accept-encoding']) {
399 options.headers['Accept-Encoding'] = 'gzip';
400 }
401 }
402
403 function decodeContent(res, body, cb) {
404 var encoding = res.headers['content-encoding'];
405 // if (body.length === 0) {
406 // return cb(null, body, encoding);
407 // }
408
409 // if (!encoding || encoding.toLowerCase() !== 'gzip') {
410 return cb(null, body, encoding);
411 // }
412
413 // debug('gunzip %d length body', body.length);
414 // zlib.gunzip(body, cb);
415 }
416
417 var writeStream = args.writeStream;
418
419 debug('Request#%d %s %s with headers %j, options.path: %s',
420 reqId, method, url, options.headers, options.path);
421
422 args.requestUrls.push(url);
423
424 function onResponse(res) {
425 if (timing) {
426 timing.waiting = Date.now() - requestStartTime;
427 }
428 debug('Request#%d %s `req response` event emit: status %d, headers: %j',
429 reqId, url, res.statusCode, res.headers);
430
431 if (args.streaming) {
432 var result = handleRedirect(res);
433 if (result.redirect) {
434 res.resume();
435 return;
436 }
437 if (result.error) {
438 res.resume();
439 return done(result.error, null, res);
440 }
441
442 return done(null, null, res);
443 }
444
445 res.on('close', function () {
446 debug('Request#%d %s: `res close` event emit, total size %d',
447 reqId, url, responseSize);
448 });
449
450 res.on('error', function () {
451 debug('Request#%d %s: `res error` event emit, total size %d',
452 reqId, url, responseSize);
453 });
454
455 res.on('aborted', function () {
456 responseAborted = true;
457 debug('Request#%d %s: `res aborted` event emit, total size %d',
458 reqId, url, responseSize);
459 });
460
461 if (writeStream) {
462 // If there's a writable stream to recieve the response data, just pipe the
463 // response stream to that writable stream and call the callback when it has
464 // finished writing.
465 //
466 // NOTE that when the response stream `res` emits an 'end' event it just
467 // means that it has finished piping data to another stream. In the
468 // meanwhile that writable stream may still writing data to the disk until
469 // it emits a 'close' event.
470 //
471 // That means that we should not apply callback until the 'close' of the
472 // writable stream is emited.
473 //
474 // See also:
475 // - https://github.com/TBEDP/urllib/commit/959ac3365821e0e028c231a5e8efca6af410eabb
476 // - http://nodejs.org/api/stream.html#stream_event_end
477 // - http://nodejs.org/api/stream.html#stream_event_close_1
478 var result = handleRedirect(res);
479 if (result.redirect) {
480 res.resume();
481 return;
482 }
483 if (result.error) {
484 res.resume();
485 // end ths stream first
486 writeStream.end();
487 return done(result.error, null, res);
488 }
489 // you can set consumeWriteStream false that only wait response end
490 if (args.consumeWriteStream === false) {
491 res.on('end', done.bind(null, null, null, res));
492 } else {
493 // node 0.10, 0.12: only emit res aborted, writeStream close not fired
494 if (isNode010 || isNode012) {
495 first([
496 [ writeStream, 'close' ],
497 [ res, 'aborted' ],
498 ], function(_, stream, event) {
499 debug('Request#%d %s: writeStream or res %s event emitted', reqId, url, event);
500 done(__err || null, null, res);
501 });
502 } else {
503 writeStream.on('close', function() {
504 debug('Request#%d %s: writeStream close event emitted', reqId, url);
505 done(__err || null, null, res);
506 });
507 }
508 }
509 return res.pipe(writeStream);
510 }
511
512 // Otherwise, just concat those buffers.
513 //
514 // NOTE that the `chunk` is not a String but a Buffer. It means that if
515 // you simply concat two chunk with `+` you're actually converting both
516 // Buffers into Strings before concating them. It'll cause problems when
517 // dealing with multi-byte characters.
518 //
519 // The solution is to store each chunk in an array and concat them with
520 // 'buffer-concat' when all chunks is recieved.
521 //
522 // See also:
523 // http://cnodejs.org/topic/4faf65852e8fb5bc65113403
524
525 var chunks = [];
526
527 res.on('data', function (chunk) {
528 debug('Request#%d %s: `res data` event emit, size %d', reqId, url, chunk.length);
529 responseSize += chunk.length;
530 chunks.push(chunk);
531 });
532
533 res.on('end', function () {
534 var body = Buffer.concat(chunks, responseSize);
535 debug('Request#%d %s: `res end` event emit, total size %d, _dumped: %s',
536 reqId, url, responseSize, res._dumped);
537
538 if (__err) {
539 // req.abort() after `res data` event emit.
540 return done(__err, body, res);
541 }
542
543 var result = handleRedirect(res);
544 if (result.error) {
545 return done(result.error, body, res);
546 }
547 if (result.redirect) {
548 return;
549 }
550
551 decodeContent(res, body, function (err, data, encoding) {
552 if (err) {
553 return done(err, body, res);
554 }
555 // if body not decode, dont touch it
556 if (!encoding && TEXT_DATA_TYPES.indexOf(args.dataType) >= 0) {
557 // try to decode charset
558 try {
559 data = decodeBodyByCharset(data, res);
560 } catch (e) {
561 debug('decodeBodyByCharset error: %s', e);
562 // if error, dont touch it
563 return done(null, data, res);
564 }
565
566 if (args.dataType === 'json') {
567 if (responseSize === 0) {
568 data = null;
569 } else {
570 var r = parseJSON(data, fixJSONCtlChars);
571 if (r.error) {
572 err = r.error;
573 } else {
574 data = r.data;
575 }
576 }
577 }
578 }
579
580 if (responseAborted) {
581 // err = new Error('Remote socket was terminated before `response.end()` was called');
582 // err.name = 'RemoteSocketClosedError';
583 debug('Request#%d %s: Remote socket was terminated before `response.end()` was called', reqId, url);
584 }
585
586 done(err, data, res);
587 });
588 });
589 }
590
591 var connectTimeout, responseTimeout;
592 if (Array.isArray(args.timeout)) {
593 connectTimeout = ms(args.timeout[0]);
594 responseTimeout = ms(args.timeout[1]);
595 } else { // set both timeout equal
596 connectTimeout = responseTimeout = ms(args.timeout);
597 }
598 debug('ConnectTimeout: %d, ResponseTimeout: %d', connectTimeout, responseTimeout);
599
600 function startConnectTimer() {
601 debug('Connect timer ticking, timeout: %d', connectTimeout);
602 connectTimer = setTimeout(function () {
603 connectTimer = null;
604 if (statusCode === -1) {
605 statusCode = -2;
606 }
607 var msg = 'Connect timeout for ' + connectTimeout + 'ms';
608 var errorName = 'ConnectionTimeoutError';
609 if (!req.socket) {
610 errorName = 'SocketAssignTimeoutError';
611 msg += ', working sockets is full';
612 }
613 __err = new Error(msg);
614 __err.name = errorName;
615 __err.requestId = reqId;
616 debug('ConnectTimeout: Request#%d %s %s: %s, connected: %s', reqId, url, __err.name, msg, connected);
617 abortRequest();
618 }, connectTimeout);
619 }
620
621 function startResposneTimer() {
622 debug('Response timer ticking, timeout: %d', responseTimeout);
623 responseTimer = setTimeout(function () {
624 responseTimer = null;
625 var msg = 'Response timeout for ' + responseTimeout + 'ms';
626 var errorName = 'ResponseTimeoutError';
627 __err = new Error(msg);
628 __err.name = errorName;
629 __err.requestId = reqId;
630 debug('ResponseTimeout: Request#%d %s %s: %s, connected: %s', reqId, url, __err.name, msg, connected);
631 abortRequest();
632 }, responseTimeout);
633 }
634
635 var req;
636 // request headers checker will throw error
637 options.mode = args.mode ? args.mode : '';
638 try {
639 req = httplib.request(options, onResponse);
640 } catch (err) {
641 return done(err);
642 }
643
644 // environment detection: browser or nodejs
645 if (typeof(window) === 'undefined') {
646 // start connect timer just after `request` return, and just in nodejs environment
647 startConnectTimer();
648 } else {
649 req.on('requestTimeout', function () {
650 if (statusCode === -1) {
651 statusCode = -2;
652 }
653 var msg = 'Connect timeout for ' + connectTimeout + 'ms';
654 var errorName = 'ConnectionTimeoutError';
655 __err = new Error(msg);
656 __err.name = errorName;
657 __err.requestId = reqId;
658 abortRequest();
659 });
660 }
661
662 function abortRequest() {
663 debug('Request#%d %s abort, connected: %s', reqId, url, connected);
664 // it wont case error event when req haven't been assigned a socket yet.
665 if (!req.socket) {
666 __err.noSocket = true;
667 done(__err);
668 }
669 req.abort();
670 }
671
672 if (timing) {
673 // request sent
674 req.on('finish', function() {
675 timing.requestSent = Date.now() - requestStartTime;
676 });
677 }
678
679 req.once('socket', function (socket) {
680 if (timing) {
681 // socket queuing time
682 timing.queuing = Date.now() - requestStartTime;
683 }
684
685 // https://github.com/nodejs/node/blob/master/lib/net.js#L377
686 // https://github.com/nodejs/node/blob/v0.10.40-release/lib/net.js#L352
687 // should use socket.socket on 0.10.x
688 if (isNode010 && socket.socket) {
689 socket = socket.socket;
690 }
691
692 var readyState = socket.readyState;
693 if (readyState === 'opening') {
694 socket.once('lookup', function(err, ip, addressType) {
695 debug('Request#%d %s lookup: %s, %s, %s', reqId, url, err, ip, addressType);
696 if (timing) {
697 timing.dnslookup = Date.now() - requestStartTime;
698 }
699 if (ip) {
700 remoteAddress = ip;
701 }
702 });
703 socket.once('connect', function() {
704 if (timing) {
705 // socket connected
706 timing.connected = Date.now() - requestStartTime;
707 }
708
709 // cancel socket timer at first and start tick for TTFB
710 cancelConnectTimer();
711 startResposneTimer();
712
713 debug('Request#%d %s new socket connected', reqId, url);
714 connected = true;
715 if (!remoteAddress) {
716 remoteAddress = socket.remoteAddress;
717 }
718 remotePort = socket.remotePort;
719 });
720 return;
721 }
722
723 debug('Request#%d %s reuse socket connected, readyState: %s', reqId, url, readyState);
724 connected = true;
725 keepAliveSocket = true;
726 if (!remoteAddress) {
727 remoteAddress = socket.remoteAddress;
728 }
729 remotePort = socket.remotePort;
730
731 // reuse socket, timer should be canceled.
732 cancelConnectTimer();
733 startResposneTimer();
734 });
735
736 req.on('error', function (err) {
737 //TypeError for browser fetch api, Error for browser xmlhttprequest api
738 if (err.name === 'Error' || err.name === 'TypeError') {
739 err.name = connected ? 'ResponseError' : 'RequestError';
740 }
741 err.message += ' (req "error")';
742 debug('Request#%d %s `req error` event emit, %s: %s', reqId, url, err.name, err.message);
743 done(__err || err);
744 });
745
746 if (writeStream) {
747 writeStream.once('error', function (err) {
748 err.message += ' (writeStream "error")';
749 __err = err;
750 debug('Request#%d %s `writeStream error` event emit, %s: %s', reqId, url, err.name, err.message);
751 abortRequest();
752 });
753 }
754
755 if (args.stream) {
756 args.stream.pipe(req);
757 args.stream.once('error', function (err) {
758 err.message += ' (stream "error")';
759 __err = err;
760 debug('Request#%d %s `readStream error` event emit, %s: %s', reqId, url, err.name, err.message);
761 abortRequest();
762 });
763 } else {
764 req.end(body);
765 }
766
767 req.requestId = reqId;
768 return req;
769};
\No newline at end of file