// UNPKG
//
// 5.34 kB - JavaScript - View Raw
1"use strict";
2
3Object.defineProperty(exports, "__esModule", {
4 value: true
5});
6exports.extract = extract;
7
8var _vm = _interopRequireDefault(require("vm"));
9
10var _url = _interopRequireDefault(require("url"));
11
12var _nodeFetch = _interopRequireDefault(require("node-fetch"));
13
14var _data = require("./data");
15
/**
 * Normalise a required module so its default export is always at `.default`.
 *
 * @param obj The value returned by `require`.
 * @returns `obj` itself when it is truthy and flagged `__esModule`,
 *   otherwise a wrapper object exposing `obj` as `default`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  return {
    default: obj
  };
}
17
/**
 * The default request implementation, built on node-fetch.
 *
 * The callback receives `(error, response, body)`. `response` is an object
 * with `statusCode` and `headers`; it starts out as `{statusCode: 0,
 * headers: {}}` and is replaced once the fetch call resolves, so an error
 * raised while reading the body still reports the real status and headers.
 * `body` is a Buffer when `options.encoding` is exactly `null`, otherwise a
 * string decoded with `options.encoding`; it is `null` on failure.
 *
 * @param options Options object (`url`, `method`, `headers`, `gzip`, `encoding`).
 * @param cb Callback function.
 */
function request(options, cb) {
  const {
    encoding
  } = options;

  // Placeholder reported to the callback if the fetch itself fails.
  let response = {
    statusCode: 0,
    headers: {}
  };

  const perform = async () => {
    const fetchImpl = _nodeFetch.default;
    const res = await fetchImpl(options.url, {
      method: options.method || 'GET',
      headers: {
        // Default user agent; caller supplied headers may override it.
        'User-Agent': '-',
        ...(options.headers || {})
      },
      compress: !!options.gzip
    });
    const {
      status: statusCode,
      headers: fetched
    } = res;

    // Flatten multi-valued headers into single comma separated strings.
    const rawHeaders = fetched.raw();
    const flattened = {};

    for (const name of Object.keys(rawHeaders)) {
      flattened[name] = rawHeaders[name].join(', ');
    }

    response = {
      statusCode,
      headers: flattened
    };

    const payload = await res.buffer();

    if (encoding === null) {
      return payload;
    }

    return payload.toString(encoding);
  };

  perform().then(
    data => {
      cb(null, response, data);
    },
    err => {
      cb(err, response, null);
    }
  );
}
/**
 * Promise adapter around a callback style request function.
 *
 * Resolves with `{response, body}` when the callback reports no error and
 * rejects with the reported error otherwise.
 *
 * @param req Request function.
 * @param options Request options.
 * @returns Request response and body.
 */
async function requestP(req, options) {
  const executor = (resolve, reject) => {
    const onComplete = (error, response, body) => {
      if (!error) {
        resolve({
          response,
          body
        });
        return;
      }

      reject(error);
    };

    req(options, onComplete);
  };

  const outcome = await new Promise(executor);
  return outcome;
}
/**
 * Create a VM sandbox safe from context leakage.
 *
 * Both returned methods deliberately replace any failure with a fresh,
 * generic host side error so nothing thrown inside the sandbox is handed
 * back to the caller.
 *
 * @returns Methods to run code in the VM sandbox.
 */
function createSandbox() {
  const failureMessage = 'Error running sandboxed script';

  // The context object must have a null prototype: that is what prevents
  // host variables from leaking into the sandbox.
  const bare = Object.create(null);

  if (bare.toString) {
    throw new Error('Failed to create object without prototype');
  }

  const context = _vm.default.createContext(bare);

  /**
   * Run code, no return.
   *
   * @param code Code string.
   * @param opts VM options.
   */
  const run = (code, opts) => {
    let failed = false;

    try {
      _vm.default.runInContext(code, context, opts);
    } catch (err) {
      // Only remember that it failed; the caught value is discarded.
      failed = true;
    }

    if (failed) {
      throw new Error(failureMessage);
    }
  };

  /**
   * Run code, return data.
   *
   * Each entry of `data` maps a result key to a JavaScript expression that
   * is evaluated inside the sandbox; the results cross the boundary only as
   * a JSON string.
   *
   * @param data The data to get.
   * @param opts VM options.
   * @returns Data object.
   */
  const data = (data, opts) => {
    const fields = [];

    for (const entry of Object.entries(data)) {
      fields.push(`${JSON.stringify(entry[0])}:${entry[1]}`);
    }

    const script = `(""+JSON.stringify({${fields.join(',')}}))`;
    let parsed = null;

    try {
      const raw = _vm.default.runInContext(script, context, opts);

      // Force the return value to a string with concatenation, NOT casting.
      // This prevents any funny business from sandboxed code.
      // eslint-disable-next-line
      parsed = JSON.parse('' + raw);
    } catch (err) {
      // Do nothing; the falsy result is reported below.
    }

    if (!parsed) {
      throw new Error(failureMessage);
    }

    return parsed;
  };

  return {
    run,
    data
  };
}
/**
 * Build the script that sets up the window environment.
 *
 * The result is a call expression: the `WINDOW` source from the data module
 * wrapped in parentheses and invoked with `this` and the body as a JSON
 * encoded string literal.
 *
 * @param body HTML body.
 * @returns JavaScript code.
 */
function codeWindow(body) {
  const factorySource = _data.WINDOW;
  const bodyLiteral = JSON.stringify(body);
  return `(${factorySource})(this,${bodyLiteral})`;
}
/**
 * Derive a file name from the last segment of a URL pathname.
 *
 * @param pathname URL pathname, such as `/d/abc/file%20name.zip`.
 * @returns Decoded file name, the raw segment when it contains a malformed
 *   percent escape, or null when the last segment is empty.
 */
function filenameFromPathname(pathname) {
  const segment = pathname.split('/').pop() || '';

  try {
    return decodeURI(segment) || null;
  } catch (err) {
    // decodeURI throws URIError on malformed escapes (for example a lone
    // "%"). The download URL is still usable, so keep the raw segment
    // instead of failing the whole extraction.
    if (err instanceof URIError) {
      return segment;
    }

    throw err;
  }
}

/**
 * Extract file info from a URL.
 *
 * Fetches the page, loads it into a sandboxed environment, runs every inline
 * script that mentions `dlbutton`, then reads the resulting `href` of the
 * element with id `dlbutton` and resolves it against `uri`.
 *
 * Throws an Error when the status code is not 200, when the body is not a
 * string, when sandboxed code fails, or when no download link was produced.
 *
 * @param uri The URI to extract info from.
 * @param req Optional custom request function or null.
 * @returns File info: `download` (absolute URL) and `filename` (or null).
 */
async function extract(uri, req = null) {
  const requester = req || request;
  const {
    response,
    body
  } = await requestP(requester, {
    url: uri,
    gzip: true
  });
  const {
    statusCode
  } = response;

  if (statusCode !== 200) {
    throw new Error(`Invalid status code: ${statusCode}`);
  }

  const bodyType = typeof body;

  if (bodyType !== 'string') {
    throw new Error(`Invalid body type: ${bodyType}`);
  }

  const sandbox = createSandbox();
  const timeout = 1000;

  // Setup environment.
  // NOTE(review): this is the only sandbox call made without the timeout;
  // left unchanged here, confirm whether that is intentional.
  sandbox.run(codeWindow(body), {});

  // Extract info from environment: the text of every script element.
  const info = sandbox.data({
    scripts: '(function(i,r,l){' + 'while(++i<l.length){' + 'r.push(l[i].textContent)' + '}' + 'return r' + '})(-1,[],document.getElementsByTagName("script"))'
  }, {
    timeout
  });

  // Run the scripts that modify the download button.
  for (const script of info.scripts) {
    if (script.includes('dlbutton')) {
      sandbox.run(script, {
        timeout
      });
    }
  }

  // Extract info about environment.
  const result = sandbox.data({
    dlbutton: 'document.getElementById("dlbutton").href'
  }, {
    timeout
  });

  // Check result.
  if (!result.dlbutton) {
    throw new Error('Failed to extract info');
  }

  // Parse download link and file name.
  const u = new _url.default.URL(result.dlbutton, uri);
  return {
    download: u.href,
    filename: filenameFromPathname(u.pathname)
  };
}
237//# sourceMappingURL=extract.js.map