UNPKG

5.24 kBJavaScriptView Raw
1"use strict";
2
3Object.defineProperty(exports, "__esModule", {
4 value: true
5});
6exports.extract = extract;
7
8var _vm = _interopRequireDefault(require("vm"));
9
10var _url = _interopRequireDefault(require("url"));
11
12var _nodeFetch = _interopRequireDefault(require("node-fetch"));
13
14var _data = require("./data");
15
/**
 * Normalize a required module so it always exposes a `default` property.
 *
 * @param obj Value returned by `require`.
 * @returns The value itself when it is flagged as an ES module, otherwise
 *   a wrapper object holding the value under `default`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  return {
    default: obj
  };
}
17
/**
 * Default request implementation, built on node-fetch.
 *
 * Performs the HTTP request described by `options` and reports the outcome
 * through a Node-style callback: `cb(error, response, body)`.
 * The `response` argument carries `statusCode` and `headers` (multi-value
 * headers joined with ', '). When the request fails before a response is
 * received, `response` is the placeholder `{statusCode: 0, headers: {}}`.
 * The body is a Buffer when `options.encoding` is exactly `null`, otherwise
 * a string decoded with `options.encoding`.
 *
 * @param options Options object (url, method, headers, gzip, encoding).
 * @param cb Callback function.
 */
function request(options, cb) {
  const wantedEncoding = options.encoding;

  // Replaced once the real status and headers are known, so the failure
  // callback can still report them if reading the body fails afterwards.
  let latest = {
    statusCode: 0,
    headers: {}
  };

  const perform = async () => {
    const fetchImpl = _nodeFetch.default;
    const reply = await fetchImpl(options.url, {
      method: options.method || 'GET',
      headers: {
        'User-Agent': '-',
        ...(options.headers || {})
      },
      compress: Boolean(options.gzip)
    });
    const statusCode = reply.status;

    // Flatten each header's list of values into one comma-separated string.
    const flattened = {};

    for (const [name, values] of Object.entries(reply.headers.raw())) {
      flattened[name] = values.join(', ');
    }

    latest = {
      statusCode,
      headers: flattened
    };
    const payload = await reply.buffer();

    if (wantedEncoding === null) {
      // A null encoding requests the raw bytes.
      return payload;
    }

    return payload.toString(wantedEncoding);
  };

  perform().then(
    payload => {
      cb(null, latest, payload);
    },
    failure => {
      cb(failure, latest, null);
    }
  );
}
/**
 * Promise adapter around a callback-style request function.
 *
 * Calls `req(options, callback)` and settles according to the callback:
 * rejects with the error when one is passed, otherwise resolves with the
 * response and body.
 *
 * @param req Request function.
 * @param options Request options.
 * @returns Request response and body.
 */
async function requestP(req, options) {
  const executor = (resolve, reject) => {
    const onDone = (error, response, body) => {
      if (!error) {
        resolve({
          response,
          body
        });
        return;
      }

      reject(error);
    };

    req(options, onDone);
  };

  return await new Promise(executor);
}
/**
 * Create a VM sandbox whose global object does not inherit from the host.
 *
 * NOTE(review): Node documents that the vm module is not a security
 * mechanism; the measures here limit accidental context leakage only.
 *
 * @returns Methods to run code in the VM sandbox.
 */
function createSandbox() {
  // The context object must have a null prototype, otherwise host
  // properties would be reachable from inside the sandbox.
  const globals = Object.create(null);

  if (globals.toString) {
    throw new Error('Failed to create object without prototype');
  }

  const context = _vm.default.createContext(globals);

  return {
    /**
     * Run code for its side effects, ignoring any error it raises.
     *
     * @param code Code string.
     * @param opts VM options.
     */
    run(code, opts) {
      try {
        _vm.default.runInContext(code, context, opts);
      } catch (err) {
        // Best effort: failures of sandboxed code are ignored.
      }
    },

    /**
     * Evaluate expressions in the sandbox and read back their values.
     *
     * Each property of `data` maps an output key to a source expression.
     * The values are serialized to JSON inside the sandbox and parsed
     * again on the host side.
     *
     * @param data The data to get.
     * @param opts VM options.
     * @returns Data object, or null when evaluation or parsing fails.
     */
    data(data, opts) {
      const fields = [];

      for (const [key, expression] of Object.entries(data)) {
        fields.push(`${JSON.stringify(key)}:${expression}`);
      }

      const script = '(""+JSON.stringify({' + fields.join(',') + '}))';

      try {
        // Coerce the result with concatenation, NOT a cast, so that only
        // a primitive string ever reaches JSON.parse.
        // eslint-disable-next-line
        const serialized = '' + _vm.default.runInContext(script, context, opts);
        return JSON.parse(serialized);
      } catch (err) {
        return null;
      }
    }
  };
}
/**
 * Build the code that sets up the window environment.
 *
 * The result is a call expression that invokes the WINDOW source from the
 * data module with `this` and the body encoded as a JSON string literal.
 *
 * @param body HTML body.
 * @returns JavaScript code.
 */
function codeWindow(body) {
  const windowSource = _data.WINDOW;
  const bodyLiteral = JSON.stringify(body);
  return `(${windowSource})(this,${bodyLiteral})`;
}
/**
 * Extract file info from a URL.
 *
 * Downloads the page, builds a sandboxed window from its HTML, runs the
 * inline scripts that mention `dlbutton`, then reads the resulting href of
 * the element with id `dlbutton` and resolves it against the page URL.
 *
 * Rejects with an Error when the status code is not 200, when the body is
 * not a string, or when no download link can be read from the page.
 *
 * @param uri The URI to extract info from.
 * @param req Optional custom request function or null.
 * @returns File info.
 */
async function extract(uri, req = null) {
  const page = await requestP(req || request, {
    url: uri,
    gzip: true
  });
  const {
    response,
    body
  } = page;
  const {
    statusCode
  } = response;

  if (statusCode !== 200) {
    throw new Error(`Invalid status code: ${statusCode}`);
  }

  const bodyType = typeof body;

  if (bodyType !== 'string') {
    throw new Error(`Invalid body type: ${bodyType}`);
  }

  const timeout = 1000;
  const sandbox = createSandbox();

  // Setup environment.
  sandbox.run(codeWindow(body), {});

  // Collect the text of every script element in the page.
  const listScripts = '(function(i,r,l){while(++i<l.length){r.push(l[i].textContent)}return r})(-1,[],document.getElementsByTagName("script"))';
  const info = sandbox.data({
    scripts: listScripts
  }, {
    timeout
  });

  if (!info) {
    // Should not be possible.
    throw new Error('Internal error');
  }

  // Run only the scripts that modify the download button.
  for (const source of info.scripts) {
    if (!source.includes('dlbutton')) {
      continue;
    }

    sandbox.run(source, {
      timeout
    });
  }

  // Read the link the scripts produced.
  const result = sandbox.data({
    dlbutton: 'document.getElementById("dlbutton").href'
  }, {
    timeout
  });
  const href = result && result.dlbutton;

  if (!href) {
    throw new Error('Failed to extract info');
  }

  // Parse download link and file name.
  const resolved = new _url.default.URL(href, uri);
  const lastSegment = resolved.pathname.split('/').pop() || '';
  return {
    download: resolved.href,
    filename: decodeURI(lastSegment) || null
  };
}
232//# sourceMappingURL=extract.js.map