UNPKG

4.9 kBJavaScriptView Raw
1import vm from 'vm';
2import url from 'url';
3import fetch from 'node-fetch';
4import { WINDOW } from "./data.mjs";
5
/**
 * Default request implementation, backed by `fetch`.
 *
 * Reads `url`, `method` (defaults to 'GET'), `headers`, `gzip` and
 * `encoding` from the options object. The callback always receives three
 * arguments: `(error, response, body)`.
 *
 * @param options Options object.
 * @param cb Callback function.
 */
function request(options, cb) {
  const { encoding } = options;

  // Reported as-is if the request fails before a real response arrives.
  let response = {
    statusCode: 0,
    headers: {}
  };

  const perform = async () => {
    const res = await fetch(options.url, {
      method: options.method || 'GET',
      headers: {
        'User-Agent': '-',
        ...(options.headers || {})
      },
      compress: !!options.gzip
    });
    const statusCode = res.status;

    // Collapse each multi-value header into one comma separated string.
    const flatHeaders = {};
    for (const [name, values] of Object.entries(res.headers.raw())) {
      flatHeaders[name] = values.join(', ');
    }
    response = {
      statusCode,
      headers: flatHeaders
    };

    // A null encoding asks for the raw buffer; anything else is decoded.
    const buffer = await res.buffer();
    if (encoding === null) {
      return buffer;
    }
    return buffer.toString(encoding);
  };

  perform().then(
    data => {
      cb(null, response, data);
    },
    err => {
      cb(err, response, null);
    }
  );
}
/**
 * Promise adapter around a callback style request function.
 *
 * Rejects with the error when the callback reports one, otherwise
 * resolves with the response and body wrapped in a single object.
 *
 * @param req Request function.
 * @param options Request options.
 * @returns Request response and body.
 */
async function requestP(req, options) {
  return new Promise((resolve, reject) => {
    const onDone = (error, response, body) => {
      if (!error) {
        resolve({
          response,
          body
        });
        return;
      }
      reject(error);
    };
    req(options, onDone);
  });
}
/**
 * Create a VM sandbox safe from context leakage.
 *
 * @returns Methods to run code in the VM sandbox.
 */
function createSandbox() {
  // The context object must have a null prototype, otherwise properties
  // inherited from the host Object.prototype would leak into the sandbox.
  const bare = Object.create(null);
  if (bare.toString) {
    throw new Error('Failed to create object without prototype');
  }
  const context = vm.createContext(bare);

  return {
    /**
     * Run code, no return. Any error raised by the code is ignored.
     *
     * @param code Code string.
     * @param opts VM options.
     */
    run: (code, opts) => {
      try {
        vm.runInContext(code, context, opts);
      } catch (err) {
        // Deliberately ignored.
      }
    },

    /**
     * Run code, return data.
     *
     * Each value in `data` is a source expression evaluated inside the
     * sandbox; the results come back keyed by the same names.
     *
     * @param data The data to get.
     * @param opts VM options.
     * @returns Data object, or null if evaluation or parsing failed.
     */
    data: (data, opts) => {
      const fields = [];
      for (const [key, expression] of Object.entries(data)) {
        fields.push(`${JSON.stringify(key)}:${expression}`);
      }
      const script = `(""+JSON.stringify({${fields.join(',')}}))`;

      try {
        // Force return value string with concatenation, NOT casting.
        // This prevents any funny business from sandboxed code.
        // eslint-disable-next-line
        return JSON.parse('' + vm.runInContext(script, context, opts));
      } catch (err) {
        return null;
      }
    }
  };
}
/**
 * Build the code that sets up the window environment.
 *
 * The result is a call expression: the WINDOW source wrapped in
 * parentheses, invoked with `this` and the HTML body as a JSON literal.
 *
 * @param body HTML body.
 * @returns JavaScript code.
 */
function codeWindow(body) {
  // JSON encoding keeps the HTML a plain literal in the generated source.
  const bodyLiteral = JSON.stringify(body);
  return `(${WINDOW})(this,${bodyLiteral})`;
}
/**
 * Get the decoded file name from the last segment of a URL path.
 *
 * @param pathname URL path name.
 * @returns Decoded file name, or null if the last segment is empty.
 */
function fileNameFromPath(pathname) {
  const segment = pathname.split('/').pop() || '';
  try {
    return decodeURI(segment) || null;
  } catch (err) {
    // decodeURI throws URIError on malformed percent sequences such as a
    // stray "%". The link itself is still usable, so keep the raw segment.
    if (!(err instanceof URIError)) {
      throw err;
    }
    return segment || null;
  }
}

/**
 * Extract file info from a URL.
 *
 * Fetches the page, loads it into a VM sandbox, runs the page scripts that
 * mention "dlbutton", then reads the resulting link of the "dlbutton"
 * element and resolves it against the page URL.
 *
 * @param uri The URI to extract info from.
 * @param req Optional custom request function or null.
 * @returns File info, with `download` (absolute URL) and `filename`
 *   (decoded last path segment, or null if empty).
 * @throws Error if the status code is not 200, the body is not a string,
 *   or no download link could be extracted.
 */
export async function extract(uri, req = null) {
  const requester = req || request;
  const { response, body } = await requestP(requester, {
    url: uri,
    gzip: true
  });

  const { statusCode } = response;
  if (statusCode !== 200) {
    throw new Error(`Invalid status code: ${statusCode}`);
  }

  const bodyType = typeof body;
  if (bodyType !== 'string') {
    throw new Error(`Invalid body type: ${bodyType}`);
  }

  const sandbox = createSandbox();
  const timeout = 1000;

  // Setup environment.
  sandbox.run(codeWindow(body), {});

  // Extract info from environment.
  const info = sandbox.data(
    {
      scripts:
        '(function(i,r,l){' +
        'while(++i<l.length){' +
        'r.push(l[i].textContent)' +
        '}' +
        'return r' +
        '})(-1,[],document.getElementsByTagName("script"))'
    },
    { timeout }
  );
  if (!info) {
    // Should not be possible.
    throw new Error('Internal error');
  }

  // Run the scripts that modify the download button.
  for (const script of info.scripts) {
    // Entries come back from the sandbox as JSON, so a script without
    // text may not be a string; skip those instead of throwing.
    if (typeof script === 'string' && script.includes('dlbutton')) {
      sandbox.run(script, { timeout });
    }
  }

  // Extract info about environment.
  const result = sandbox.data(
    {
      dlbutton: 'document.getElementById("dlbutton").href'
    },
    { timeout }
  );

  // Check result.
  if (!result || !result.dlbutton) {
    throw new Error('Failed to extract info');
  }

  // Parse download link and file name.
  const u = new url.URL(result.dlbutton, uri);
  return {
    download: u.href,
    filename: fileNameFromPath(u.pathname)
  };
}
219//# sourceMappingURL=extract.mjs.map