// Retrieved from the UNPKG file viewer (5 kB, JavaScript, raw view).
import vm from 'vm';
import url from 'url';
import fetch from 'node-fetch';
import { WINDOW } from "./data.mjs";
5
/**
 * The default request implementation, built on the imported `fetch`.
 *
 * Requests `options.url` using `options.method` (falling back to 'GET') and
 * a placeholder User-Agent header that `options.headers` may override, then
 * reports the outcome through a Node-style callback.
 *
 * @param options Options object (`url`, `method`, `headers`, `gzip`,
 *   `encoding`). An `encoding` of exactly `null` yields the raw Buffer; any
 *   other value is passed to `Buffer#toString`.
 * @param cb Callback function, invoked as `cb(error, response, body)`.
 */
function request(options, cb) {
  const { encoding } = options;

  // Reported unchanged when the request fails before a response arrives.
  let response = {
    statusCode: 0,
    headers: {},
  };

  const load = async () => {
    const res = await fetch(options.url, {
      method: options.method || 'GET',
      headers: {
        'User-Agent': '-',
        ...options.headers,
      },
      compress: !!options.gzip,
    });

    // Collapse each multi-value header into one comma-separated string.
    // Object.fromEntries defines own properties, so no header name (such as
    // "__proto__") can be lost to an inherited setter.
    const headers = Object.fromEntries(
      Object.entries(res.headers.raw()).map(([name, values]) => [
        name,
        values.join(', '),
      ]),
    );
    response = {
      statusCode: res.status,
      headers,
    };

    const data = await res.buffer();
    return encoding === null ? data : data.toString(encoding);
  };

  // Two-argument then(): an exception thrown by `cb` on the success path is
  // never fed back into `cb` as a request error.
  load().then(
    (data) => {
      cb(null, response, data);
    },
    (err) => {
      cb(err, response, null);
    },
  );
}
/**
 * A request promise wrapper.
 *
 * Adapts a callback-style request function to a promise: the promise rejects
 * with the callback's error when one is passed, and otherwise resolves with
 * the response and body.
 *
 * @param req Request function, called as `req(options, callback)`.
 * @param options Request options, forwarded to `req` untouched.
 * @returns Request response and body.
 */
function requestP(req, options) {
  return new Promise((resolve, reject) => {
    req(options, (error, response, body) => {
      if (error) {
        reject(error);
        return;
      }

      resolve({
        response,
        body,
      });
    });
  });
}
/**
 * Create a VM sandbox safe from context leakage.
 *
 * @returns Methods to run code in the VM sandbox.
 */
function createSandbox() {
  // Create a context with which to run code in.
  // Creating the object with a null prototype is very important.
  // Prevents host variables from leaking into the sandbox.
  const ctxObj = Object.create(null);

  // Sanity check: a null-prototype object inherits nothing, so even
  // `toString` must be absent.
  if (ctxObj.toString) {
    throw new Error('Failed to create object without prototype');
  }

  const ctx = vm.createContext(ctxObj);

  return {
    /**
     * Run code, no return.
     *
     * @param code Code string.
     * @param opts VM options.
     */
    run: (code, opts) => {
      try {
        vm.runInContext(code, ctx, opts);
      } catch (err) {
        // The caught value is deliberately discarded and replaced by a
        // host-created error, so nothing thrown by sandboxed code is handed
        // to the caller.
        throw new Error('Error running sandboxed script');
      }
    },

    /**
     * Run code, return data.
     *
     * Each value in `data` is a source expression evaluated inside the
     * sandbox; the results are serialized to JSON in the sandbox and parsed
     * on the host.
     *
     * @param data The data to get, as a map of result key to expression.
     * @param opts VM options.
     * @returns Data object.
     */
    data: (data, opts) => {
      const fields = Object.entries(data)
        .map(([key, expression]) => `${JSON.stringify(key)}:${expression}`)
        .join(',');
      const script = `(""+JSON.stringify({${fields}}))`;
      let parsed = null;

      try {
        // Force return value string with concatenation, NOT casting.
        // This prevents any funny business from sandboxed code.
        // eslint-disable-next-line
        parsed = JSON.parse('' + vm.runInContext(script, ctx, opts));
      } catch (err) {
        // Do nothing: the falsy check below reports the failure.
      }

      if (!parsed) {
        throw new Error('Error running sandboxed script');
      }

      return parsed;
    },
  };
}
/**
 * Code to create window.
 *
 * Wraps the imported WINDOW source in parentheses and calls it with `this`
 * and the HTML body embedded as a JSON string literal.
 * NOTE(review): WINDOW is presumably the source text of a function taking
 * (global, html) — confirm against data.mjs.
 *
 * @param body HTML body.
 * @returns JavaScript code.
 */
function codeWindow(body) {
  const bodyLiteral = JSON.stringify(body);
  return `(${WINDOW})(this,${bodyLiteral})`;
}
/**
 * Extract file info from a URL.
 *
 * Downloads the page, rebuilds it inside a fresh VM sandbox, runs the page
 * scripts that mention `dlbutton`, and reads the resulting download link.
 *
 * @param uri The URI to extract info from.
 * @param req Optional custom request function or null.
 * @returns File info: the absolute `download` URL and the decoded `filename`
 *   (null when the URL path has no final segment).
 * @throws Error on a non-200 status, a non-string body, a sandbox failure,
 *   or a missing download link.
 */
export async function extract(uri, req = null) {
  const { response, body } = await requestP(req || request, {
    url: uri,
    gzip: true,
  });

  const { statusCode } = response;
  if (statusCode !== 200) {
    throw new Error(`Invalid status code: ${statusCode}`);
  }

  const bodyType = typeof body;
  if (bodyType !== 'string') {
    throw new Error(`Invalid body type: ${bodyType}`);
  }

  const sandbox = createSandbox();
  const timeout = 1000;

  // Setup environment.
  sandbox.run(codeWindow(body), {});

  // Extract info from environment.
  const info = sandbox.data(
    {
      scripts:
        '(function(i,r,l){while(++i<l.length){r.push(l[i].textContent)}return r})(-1,[],document.getElementsByTagName("script"))',
    },
    { timeout },
  );

  // Run the scripts that modify the download button.
  for (const script of info.scripts) {
    if (script.includes('dlbutton')) {
      sandbox.run(script, { timeout });
    }
  }

  // Extract info about environment.
  const result = sandbox.data(
    {
      dlbutton: 'document.getElementById("dlbutton").href',
    },
    { timeout },
  );

  // Check result.
  if (!result.dlbutton) {
    throw new Error('Failed to extract info');
  }

  // Parse download link and file name.
  const u = new url.URL(result.dlbutton, uri);
  const lastSegment = u.pathname.split('/').pop() || '';
  let filename = lastSegment;

  try {
    filename = decodeURI(lastSegment);
  } catch (err) {
    if (!(err instanceof URIError)) {
      throw err;
    }
    // Malformed percent escape: keep the raw segment rather than failing an
    // extraction whose download URL was resolved successfully.
  }

  return {
    download: u.href,
    filename: filename || null,
  };
}
//# sourceMappingURL=extract.mjs.map