UNPKG

3.59 kBJavaScriptView Raw
1import vm from 'vm';
2import url from 'url';
3import request from 'request';
4import cheerio from 'cheerio';
/**
 * Promise adapter around a callback-style request function.
 *
 * The request function is invoked as `req(options, callback)` and the
 * callback is expected to receive `(error, response, body)`.
 *
 * @param req Request function.
 * @param options Request options.
 * @returns Request response and body.
 */
async function requestP(req, options) {
  return new Promise((fulfill, fail) => {
    // Settle the promise from the node-style callback arguments.
    const onDone = (error, response, body) => {
      if (error) {
        fail(error);
      } else {
        fulfill({ response, body });
      }
    };

    req(options, onDone);
  });
}
/**
 * Collect the inline code of every script element in an HTML document.
 *
 * @param html HTML code.
 * @returns Script code, one entry per script element with non-empty content.
 */
function extractScripts(html) {
  const $ = cheerio.load(html);

  // Script elements whose content is empty or missing are dropped.
  return $('script')
    .toArray()
    .map(node => $(node).html())
    .filter(Boolean);
}
/**
 * Attempt to extract info from script.
 *
 * The script is executed in a fresh vm context against a minimal fake
 * `window`/`document`, after which the `href` assigned to the element with
 * id `dlbutton` is read back.
 *
 * @param script Script code.
 * @returns Result object with a non-empty string `dlbutton`, or null if the
 *   script does not mention `dlbutton`, fails to run, or assigns no usable
 *   `href`.
 */
function extractScript(script) {
  // Cheap pre-filter: only scripts that mention the button are worth running.
  if (!script.includes('dlbutton')) {
    return null;
  }

  // Create a context in which to run the code.
  // Creating the object with a null prototype is very important: it keeps
  // host variables from leaking into the sandbox.
  // NOTE(review): the Node.js documentation states that the vm module is not
  // a security mechanism; this limits accidents, it does not contain a
  // determined attacker.
  const ctx = vm.createContext(Object.create(null));
  const runOpts = {
    timeout: 1000
  };

  // Set up the environment: a `window` alias and a `document` stub that only
  // knows the two element ids below.
  const codePre = [
    'window = this;',
    'document = (function(r) {',
    'var elements = {',
    '"dlbutton": {},',
    '"fimage": {}',
    '};',
    'r.getElementById = function(id) {',
    'return elements[id];',
    '}',
    'return r;',
    '})({});'
  ].join('\n');

  // Extract the info from the environment as a JSON string.
  const codePost = [
    'JSON.stringify({',
    '"dlbutton": document.getElementById("dlbutton").href',
    '})'
  ].join('\n');

  try {
    vm.runInContext(codePre, ctx, runOpts);
    vm.runInContext(script, ctx, runOpts);
    const json = vm.runInContext(codePost, ctx, runOpts);

    // Only accept a primitive string. Coercing the value (for example with
    // '' + value) would call toString/valueOf supplied by the sandboxed code
    // on the host side, where the timeout above no longer applies.
    if (typeof json !== 'string') {
      return null;
    }

    const parsed = JSON.parse(json);
    const dlbutton = parsed !== null && typeof parsed === 'object' ?
      parsed.dlbutton :
      null;

    // A script that never assigned a string href yields no info.
    if (typeof dlbutton !== 'string' || dlbutton === '') {
      return null;
    }

    return { dlbutton };
  } catch (err) {
    // Deliberate best-effort: a script that throws, times out, or produces
    // invalid JSON simply yields no info.
    return null;
  }
}
/**
 * Extract file info from a URL.
 *
 * Requests the page, runs its inline scripts one at a time until one yields
 * a download link, and resolves that link against the page URI.
 *
 * @param uri The URI to extract info from.
 * @param req Optional custom request function or null.
 * @returns File info: the resolved `download` URL and the `filename` taken
 *   from the last path segment (null when that segment is empty).
 * @throws Error if the status code is not 200, the body is not a string, or
 *   no script yields a download link.
 */
export async function extract(uri, req = null) {
  const requester = req || request;
  const { response, body } = await requestP(requester, {
    url: uri
  });
  const { statusCode } = response;

  if (statusCode !== 200) {
    throw new Error(`Invalid status code: ${statusCode}`);
  }

  const bodyType = typeof body;

  if (bodyType !== 'string') {
    throw new Error(`Invalid body type: ${bodyType}`);
  }

  // Keep looking until a script produces a usable link. A script that ran
  // but produced no (or a non-string) link must not end the search.
  let link = null;

  for (const script of extractScripts(body)) {
    const info = extractScript(script);
    const candidate = info ? info.dlbutton : null;

    if (typeof candidate === 'string' && candidate) {
      link = candidate;
      break;
    }
  }

  if (!link) {
    throw new Error('Failed to extract info');
  }

  const download = url.resolve(uri, link);
  const encodedName = (url.parse(download).pathname || '').split('/').pop() || '';

  // decodeURI throws URIError on malformed percent-escapes; in that case keep
  // the raw segment rather than discarding a valid download URL.
  let filename;

  try {
    filename = decodeURI(encodedName);
  } catch (err) {
    filename = encodedName;
  }

  return {
    download,
    filename: filename || null
  };
}
152//# sourceMappingURL=extract.mjs.map