UNPKG

3.97 kBJavaScriptView Raw
1"use strict";
2
3var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
4
5Object.defineProperty(exports, "__esModule", {
6 value: true
7});
8exports.extract = extract;
9
10var _vm = _interopRequireDefault(require("vm"));
11
12var _url = _interopRequireDefault(require("url"));
13
14var _request = _interopRequireDefault(require("request"));
15
16var _cheerio = _interopRequireDefault(require("cheerio"));
17
/**
 * Promise adapter around a callback-style request function.
 *
 * Invokes `req(options, callback)` and settles from the callback's
 * arguments: a truthy error rejects the promise, anything else resolves it.
 *
 * @param req Request function taking `(options, callback)`; the callback is
 *   called with `(error, response, body)`.
 * @param options Request options, handed to `req` as-is.
 * @returns Promise for an object holding `response` and `body`.
 */
async function requestP(req, options) {
  return new Promise((fulfil, fail) => {
    const onComplete = (error, response, body) => {
      if (!error) {
        fulfil({
          response,
          body
        });
        return;
      }

      fail(error);
    };

    req(options, onComplete);
  });
}
/**
 * Collect the inline code of every `<script>` element in an HTML document.
 *
 * Elements whose content is empty (for example external scripts that only
 * carry a `src` attribute) are left out of the result.
 *
 * @param html HTML code.
 * @returns List of script code strings, in document order.
 */
function extractScripts(html) {
  const $ = _cheerio.default.load(html);

  return $('script')
    .toArray()
    .map(node => $(node).html())
    .filter(code => Boolean(code));
}
/**
 * Attempt to extract the download link from one inline script.
 *
 * The script is evaluated with Node's `vm` module against a minimal fake
 * `window`/`document`, and the `href` it assigns to the element with id
 * "dlbutton" is read back as JSON.
 *
 * NOTE(review): the script originates from a remote page. Node documents the
 * `vm` module as NOT being a security mechanism, so the null-prototype
 * context and the timeout below reduce exposure but do not make running
 * hostile code safe.
 *
 * @param script Script code.
 * @returns Object with a non-empty string `dlbutton`, or null when the
 *   script does not mention the button, fails to run, or leaves no usable
 *   link behind.
 */
function extractScript(script) {
  // Cheap pre-filter: only a script that names the button can set its link.
  if (!script.includes('dlbutton')) {
    return null;
  }

  // Create a context with which to run the code.
  // Creating the object with a null prototype is very important:
  // it prevents host variables from leaking into the sandbox.
  const ctx = _vm.default.createContext(Object.create(null));

  // Bound each evaluation so a looping script cannot hang the caller.
  const runOpts = {
    timeout: 1000
  };

  // Set up the environment: a `window` alias for the global object and a
  // `document` whose getElementById only knows the two ids the page uses.
  const codePre = [
    'window = this;',
    'document = (function(r) {',
    'var elements = {',
    '"dlbutton": {},',
    '"fimage": {}',
    '};',
    'r.getElementById = function(id) {',
    'return elements[id];',
    '}',
    'return r;',
    '})({});'
  ].join('\n');

  // Read the link back out of the environment as a JSON string.
  const codePost = [
    'JSON.stringify({',
    '"dlbutton": document.getElementById("dlbutton").href',
    '})'
  ].join('\n');

  let parsed;

  try {
    _vm.default.runInContext(codePre, ctx, runOpts);
    _vm.default.runInContext(script, ctx, runOpts);

    // Force the return value to be a string with concatenation, NOT casting.
    // This prevents any funny business from sandboxed code.
    // eslint-disable-next-line
    parsed = JSON.parse('' + _vm.default.runInContext(codePost, ctx, runOpts));
  } catch (err) {
    // Deliberate best effort: a script that throws, times out or yields
    // invalid JSON simply provides no info.
    return null;
  }

  // The sandboxed code controls what comes back (it can even replace JSON),
  // so accept only the exact shape callers rely on. An unset href serializes
  // to `{}`, which must count as "nothing found" rather than a result.
  if (parsed === null || typeof parsed !== 'object') {
    return null;
  }

  if (typeof parsed.dlbutton !== 'string' || parsed.dlbutton === '') {
    return null;
  }

  return {
    dlbutton: parsed.dlbutton
  };
}
/**
 * Extract file info from a URL.
 *
 * Fetches the page, evaluates its inline scripts one by one and uses the
 * first one that yields a non-empty string download link. The link is
 * resolved against `uri`, and the filename is taken from the last path
 * segment of the resolved URL.
 *
 * @param uri The URI to extract info from.
 * @param req Optional custom request function or null.
 * @returns File info: `download` (absolute URL) and `filename` (decoded
 *   last path segment, or null when the path has none).
 * @throws Error when the status code is not 200, the body is not a string,
 *   or no script yields a usable link.
 */
async function extract(uri, req = null) {
  const requester = req || _request.default;
  const {
    response,
    body
  } = await requestP(requester, {
    url: uri
  });
  const {
    statusCode
  } = response;

  if (statusCode !== 200) {
    throw new Error(`Invalid status code: ${statusCode}`);
  }

  const bodyType = typeof body;

  if (bodyType !== 'string') {
    throw new Error(`Invalid body type: ${bodyType}`);
  }

  // Keep scanning until a script yields a usable link. A script can mention
  // the button without setting its href (giving an empty result), and the
  // value comes from untrusted code, so it must be a non-empty string.
  let link = null;

  for (const script of extractScripts(body)) {
    const info = extractScript(script);

    if (info && typeof info.dlbutton === 'string' && info.dlbutton !== '') {
      link = info.dlbutton;
      break;
    }
  }

  if (link === null) {
    throw new Error('Failed to extract info');
  }

  const download = _url.default.resolve(uri, link);

  const pathname = _url.default.parse(download).pathname || '';
  const rawName = pathname.split('/').pop() || '';
  let filename;

  try {
    filename = decodeURI(rawName) || null;
  } catch (err) {
    // decodeURI throws URIError on a malformed percent-escape. The download
    // URL is still valid, so fall back to the undecoded segment.
    filename = rawName || null;
  }

  return {
    download,
    filename
  };
}
170//# sourceMappingURL=extract.js.map