1 | import vm from 'vm';
|
2 | import url from 'url';
|
3 | import request from 'request';
|
4 | import cheerio from 'cheerio';
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
/**
 * Promise adapter around a callback-style request function.
 *
 * @param {Function} req - Function invoked as `req(options, callback)`, where
 *   the callback receives `(error, response, body)`.
 * @param {Object} options - Passed through to `req` unchanged.
 * @returns {Promise<{response: *, body: *}>} Resolves with the response and
 *   body handed to the callback; rejects with the callback's error when it is
 *   truthy.
 */
async function requestP(req, options) {
  return new Promise((resolve, reject) => {
    const settle = (error, response, body) =>
      (error ? reject(error) : resolve({ response, body }));

    req(options, settle);
  });
}
|
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
34 |
|
35 |
|
36 |
|
/**
 * Collect the inline source of every `<script>` element in an HTML document.
 *
 * @param {string} html - HTML markup to parse with cheerio.
 * @returns {string[]} Script bodies in document order; elements whose content
 *   is empty (for example external scripts with no inline code) are skipped.
 */
function extractScripts(html) {
  const $ = cheerio.load(html);
  const sources = [];

  for (const node of $('script').toArray()) {
    const code = $(node).html();

    // `html()` yields null or '' when there is no inline code; skip those.
    if (code) {
      sources.push(code);
    }
  }

  return sources;
}
|
49 |
|
50 |
|
51 |
|
52 |
|
53 |
|
54 |
|
55 |
|
56 |
|
/**
 * Run one inline page script against a minimal fake DOM and read back the
 * `href` it assigns to the `dlbutton` element.
 *
 * SECURITY NOTE(review): the script comes from a remote page and is executed
 * with `vm`. The Node.js documentation states that `vm` is not a security
 * mechanism, so this must not be relied on as a sandbox for hostile input.
 * The timeout only bounds synchronous execution inside the context.
 *
 * @param {string} script - Inline script source code.
 * @returns {{dlbutton: string}|null} The extracted link, or `null` when the
 *   script does not mention `dlbutton`, fails to run, or does not leave a
 *   non-empty string in `dlbutton.href`.
 */
function extractScript(script) {
  // Cheap pre-filter: only scripts that mention the button can set its href.
  if (typeof script !== 'string' || !script.includes('dlbutton')) {
    return null;
  }

  // Null-prototype sandbox object so nothing from the host leaks in through
  // the context's global object.
  const ctx = vm.createContext(Object.create(null));
  const runOpts = {
    timeout: 1000
  };

  // Stub `window` and a `document` that knows only the elements the page
  // script is expected to touch.
  const codePre = [
    'window = this;',
    'document = (function(r) {',
    'var elements = {',
    '"dlbutton": {},',
    '"fimage": {}',
    '};',
    'r.getElementById = function(id) {',
    'return elements[id];',
    '}',
    'return r;',
    '})({});'
  ].join('\n');

  // Serialize inside the context so only a primitive string crosses back.
  const codePost = [
    'JSON.stringify({',
    '"dlbutton": document.getElementById("dlbutton").href',
    '})'
  ].join('\n');

  let parsed = null;
  try {
    vm.runInContext(codePre, ctx, runOpts);
    vm.runInContext(script, ctx, runOpts);

    const serialized = vm.runInContext(codePost, ctx, runOpts);

    // Never coerce a context-supplied value on the host side: a tampered
    // `JSON.stringify` could hand back an object with a hostile `toString`.
    if (typeof serialized !== 'string') {
      return null;
    }
    parsed = JSON.parse(serialized);
  } catch (err) {
    // Best effort by design: a script that throws, times out, or yields
    // unparsable output simply is not the one carrying the link.
    return null;
  }

  // A script that mentions `dlbutton` without assigning a usable href must
  // not look like a hit, otherwise the caller stops searching too early.
  const link = parsed === null || typeof parsed !== 'object' ? undefined : parsed.dlbutton;
  if (typeof link !== 'string' || link === '') {
    return null;
  }

  return {
    dlbutton: link
  };
}
|
99 |
|
100 |
|
101 |
|
102 |
|
103 |
|
104 |
|
105 |
|
106 |
|
107 |
|
/**
 * Fetch a page and extract the download link that its inline scripts assign
 * to the `dlbutton` element.
 *
 * @param {string} uri - URL of the page to fetch; also the base against which
 *   the extracted link is resolved.
 * @param {Function|null} [req=null] - Optional request-style function called
 *   as `req(options, callback)`; the imported `request` is used when falsy.
 * @returns {Promise<{download: string, filename: (string|null)}>} The resolved
 *   download URL and the URI-decoded last path segment (`null` when empty).
 * @throws {Error} When the status code is not 200, the body is not a string,
 *   or no script yields a `dlbutton` link.
 */
export async function extract(uri, req = null) {
  const page = await requestP(req || request, {
    url: uri
  });

  const code = page.response.statusCode;
  if (code !== 200) {
    throw new Error(`Invalid status code: ${code}`);
  }

  const html = page.body;
  if (typeof html !== 'string') {
    throw new Error(`Invalid body type: ${typeof html}`);
  }

  // Evaluate scripts lazily and stop at the first one that returns a result.
  let info = null;
  for (const source of extractScripts(html)) {
    info = extractScript(source);

    if (info) {
      break;
    }
  }

  if (!(info && info.dlbutton)) {
    throw new Error('Failed to extract info');
  }

  const download = url.resolve(uri, info.dlbutton);

  // The file name is the decoded final path segment of the download URL.
  const { pathname } = url.parse(download);
  const lastSegment = (pathname || '').split('/').pop() || '';
  const filename = decodeURI(lastSegment) || null;

  return {
    download,
    filename
  };
}
|
152 |
|