UNPKG

zs-extract

Version:
227 lines (190 loc) 4.79 kB
import vm from 'vm'; import url from 'url'; import cheerio from 'cheerio'; import fetch from 'node-fetch'; import { WINDOW } from "./data.mjs"; /** * The default request implementation. * * @param options Options object. * @param cb Callback function. */ function request(options, cb) { let response = { statusCode: 0, headers: {} }; const { encoding } = options; (async () => { const res = await fetch(options.url, { method: options.method || 'GET', headers: { 'User-Agent': '-', ...(options.headers || {}) }, compress: !!options.gzip }); const { status, headers } = res; const headersRaw = headers.raw(); const headersObject = {}; for (const p of Object.keys(headersRaw)) { headersObject[p] = headersRaw[p].join(', '); } response = { statusCode: status, headers: headersObject }; const data = await res.buffer(); return encoding === null ? data : data.toString(encoding); })().then(data => { cb(null, response, data); }, err => { cb(err, response, null); }); } /** * A request promise wrapper. * * @param req Request function. * @param options Request options. * @returns Request response and body. */ async function requestP(req, options) { const r = await new Promise((resolve, reject) => { req(options, (error, response, body) => { if (error) { reject(error); return; } resolve({ response, body }); }); }); return r; } /** * Code to create window. * * @param body HTML body. * @returns JavaScript code. */ function codeWindow(body) { return `(${WINDOW})(this,${JSON.stringify(body)})`; } /** * Code to extract data from window. * * @param data Data object. * @returns JavaScript code. */ function codeExtract(data) { const body = Object.entries(data).map(a => a.join(':')).join(','); return `(""+JSON.stringify({${body}}))`; } /** * Extract script code from HTML code. * * @param html HTML code. * @returns Script code. */ function extractScripts(html) { const r = []; const $ = cheerio.load(html); $('script').each((_elI, el) => { const data = $(el).html(); if (data) { r.push(data); } }); return r; } /** * Attempt to extract info from script. * * @param body HTML body. * @param script Script code. * @returns Result object or null. */ function extractScript(body, script) { let result = null; if (!script.includes('dlbutton')) { return result; } // Create a context with wich to run code in // Creating the object with a null prototype is very important. // Prevents host variables from leaking into the sanbox. const ctxObj = Object.create(null); if (ctxObj.toString) { throw new Error('Failed to create object without prototype'); } const ctx = vm.createContext(ctxObj); const runOpts = { timeout: 1000 }; // Setup environment. const codePre = codeWindow(body); // Extract info from environment. const codePost = codeExtract({ dlbutton: 'document.getElementById("dlbutton").href' }); // Attempt to run code in sanbox and extract the info. try { // Run the pre script. vm.runInContext(codePre, ctx, runOpts); // Run the script code. vm.runInContext(script, ctx, runOpts); // Run the post script. // Force return value to be string, with concatenation, NOT casting. // This prevents any funny business from sandboxed code. // eslint-disable-next-line result = JSON.parse('' + vm.runInContext(codePost, ctx, runOpts)); } catch (err) {// Ignore failure. } return result; } /** * Extract file info from a URL. * * @param uri The URI to extract info from. * @param req Optional custom request function or null. * @returns File info. */ export async function extract(uri, req = null) { const requester = req || request; const { response, body } = await requestP(requester, { url: uri, gzip: true }); const { statusCode } = response; if (statusCode !== 200) { throw new Error(`Invalid status code: ${statusCode}`); } const bodyType = typeof body; if (bodyType !== 'string') { throw new Error(`Invalid body type: ${bodyType}`); } const scripts = extractScripts(body); let result = null; for (const script of scripts) { result = extractScript(body, script); if (result) { break; } } if (!result || !result.dlbutton) { throw new Error('Failed to extract info'); } const download = url.resolve(uri, result.dlbutton); const filename = decodeURI((url.parse(download).pathname || '').split('/').pop() || '') || null; return { download, filename }; } //# sourceMappingURL=extract.mjs.map