zs-extract
Version:
Zippyshare download data extractor
227 lines (190 loc) • 4.79 kB
JavaScript
import vm from 'vm';
import url from 'url';
import cheerio from 'cheerio';
import fetch from 'node-fetch';
import { WINDOW } from "./data.mjs";
/**
 * Default request implementation, backed by `fetch`.
 *
 * Reads `url`, `method` (default 'GET'), `headers`, `gzip` and `encoding`
 * from the options object. The callback receives `(error, response, body)`,
 * where `response` carries `statusCode` and `headers`, and `body` is a
 * Buffer when `encoding` is exactly `null`, otherwise a decoded string.
 *
 * @param options Options object.
 * @param cb Callback function.
 */
function request(options, cb) {
  const { encoding } = options;

  // Placeholder reported to the callback if the request fails before any
  // response head has been received.
  let response = { statusCode: 0, headers: {} };

  const load = async () => {
    const res = await fetch(options.url, {
      method: options.method || 'GET',
      headers: { 'User-Agent': '-', ...(options.headers || {}) },
      compress: !!options.gzip
    });

    // Collapse multi-value headers into single comma separated strings.
    const flatHeaders = {};
    for (const [name, values] of Object.entries(res.headers.raw())) {
      flatHeaders[name] = values.join(', ');
    }
    response = { statusCode: res.status, headers: flatHeaders };

    const payload = await res.buffer();
    if (encoding === null) {
      return payload;
    }
    return payload.toString(encoding);
  };

  load().then(
    data => {
      cb(null, response, data);
    },
    err => {
      cb(err, response, null);
    }
  );
}
/**
 * Promise adapter around a callback-style request function.
 *
 * Rejects with the error passed to the callback, otherwise resolves with
 * the response and body bundled in one object.
 *
 * @param req Request function.
 * @param options Request options.
 * @returns Request response and body.
 */
async function requestP(req, options) {
  return new Promise((resolve, reject) => {
    const onDone = (error, response, body) => {
      if (error) {
        reject(error);
      } else {
        resolve({ response, body });
      }
    };
    req(options, onDone);
  });
}
/**
 * Build the code that sets up the window environment.
 *
 * The generated code calls the WINDOW function source with `this` and the
 * HTML body embedded as a JSON string literal.
 *
 * @param body HTML body.
 * @returns JavaScript code.
 */
function codeWindow(body) {
  const call = `(this,${JSON.stringify(body)})`;
  return `(${WINDOW})${call}`;
}
/**
 * Code to extract data from window.
 *
 * Builds an expression that evaluates an object literal and returns it
 * serialized as a JSON string.
 *
 * @param data Object mapping result property names to JavaScript
 *   expression source code.
 * @returns JavaScript code.
 */
function codeExtract(data) {
  // Property names are emitted as JSON string literals so that keys which
  // are not valid identifiers (such as "dl-button") still produce code that
  // parses. Values are expression source and are inserted verbatim.
  const props = Object.entries(data)
    .map(([key, expression]) => `${JSON.stringify(key)}:${expression}`)
    .join(',');
  return `(""+JSON.stringify({${props}}))`;
}
/**
 * Collect the inline code of every script element in an HTML document.
 *
 * Script elements whose content is empty are skipped.
 *
 * @param html HTML code.
 * @returns Script code.
 */
function extractScripts(html) {
  const $ = cheerio.load(html);
  const scripts = [];
  for (const element of $('script').toArray()) {
    const code = $(element).html();
    if (code) {
      scripts.push(code);
    }
  }
  return scripts;
}
/**
 * Try to pull the download info out of a single script.
 *
 * Scripts that never mention 'dlbutton' are rejected immediately. Otherwise
 * the script is run in a fresh vm context and the resulting button href is
 * read back out as JSON.
 *
 * @param body HTML body.
 * @param script Script code.
 * @returns Result object or null.
 */
function extractScript(body, script) {
  if (!script.includes('dlbutton')) {
    return null;
  }

  // Create a context with which to run code in.
  // Creating the object with a null prototype is very important.
  // Prevents host variables from leaking into the sandbox.
  const sandbox = Object.create(null);
  if (sandbox.toString) {
    throw new Error('Failed to create object without prototype');
  }
  const context = vm.createContext(sandbox);
  const limits = { timeout: 1000 };

  // Code that sets up the environment.
  const setupCode = codeWindow(body);

  // Code that reads the info back out of the environment.
  const readCode = codeExtract({
    dlbutton: 'document.getElementById("dlbutton").href'
  });

  // Attempt to run code in the sandbox and extract the info.
  try {
    vm.runInContext(setupCode, context, limits);
    vm.runInContext(script, context, limits);

    // Force return value to be string, with concatenation, NOT casting.
    // This prevents any funny business from sandboxed code.
    // eslint-disable-next-line
    return JSON.parse('' + vm.runInContext(readCode, context, limits));
  } catch (err) {
    // Failure is expected for scripts that are not the right one.
    return null;
  }
}
/**
 * Extract file info from a URL.
 *
 * Fetches the page, runs each inline script through the sandboxed extractor
 * until one yields a result, then resolves the download link against the
 * page URL.
 *
 * @param uri The URI to extract info from.
 * @param req Optional custom request function or null.
 * @returns File info: `download` is the resolved download URL, `filename`
 *   is the last path segment of that URL (decoded when possible) or null
 *   when the path has no final segment.
 * @throws Error if the status code is not 200, the body is not a string,
 *   or no script yields a usable download link.
 */
export async function extract(uri, req = null) {
  const requester = req || request;
  const { response, body } = await requestP(requester, {
    url: uri,
    gzip: true
  });

  const { statusCode } = response;
  if (statusCode !== 200) {
    throw new Error(`Invalid status code: ${statusCode}`);
  }

  const bodyType = typeof body;
  if (bodyType !== 'string') {
    throw new Error(`Invalid body type: ${bodyType}`);
  }

  // The first script that produces a result wins.
  let result = null;
  for (const script of extractScripts(body)) {
    result = extractScript(body, script);
    if (result) {
      break;
    }
  }

  // The value comes out of sandboxed page code, so it must be checked to be
  // a non-empty string before it is handed to the URL helpers.
  if (!result || typeof result.dlbutton !== 'string' || !result.dlbutton) {
    throw new Error('Failed to extract info');
  }

  const download = url.resolve(uri, result.dlbutton);
  const segment = (url.parse(download).pathname || '').split('/').pop() || '';

  // decodeURI throws URIError on malformed percent escapes. The name is
  // controlled by the remote page, so fall back to the raw segment rather
  // than discarding an otherwise valid download link.
  let filename = segment;
  try {
    filename = decodeURI(segment);
  } catch (err) {
    if (!(err instanceof URIError)) {
      throw err;
    }
  }

  return {
    download,
    filename: filename || null
  };
}
//# sourceMappingURL=extract.mjs.map