UNPKG

7.52 kBSource Map (JSON)View Raw
1{"version":3,"sources":["extract.ts"],"names":["requestP","req","options","r","Promise","resolve","reject","error","response","body","extractScripts","html","$","cheerio","load","each","elI","el","data","push","extractScript","script","result","includes","ctx","vm","createContext","Object","create","runOpts","timeout","codePre","join","codePost","runInContext","JSON","parse","err","extract","uri","requester","request","url","statusCode","Error","bodyType","scripts","dlbutton","download","filename","decodeURI","pathname","split","pop"],"mappings":";;;;;;;;;AAAA;;AACA;;AAEA;;AACA;;AAEA;;;;;;;AAOA,eAAeA,QAAf,CACCC,GADD,EAECC,OAFD,EAGE;AACD,QAAMC,CAAC,GAAG,MAAM,IAAIC,OAAJ,CAWb,CAACC,OAAD,EAAUC,MAAV,KAAqB;AACvBL,IAAAA,GAAG,CAACC,OAAD,EAAU,CAACK,KAAD,EAAQC,QAAR,EAAkBC,IAAlB,KAA2B;AACvC,UAAIF,KAAJ,EAAW;AACVD,QAAAA,MAAM,CAACC,KAAD,CAAN;AACA;AACA;;AACDF,MAAAA,OAAO,CAAC;AACPG,QAAAA,QADO;AAEPC,QAAAA;AAFO,OAAD,CAAP;AAIA,KATE,CAAH;AAUA,GAtBe,CAAhB;AAuBA,SAAON,CAAP;AACA;AAED;;;;;;;;AAMA,SAASO,cAAT,CAAwBC,IAAxB,EAAsC;AACrC,QAAMR,CAAW,GAAG,EAApB;;AACA,QAAMS,CAAC,GAAGC,iBAAQC,IAAR,CAAaH,IAAb,CAAV;;AACAC,EAAAA,CAAC,CAAC,QAAD,CAAD,CAAYG,IAAZ,CAAiB,CAACC,GAAD,EAAMC,EAAN,KAAa;AAC7B,UAAMC,IAAI,GAAGN,CAAC,CAACK,EAAD,CAAD,CAAMN,IAAN,EAAb;;AACA,QAAIO,IAAJ,EAAU;AACTf,MAAAA,CAAC,CAACgB,IAAF,CAAOD,IAAP;AACA;AACD,GALD;AAMA,SAAOf,CAAP;AACA;AAED;;;;;;;;AAMA,SAASiB,aAAT,CAAuBC,MAAvB,EAAuC;AACtC,MAAIC,MAAqB,GAAG,IAA5B;;AACA,MAAI,CAACD,MAAM,CAACE,QAAP,CAAgB,UAAhB,CAAL,EAAkC;AACjC,WAAOD,MAAP;AACA,GAJqC,CAMtC;AACA;AACA;;;AACA,QAAME,GAAG,GAAGC,YAAGC,aAAH,CAAiBC,MAAM,CAACC,MAAP,CAAc,IAAd,CAAjB,CAAZ;;AACA,QAAMC,OAAO,GAAG;AACfC,IAAAA,OAAO,EAAE;AADM,GAAhB,CAVsC,CActC;;AACA,QAAMC,OAAO,GAAG;AACf;AACA,kBAFe,EAGf,2BAHe,EAId,kBAJc,EAKb,iBALa,EAMb,cANa,EAOd,IAPc,EAQd,mCARc,EASb,sBATa,EAUd,GAVc,EAWd,WAXc,EAYf;AACA;AAbe,IAcdC,IAdc,CAcT,IAdS,CAAhB,CAfsC,CA+BtC;;AACA,QAAMC,QAAQ,GAAG;AAChB;AACA,oBAFgB,EAGf,sDAHe,EAIhB;AACA;AALgB,IAMfD,IANe,CAMV,IANU,CAAjB,CAhCsC,CAwCtC;;AACA,MAAI;AACH;AACAP,gBAAGS,YAAH,CAAgBH,OAAhB,EAAyBP,GAAzB,EAA8BK,OAA9B,EAFG,CAIH;;;AACAJ,gBAAGS,YAAH,CAAgBb,MAAhB,EAAwBG,GAAxB,EAA6BK,OAA7B,EALG,CAOH;AACA;AACA;AACA;;;AACAP,IAAAA,MAAM,GAAGa,IAAI,CAACC,KAAL,CAAW,KAAKX,YAAGS,YAAH,CAAgBD,QAAhB,EAA0BT,GAA1B,EAA+BK,OAA/B,CAAhB,CAAT;AACA,GAZD,CAaA,OAAOQ,GAAP,EAAY,CACX;AACA;;AACD,SAAOf,MAAP;AACA;AAED;;;;;;;;;AAOO,eAAegB,OAAf,CACNC,GADM,EAENtC,GAA0B,GAAG,IAFvB,EAGL;AACD,QAAMuC,SAAS,GAAGvC,GAAG,IAAIwC,gBAAzB;AACA,QAAM;AAACjC,IAAAA,QAAD;AAAWC,IAAAA;AAAX,MAAmB,MAAMT,QAAQ,CAACwC,SAAD,EAAY;AAClDE,IAAAA,GAAG,EAAEH;AAD6C,GAAZ,CAAvC;AAGA,QAAM;AAACI,IAAAA;AAAD,MAAenC,QAArB;;AACA,MAAImC,UAAU,KAAK,GAAnB,EAAwB;AACvB,UAAM,IAAIC,KAAJ,CAAW,wBAAuBD,UAAW,EAA7C,CAAN;AACA;;AACD,QAAME,QAAQ,GAAG,OAAOpC,IAAxB;;AACA,MAAIoC,QAAQ,KAAK,QAAjB,EAA2B;AAC1B,UAAM,IAAID,KAAJ,CAAW,sBAAqBC,QAAS,EAAzC,CAAN;AACA;;AAED,QAAMC,OAAO,GAAGpC,cAAc,CAACD,IAAD,CAA9B;AACA,MAAIa,MAAkB,GAAG,IAAzB;;AACA,OAAK,MAAMD,MAAX,IAAqByB,OAArB,EAA8B;AAC7BxB,IAAAA,MAAM,GAAGF,aAAa,CAACC,MAAD,CAAtB;;AACA,QAAIC,MAAJ,EAAY;AACX;AACA;AACD;;AACD,MAAI,CAACA,MAAD,IAAW,CAACA,MAAM,CAACyB,QAAvB,EAAiC;AAChC,UAAM,IAAIH,KAAJ,CAAU,wBAAV,CAAN;AACA;;AAED,QAAMI,QAAQ,GAAGN,aAAIrC,OAAJ,CAAYkC,GAAZ,EAAiBjB,MAAM,CAACyB,QAAxB,CAAjB;;AACA,QAAME,QAAQ,GAAGC,SAAS,CACzB,CAACR,aAAIN,KAAJ,CAAUY,QAAV,EAAoBG,QAApB,IAAgC,EAAjC,EAAqCC,KAArC,CAA2C,GAA3C,EAAgDC,GAAhD,MAAyD,EADhC,CAAT,IAEZ,IAFL;AAIA,SAAO;AACNL,IAAAA,QADM;AAENC,IAAAA;AAFM,GAAP;AAIA","sourcesContent":["import vm from 'vm';\nimport url from 'url';\n\nimport request from 'request';\nimport cheerio from 'cheerio';\n\n/**\n * A request promise wrapper.\n *\n * @param req Request function.\n * @param options Request options.\n * @returns Request response and body.\n */\nasync function requestP(\n\treq: typeof request,\n\toptions: request.OptionsWithUrl\n) {\n\tconst r = await new Promise<{\n\n\t\t/**\n\t\t * Response object.\n\t\t */\n\t\tresponse: request.Response;\n\n\t\t/**\n\t\t * Response body.\n\t\t */\n\t\tbody: any;\n\t}>((resolve, reject) => {\n\t\treq(options, (error, response, body) => {\n\t\t\tif (error) {\n\t\t\t\treject(error);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tresolve({\n\t\t\t\tresponse,\n\t\t\t\tbody\n\t\t\t});\n\t\t});\n\t});\n\treturn r;\n}\n\n/**\n * Extract script code from HTML code.\n *\n * @param html HTML code.\n * @returns Script code.\n */\nfunction extractScripts(html: string) {\n\tconst r: string[] = [];\n\tconst $ = cheerio.load(html);\n\t$('script').each((elI, el) => {\n\t\tconst data = $(el).html();\n\t\tif (data) {\n\t\t\tr.push(data);\n\t\t}\n\t});\n\treturn r;\n}\n\n/**\n * Attempt to extract info from script.\n *\n * @param script Script code.\n * @returns Result object or null.\n */\nfunction extractScript(script: string) {\n\tlet result: object | null = null;\n\tif (!script.includes('dlbutton')) {\n\t\treturn result;\n\t}\n\n\t// Create a context with wich to run code in\n\t// Creating the object with a null prototype is very important.\n\t// Prevents host variables from leaking into the sanbox.\n\tconst ctx = vm.createContext(Object.create(null));\n\tconst runOpts = {\n\t\ttimeout: 1000\n\t};\n\n\t// Setup environment.\n\tconst codePre = [\n\t\t/* eslint-disable @typescript-eslint/indent */\n\t\t'window = this;',\n\t\t'document = (function(r) {',\n\t\t\t'var elements = {',\n\t\t\t\t'\"dlbutton\": {},',\n\t\t\t\t'\"fimage\": {}',\n\t\t\t'};',\n\t\t\t'r.getElementById = function(id) {',\n\t\t\t\t'return elements[id];',\n\t\t\t'}',\n\t\t\t'return r;',\n\t\t'})({});'\n\t\t/* eslint-enable @typescript-eslint/indent */\n\t].join('\\n');\n\n\t// Extract info from environment.\n\tconst codePost = [\n\t\t/* eslint-disable @typescript-eslint/indent */\n\t\t'JSON.stringify({',\n\t\t\t'\"dlbutton\": document.getElementById(\"dlbutton\").href',\n\t\t'})'\n\t\t/* eslint-enable @typescript-eslint/indent */\n\t].join('\\n');\n\n\t// Attempt to run code in sanbox and extract the info.\n\ttry {\n\t\t// Run the pre script.\n\t\tvm.runInContext(codePre, ctx, runOpts);\n\n\t\t// Run the script code.\n\t\tvm.runInContext(script, ctx, runOpts);\n\n\t\t// Run the post script.\n\t\t// Force return value to be string, with concatenation, NOT casting.\n\t\t// This prevents any funny business from sandboxed code.\n\t\t// eslint-disable-next-line\n\t\tresult = JSON.parse('' + vm.runInContext(codePost, ctx, runOpts));\n\t}\n\tcatch (err) {\n\t\t// Ignore failure.\n\t}\n\treturn result;\n}\n\n/**\n * Extract file info from a URL.\n *\n * @param uri The URI to extract info from.\n * @param req Optional custom request function or null.\n * @returns File info.\n */\nexport async function extract(\n\turi: string,\n\treq: typeof request | null = null\n) {\n\tconst requester = req || request;\n\tconst {response, body} = await requestP(requester, {\n\t\turl: uri\n\t});\n\tconst {statusCode} = response;\n\tif (statusCode !== 200) {\n\t\tthrow new Error(`Invalid status code: ${statusCode}`);\n\t}\n\tconst bodyType = typeof body;\n\tif (bodyType !== 'string') {\n\t\tthrow new Error(`Invalid body type: ${bodyType}`);\n\t}\n\n\tconst scripts = extractScripts(body);\n\tlet result: any | null = null;\n\tfor (const script of scripts) {\n\t\tresult = extractScript(script);\n\t\tif (result) {\n\t\t\tbreak;\n\t\t}\n\t}\n\tif (!result || !result.dlbutton) {\n\t\tthrow new Error('Failed to extract info');\n\t}\n\n\tconst download = url.resolve(uri, result.dlbutton);\n\tconst filename = decodeURI(\n\t\t(url.parse(download).pathname || '').split('/').pop() || ''\n\t) || null;\n\n\treturn {\n\t\tdownload,\n\t\tfilename\n\t};\n}\n"],"file":"extract.js","sourceRoot":"../src"}
\No newline at end of file