UNPKG

7.65 kBSource Map (JSON)View Raw
1{"version":3,"sources":["extract.ts"],"names":["vm","url","request","cheerio","requestP","req","options","r","Promise","resolve","reject","error","response","body","extractScripts","html","$","load","each","elI","el","data","push","extractScript","script","result","includes","ctx","createContext","Object","create","runOpts","timeout","codePre","join","codePost","runInContext","JSON","parse","err","extract","uri","requester","statusCode","Error","bodyType","scripts","dlbutton","download","filename","decodeURI","pathname","split","pop"],"mappings":"AAAA,OAAOA,EAAP,MAAe,IAAf;AACA,OAAOC,GAAP,MAAgB,KAAhB;AAEA,OAAOC,OAAP,MAAoB,SAApB;AACA,OAAOC,OAAP,MAAoB,SAApB;AAEA;;;;;;;;AAOA,eAAeC,QAAf,CACCC,GADD,EAECC,OAFD,EAGE;AACD,QAAMC,CAAC,GAAG,MAAM,IAAIC,OAAJ,CAWb,CAACC,OAAD,EAAUC,MAAV,KAAqB;AACvBL,IAAAA,GAAG,CAACC,OAAD,EAAU,CAACK,KAAD,EAAQC,QAAR,EAAkBC,IAAlB,KAA2B;AACvC,UAAIF,KAAJ,EAAW;AACVD,QAAAA,MAAM,CAACC,KAAD,CAAN;AACA;AACA;;AACDF,MAAAA,OAAO,CAAC;AACPG,QAAAA,QADO;AAEPC,QAAAA;AAFO,OAAD,CAAP;AAIA,KATE,CAAH;AAUA,GAtBe,CAAhB;AAuBA,SAAON,CAAP;AACA;AAED;;;;;;;;AAMA,SAASO,cAAT,CAAwBC,IAAxB,EAAsC;AACrC,QAAMR,CAAW,GAAG,EAApB;AACA,QAAMS,CAAC,GAAGb,OAAO,CAACc,IAAR,CAAaF,IAAb,CAAV;AACAC,EAAAA,CAAC,CAAC,QAAD,CAAD,CAAYE,IAAZ,CAAiB,CAACC,GAAD,EAAMC,EAAN,KAAa;AAC7B,UAAMC,IAAI,GAAGL,CAAC,CAACI,EAAD,CAAD,CAAML,IAAN,EAAb;;AACA,QAAIM,IAAJ,EAAU;AACTd,MAAAA,CAAC,CAACe,IAAF,CAAOD,IAAP;AACA;AACD,GALD;AAMA,SAAOd,CAAP;AACA;AAED;;;;;;;;AAMA,SAASgB,aAAT,CAAuBC,MAAvB,EAAuC;AACtC,MAAIC,MAAqB,GAAG,IAA5B;;AACA,MAAI,CAACD,MAAM,CAACE,QAAP,CAAgB,UAAhB,CAAL,EAAkC;AACjC,WAAOD,MAAP;AACA,GAJqC,CAMtC;AACA;AACA;;;AACA,QAAME,GAAG,GAAG3B,EAAE,CAAC4B,aAAH,CAAiBC,MAAM,CAACC,MAAP,CAAc,IAAd,CAAjB,CAAZ;AACA,QAAMC,OAAO,GAAG;AACfC,IAAAA,OAAO,EAAE;AADM,GAAhB,CAVsC,CActC;;AACA,QAAMC,OAAO,GAAG;AACf;AACA,kBAFe,EAGf,2BAHe,EAId,kBAJc,EAKb,iBALa,EAMb,cANa,EAOd,IAPc,EAQd,mCARc,EASb,sBATa,EAUd,GAVc,EAWd,WAXc,EAYf;AACA;AAbe,IAcdC,IAdc,CAcT,IAdS,CAAhB,CAfsC,CA+BtC;;AACA,QAAMC,QAAQ,GAAG;AAChB;AACA,oBAFgB,EAGf,sDAHe,EAIhB;AACA;AALgB,IAMfD,IANe,CAMV,IANU,CAAjB,CAhCsC,CAwCtC;;AACA,MAAI;AACH;AACAlC,IAAAA,EAAE,CAACoC,YAAH,CAAgBH,OAAhB,EAAyBN,GAAzB,EAA8BI,OAA9B,EAFG,CAIH;;AACA/B,IAAAA,EAAE,CAACoC,YAAH,CAAgBZ,MAAhB,EAAwBG,GAAxB,EAA6BI,OAA7B,EALG,CAOH;AACA;AACA;AACA;;AACAN,IAAAA,MAAM,GAAGY,IAAI,CAACC,KAAL,CAAW,KAAKtC,EAAE,CAACoC,YAAH,CAAgBD,QAAhB,EAA0BR,GAA1B,EAA+BI,OAA/B,CAAhB,CAAT;AACA,GAZD,CAaA,OAAOQ,GAAP,EAAY,CACX;AACA;;AACD,SAAOd,MAAP;AACA;AAED;;;;;;;;;AAOA,OAAO,eAAee,OAAf,CACNC,GADM,EAENpC,GAA0B,GAAG,IAFvB,EAGL;AACD,QAAMqC,SAAS,GAAGrC,GAAG,IAAIH,OAAzB;AACA,QAAM;AAACU,IAAAA,QAAD;AAAWC,IAAAA;AAAX,MAAmB,MAAMT,QAAQ,CAACsC,SAAD,EAAY;AAClDzC,IAAAA,GAAG,EAAEwC;AAD6C,GAAZ,CAAvC;AAGA,QAAM;AAACE,IAAAA;AAAD,MAAe/B,QAArB;;AACA,MAAI+B,UAAU,KAAK,GAAnB,EAAwB;AACvB,UAAM,IAAIC,KAAJ,CAAW,wBAAuBD,UAAW,EAA7C,CAAN;AACA;;AACD,QAAME,QAAQ,GAAG,OAAOhC,IAAxB;;AACA,MAAIgC,QAAQ,KAAK,QAAjB,EAA2B;AAC1B,UAAM,IAAID,KAAJ,CAAW,sBAAqBC,QAAS,EAAzC,CAAN;AACA;;AAED,QAAMC,OAAO,GAAGhC,cAAc,CAACD,IAAD,CAA9B;AACA,MAAIY,MAAkB,GAAG,IAAzB;;AACA,OAAK,MAAMD,MAAX,IAAqBsB,OAArB,EAA8B;AAC7BrB,IAAAA,MAAM,GAAGF,aAAa,CAACC,MAAD,CAAtB;;AACA,QAAIC,MAAJ,EAAY;AACX;AACA;AACD;;AACD,MAAI,CAACA,MAAD,IAAW,CAACA,MAAM,CAACsB,QAAvB,EAAiC;AAChC,UAAM,IAAIH,KAAJ,CAAU,wBAAV,CAAN;AACA;;AAED,QAAMI,QAAQ,GAAG/C,GAAG,CAACQ,OAAJ,CAAYgC,GAAZ,EAAiBhB,MAAM,CAACsB,QAAxB,CAAjB;AACA,QAAME,QAAQ,GAAGC,SAAS,CACzB,CAACjD,GAAG,CAACqC,KAAJ,CAAUU,QAAV,EAAoBG,QAApB,IAAgC,EAAjC,EAAqCC,KAArC,CAA2C,GAA3C,EAAgDC,GAAhD,MAAyD,EADhC,CAAT,IAEZ,IAFL;AAIA,SAAO;AACNL,IAAAA,QADM;AAENC,IAAAA;AAFM,GAAP;AAIA","sourcesContent":["import vm from 'vm';\nimport url from 'url';\n\nimport request from 'request';\nimport cheerio from 'cheerio';\n\n/**\n * A request promise wrapper.\n *\n * @param req Request function.\n * @param options Request options.\n * @returns Request response and body.\n */\nasync function requestP(\n\treq: typeof request,\n\toptions: request.OptionsWithUrl\n) {\n\tconst r = await new Promise<{\n\n\t\t/**\n\t\t * Response object.\n\t\t */\n\t\tresponse: request.Response;\n\n\t\t/**\n\t\t * Response body.\n\t\t */\n\t\tbody: any;\n\t}>((resolve, reject) => {\n\t\treq(options, (error, response, body) => {\n\t\t\tif (error) {\n\t\t\t\treject(error);\n\t\t\t\treturn;\n\t\t\t}\n\t\t\tresolve({\n\t\t\t\tresponse,\n\t\t\t\tbody\n\t\t\t});\n\t\t});\n\t});\n\treturn r;\n}\n\n/**\n * Extract script code from HTML code.\n *\n * @param html HTML code.\n * @returns Script code.\n */\nfunction extractScripts(html: string) {\n\tconst r: string[] = [];\n\tconst $ = cheerio.load(html);\n\t$('script').each((elI, el) => {\n\t\tconst data = $(el).html();\n\t\tif (data) {\n\t\t\tr.push(data);\n\t\t}\n\t});\n\treturn r;\n}\n\n/**\n * Attempt to extract info from script.\n *\n * @param script Script code.\n * @returns Result object or null.\n */\nfunction extractScript(script: string) {\n\tlet result: object | null = null;\n\tif (!script.includes('dlbutton')) {\n\t\treturn result;\n\t}\n\n\t// Create a context with wich to run code in\n\t// Creating the object with a null prototype is very important.\n\t// Prevents host variables from leaking into the sanbox.\n\tconst ctx = vm.createContext(Object.create(null));\n\tconst runOpts = {\n\t\ttimeout: 1000\n\t};\n\n\t// Setup environment.\n\tconst codePre = [\n\t\t/* eslint-disable @typescript-eslint/indent */\n\t\t'window = this;',\n\t\t'document = (function(r) {',\n\t\t\t'var elements = {',\n\t\t\t\t'\"dlbutton\": {},',\n\t\t\t\t'\"fimage\": {}',\n\t\t\t'};',\n\t\t\t'r.getElementById = function(id) {',\n\t\t\t\t'return elements[id];',\n\t\t\t'}',\n\t\t\t'return r;',\n\t\t'})({});'\n\t\t/* eslint-enable @typescript-eslint/indent */\n\t].join('\\n');\n\n\t// Extract info from environment.\n\tconst codePost = [\n\t\t/* eslint-disable @typescript-eslint/indent */\n\t\t'JSON.stringify({',\n\t\t\t'\"dlbutton\": document.getElementById(\"dlbutton\").href',\n\t\t'})'\n\t\t/* eslint-enable @typescript-eslint/indent */\n\t].join('\\n');\n\n\t// Attempt to run code in sanbox and extract the info.\n\ttry {\n\t\t// Run the pre script.\n\t\tvm.runInContext(codePre, ctx, runOpts);\n\n\t\t// Run the script code.\n\t\tvm.runInContext(script, ctx, runOpts);\n\n\t\t// Run the post script.\n\t\t// Force return value to be string, with concatenation, NOT casting.\n\t\t// This prevents any funny business from sandboxed code.\n\t\t// eslint-disable-next-line\n\t\tresult = JSON.parse('' + vm.runInContext(codePost, ctx, runOpts));\n\t}\n\tcatch (err) {\n\t\t// Ignore failure.\n\t}\n\treturn result;\n}\n\n/**\n * Extract file info from a URL.\n *\n * @param uri The URI to extract info from.\n * @param req Optional custom request function or null.\n * @returns File info.\n */\nexport async function extract(\n\turi: string,\n\treq: typeof request | null = null\n) {\n\tconst requester = req || request;\n\tconst {response, body} = await requestP(requester, {\n\t\turl: uri\n\t});\n\tconst {statusCode} = response;\n\tif (statusCode !== 200) {\n\t\tthrow new Error(`Invalid status code: ${statusCode}`);\n\t}\n\tconst bodyType = typeof body;\n\tif (bodyType !== 'string') {\n\t\tthrow new Error(`Invalid body type: ${bodyType}`);\n\t}\n\n\tconst scripts = extractScripts(body);\n\tlet result: any | null = null;\n\tfor (const script of scripts) {\n\t\tresult = extractScript(script);\n\t\tif (result) {\n\t\t\tbreak;\n\t\t}\n\t}\n\tif (!result || !result.dlbutton) {\n\t\tthrow new Error('Failed to extract info');\n\t}\n\n\tconst download = url.resolve(uri, result.dlbutton);\n\tconst filename = decodeURI(\n\t\t(url.parse(download).pathname || '').split('/').pop() || ''\n\t) || null;\n\n\treturn {\n\t\tdownload,\n\t\tfilename\n\t};\n}\n"],"file":"extract.mjs","sourceRoot":"../src"}
\No newline at end of file