UNPKG

7.53 kBJavaScriptView Raw
1const url = require('url');
2const miniget = require('miniget');
3const querystring = require('querystring');
4
5
6// A shared cache to keep track of html5player.js tokens.
7exports.cache = new Map();
8
9
10/**
11 * Extract signature deciphering tokens from html5player file.
12 *
13 * @param {string} html5playerfile
14 * @param {Object} options
15 * @return {Promise<Array.<string>>}
16 */
17exports.getTokens = async (html5playerfile, options) => {
18 let cachedTokens = exports.cache.get(html5playerfile);
19 if (cachedTokens) {
20 return cachedTokens;
21 } else {
22 let [, body] = await miniget.promise(html5playerfile, options.requestOptions);
23 const tokens = exports.extractActions(body);
24 if (!tokens || !tokens.length) {
25 throw Error('Could not extract signature deciphering actions');
26 }
27
28 exports.cache.set(html5playerfile, tokens);
29 return tokens;
30 }
31};
32
33
34/**
35 * Decipher a signature based on action tokens.
36 *
37 * @param {Array.<string>} tokens
38 * @param {string} sig
39 * @return {string}
40 */
41exports.decipher = (tokens, sig) => {
42 sig = sig.split('');
43 for (let i = 0, len = tokens.length; i < len; i++) {
44 let token = tokens[i], pos;
45 switch (token[0]) {
46 case 'r':
47 sig = sig.reverse();
48 break;
49 case 'w':
50 pos = ~~token.slice(1);
51 sig = swapHeadAndPosition(sig, pos);
52 break;
53 case 's':
54 pos = ~~token.slice(1);
55 sig = sig.slice(pos);
56 break;
57 case 'p':
58 pos = ~~token.slice(1);
59 sig.splice(0, pos);
60 break;
61 }
62 }
63 return sig.join('');
64};
65
66
/**
 * Swaps the first element of an array with the one at the given position,
 * in place.
 *
 * The position wraps around the array length, and the same wrapped index is
 * used for both the read and the write. Writing to the raw `position` would
 * append past the end of the array whenever `position >= arr.length`.
 * An empty array is returned untouched.
 *
 * @param {Array.<Object>} arr
 * @param {number} position
 * @return {Array.<Object>} The same array instance that was passed in.
 */
const swapHeadAndPosition = (arr, position) => {
  if (arr.length === 0) {
    return arr;
  }
  const target = position % arr.length;
  const first = arr[0];
  arr[0] = arr[target];
  arr[target] = first;
  return arr;
};
80
81
// Regular-expression source fragments. They are plain strings that get
// concatenated into the patterns below, so every backslash is doubled.

// An identifier. NOTE: `$` is accepted as the first character only.
const jsVarStr = '[a-zA-Z_\\$][a-zA-Z_0-9]*';
// Quoted string literals with backslash escapes. The repeated group is
// non-capturing so these fragments never add capture groups to the
// patterns they are embedded in.
const jsSingleQuoteStr = `'[^'\\\\]*(?:\\\\[\\s\\S][^'\\\\]*)*'`;
const jsDoubleQuoteStr = `"[^"\\\\]*(?:\\\\[\\s\\S][^"\\\\]*)*"`;
const jsQuoteStr = `(?:${jsSingleQuoteStr}|${jsDoubleQuoteStr})`;
// An object-literal key: bare identifier or quoted string.
const jsKeyStr = `(?:${jsVarStr}|${jsQuoteStr})`;
// A property access: `.name`, `['name']` or `["name"]`.
const jsPropStr = `(?:\\.${jsVarStr}|\\[${jsQuoteStr}\\])`;
const jsEmptyStr = `(?:''|"")`;

// Bodies of the four helper functions, each starting at the `:` that
// follows the key inside the helper object literal.
const reverseStr = ':function\\(a\\)\\{' +
  '(?:return )?a\\.reverse\\(\\)' +
'\\}';
const sliceStr = ':function\\(a,b\\)\\{' +
  'return a\\.slice\\(b\\)' +
'\\}';
const spliceStr = ':function\\(a,b\\)\\{' +
  'a\\.splice\\(0,b\\)' +
'\\}';
const swapStr = ':function\\(a,b\\)\\{' +
  'var c=a\\[0\\];a\\[0\\]=a\\[b(?:%a\\.length)?\\];a\\[b(?:%a\\.length)?\\]=c(?:;return a)?' +
'\\}';

// Matches the helper object. Group 1 is the variable name, group 2 is the
// object body (all `key:function...` entries).
const actionsObjRegexp = new RegExp(
  `var (${jsVarStr})=\\{((?:(?:` +
    jsKeyStr + reverseStr + '|' +
    jsKeyStr + sliceStr + '|' +
    jsKeyStr + spliceStr + '|' +
    jsKeyStr + swapStr +
  '),?\\r?\\n?)+)\\};'
);
// Matches the function that splits its argument, calls the helpers and
// joins the result. Group 1 is the sequence of helper calls.
const actionsFuncRegexp = new RegExp(`function(?: ${jsVarStr})?\\(a\\)\\{` +
    `a=a\\.split\\(${jsEmptyStr}\\);\\s*` +
    `((?:(?:a=)?${jsVarStr}` +
    jsPropStr +
    '\\(a,\\d+\\);)+)' +
    `return a\\.join\\(${jsEmptyStr}\\)` +
  '\\}'
);
// Each of these captures, as group 1, the key under which the respective
// helper is stored in the object body.
const reverseRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${reverseStr}`, 'm');
const sliceRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${sliceStr}`, 'm');
const spliceRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${spliceStr}`, 'm');
const swapRegexp = new RegExp(`(?:^|,)(${jsKeyStr})${swapStr}`, 'm');
121
122
123/**
124 * Extracts the actions that should be taken to decipher a signature.
125 *
126 * This searches for a function that performs string manipulations on
127 * the signature. We already know what the 3 possible changes to a signature
128 * are in order to decipher it. There is
129 *
130 * * Reversing the string.
131 * * Removing a number of characters from the beginning.
132 * * Swapping the first character with another position.
133 *
134 * Note, `Array#slice()` used to be used instead of `Array#splice()`,
135 * it's kept in case we encounter any older html5player files.
136 *
137 * After retrieving the function that does this, we can see what actions
138 * it takes on a signature.
139 *
140 * @param {string} body
141 * @return {Array.<string>}
142 */
143exports.extractActions = (body) => {
144 const objResult = actionsObjRegexp.exec(body);
145 const funcResult = actionsFuncRegexp.exec(body);
146 if (!objResult || !funcResult) { return null; }
147
148 const obj = objResult[1].replace(/\$/g, '\\$');
149 const objBody = objResult[2].replace(/\$/g, '\\$');
150 const funcBody = funcResult[1].replace(/\$/g, '\\$');
151
152 let result = reverseRegexp.exec(objBody);
153 const reverseKey = result && result[1]
154 .replace(/\$/g, '\\$')
155 .replace(/\$|^'|^"|'$|"$/g, '');
156 result = sliceRegexp.exec(objBody);
157 const sliceKey = result && result[1]
158 .replace(/\$/g, '\\$')
159 .replace(/\$|^'|^"|'$|"$/g, '');
160 result = spliceRegexp.exec(objBody);
161 const spliceKey = result && result[1]
162 .replace(/\$/g, '\\$')
163 .replace(/\$|^'|^"|'$|"$/g, '');
164 result = swapRegexp.exec(objBody);
165 const swapKey = result && result[1]
166 .replace(/\$/g, '\\$')
167 .replace(/\$|^'|^"|'$|"$/g, '');
168
169 const keys = `(${[reverseKey, sliceKey, spliceKey, swapKey].join('|')})`;
170 const myreg = '(?:a=)?' + obj +
171 `(?:\\.${keys}|\\['${keys}'\\]|\\["${keys}"\\])` +
172 '\\(a,(\\d+)\\)';
173 const tokenizeRegexp = new RegExp(myreg, 'g');
174 const tokens = [];
175 while ((result = tokenizeRegexp.exec(funcBody)) !== null) {
176 let key = result[1] || result[2] || result[3];
177 switch (key) {
178 case swapKey:
179 tokens.push('w' + result[4]);
180 break;
181 case reverseKey:
182 tokens.push('r');
183 break;
184 case sliceKey:
185 tokens.push('s' + result[4]);
186 break;
187 case spliceKey:
188 tokens.push('p' + result[4]);
189 break;
190 }
191 }
192 return tokens;
193};
194
195
196/**
197 * @param {Object} format
198 * @param {string} sig
199 * @param {boolean} debug
200 */
201exports.setDownloadURL = (format, sig, debug) => {
202 let decodedUrl;
203 if (format.url) {
204 decodedUrl = format.url;
205 } else {
206 if (debug) {
207 console.warn('Download url not found for itag ' + format.itag);
208 }
209 return;
210 }
211
212 try {
213 decodedUrl = decodeURIComponent(decodedUrl);
214 } catch (err) {
215 if (debug) {
216 console.warn('Could not decode url: ' + err.message);
217 }
218 return;
219 }
220
221 // Make some adjustments to the final url.
222 const parsedUrl = url.parse(decodedUrl, true);
223
224 // Deleting the `search` part is necessary otherwise changes to
225 // `query` won't reflect when running `url.format()`
226 delete parsedUrl.search;
227
228 let query = parsedUrl.query;
229
230 // This is needed for a speedier download.
231 // See https://github.com/fent/node-ytdl-core/issues/127
232 query.ratebypass = 'yes';
233 if (sig) {
234 // When YouTube provides a `sp` parameter the signature `sig` must go
235 // into the parameter it specifies.
236 // See https://github.com/fent/node-ytdl-core/issues/417
237 if (format.sp) {
238 query[format.sp] = sig;
239 } else {
240 query.signature = sig;
241 }
242 }
243
244 format.url = url.format(parsedUrl);
245};
246
247
248/**
249 * Applies `sig.decipher()` to all format URL's.
250 *
251 * @param {Array.<Object>} formats
252 * @param {Array.<string>} tokens
253 * @param {boolean} debug
254 */
255exports.decipherFormats = (formats, tokens, debug) => {
256 formats.forEach((format) => {
257 if (format.cipher) {
258 Object.assign(format, querystring.parse(format.cipher));
259 delete format.cipher;
260 }
261 const sig = tokens && format.s ? exports.decipher(tokens, format.s) : null;
262 exports.setDownloadURL(format, sig, debug);
263 });
264};