UNPKG

17.7 kBJavaScriptView Raw
// Module preamble: enable strict mode and flag this CommonJS module as a
// transpiled ES module via the `__esModule` marker on `exports`.
1"use strict";
2
3Object.defineProperty(exports, "__esModule", {
4 value: true
5});
// Both exports are initialised to `undefined` here; their real values are
// assigned further down in this file.
6exports.default = exports.PAGE_UNLOADED_DURING_EXECUTION_ERROR_MESSAGE = void 0;
7
8var _cssTree = _interopRequireDefault(require("css-tree"));
9
10var _debug = _interopRequireDefault(require("debug"));
11
12var _pruneNonCriticalSelectors = _interopRequireDefault(require("./browser-sandbox/pruneNonCriticalSelectors"));
13
14var _replacePageCss = _interopRequireDefault(require("./browser-sandbox/replacePageCss"));
15
16var _postformatting = _interopRequireDefault(require("./postformatting"));
17
18var _selectorsProfile = _interopRequireDefault(require("./selectors-profile"));
19
20var _nonMatchingMediaQueryRemover = _interopRequireDefault(require("./non-matching-media-query-remover"));
21
/**
 * Normalises a required module so that `.default` can always be read from it.
 *
 * @param {*} obj - value returned by `require`.
 * @returns {*} `obj` itself when it is truthy and flagged `__esModule`,
 *   otherwise a wrapper object `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }

  return { default: obj };
}
23
/**
 * Copies the own enumerable properties (string keys and, where supported,
 * symbol keys) of every source onto `target`, left to right.
 *
 * `null` / `undefined` sources are treated as empty objects. Each property is
 * written through `_defineProperty`.
 *
 * @param {Object} target - object that receives the properties (mutated).
 * @param {...*} sources - objects to copy from.
 * @returns {Object} the same `target` object.
 */
function _objectSpread(target, ...sources) {
  for (const candidate of sources) {
    const source = candidate != null ? candidate : {};
    let keys = Object.keys(source);

    if (typeof Object.getOwnPropertySymbols === 'function') {
      const enumerableSymbols = Object.getOwnPropertySymbols(source).filter(
        sym => Object.getOwnPropertyDescriptor(source, sym).enumerable
      );
      keys = keys.concat(enumerableSymbols);
    }

    for (const key of keys) {
      _defineProperty(target, key, source[key]);
    }
  }

  return target;
}
25
/**
 * Sets `obj[key]` to `value`.
 *
 * When `key` is already reachable on `obj` (own or inherited) the property is
 * (re)defined as an own, enumerable, configurable, writable data property;
 * otherwise a plain assignment is used.
 *
 * @param {Object} obj - object to modify.
 * @param {string|symbol} key - property key.
 * @param {*} value - value to store.
 * @returns {Object} the same `obj`.
 */
function _defineProperty(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }

  const descriptor = {
    value: value,
    enumerable: true,
    configurable: true,
    writable: true
  };
  Object.defineProperty(obj, key, descriptor);
  return obj;
}
27
/**
 * Turns `arr` into a fresh array suitable for spreading into a call.
 *
 * Tries `_arrayWithoutHoles` first, then `_iterableToArray`; if neither
 * produces a truthy result, `_nonIterableSpread` is invoked (which throws).
 *
 * @param {*} arr - array or iterable to convert.
 * @returns {Array} the converted array.
 */
function _toConsumableArray(arr) {
  const fromArray = _arrayWithoutHoles(arr);
  if (fromArray) {
    return fromArray;
  }

  const fromIterable = _iterableToArray(arr);
  if (fromIterable) {
    return fromIterable;
  }

  return _nonIterableSpread();
}
29
/**
 * Always throws: signals that a value which is neither an array nor an
 * iterable was spread.
 *
 * @throws {TypeError} unconditionally.
 */
function _nonIterableSpread() {
  const message = "Invalid attempt to spread non-iterable instance";
  throw new TypeError(message);
}
31
/**
 * Converts an iterable (or an `arguments` object) into an array.
 *
 * @param {*} iter - candidate value.
 * @returns {Array|undefined} `Array.from(iter)` when `iter` exposes
 *   `Symbol.iterator` or is an `arguments` object, otherwise `undefined`.
 */
function _iterableToArray(iter) {
  const hasIterator = Symbol.iterator in Object(iter);

  if (hasIterator || Object.prototype.toString.call(iter) === "[object Arguments]") {
    return Array.from(iter);
  }

  return undefined;
}
33
/**
 * Shallow-copies a real array index by index, so holes in a sparse array
 * become explicit `undefined` entries in the copy.
 *
 * @param {*} arr - candidate value.
 * @returns {Array|undefined} the copy, or `undefined` when `arr` is not an array.
 */
function _arrayWithoutHoles(arr) {
  if (!Array.isArray(arr)) {
    return undefined;
  }

  const copy = new Array(arr.length);
  let index = 0;

  while (index < arr.length) {
    copy[index] = arr[index];
    index += 1;
  }

  return copy;
}
35
// Debug logger created with the 'penthouse:core' namespace; used by the
// functions in this file for progress and failure messages.
36const debuglog = (0, _debug.default)('penthouse:core');
// Error texts that are treated as "the page went away while the in-browser
// script was running"; tested against errors thrown by the pruning evaluate call.
37const PUPPETEER_PAGE_UNLOADED_DURING_EXECUTION_ERROR_REGEX = /(Cannot find context with specified id|Execution context was destroyed)/;
// Message of the Error that replaces a matching raw error (see the regex above).
// Also exported so it is available outside this module.
38const PAGE_UNLOADED_DURING_EXECUTION_ERROR_MESSAGE = 'PAGE_UNLOADED_DURING_EXECUTION: Critical css generation script could not be executed.\n\nThis can happen if Penthouse was killed during execution, OR otherwise most commonly if the page navigates away after load, via setting window.location, meta tag refresh directive or similar. For the critical css generation to work the loaded page must stay: remove any redirects or move them to the server. You can also disable them on your end just for the critical css generation, for example via a query parameter.';
39exports.PAGE_UNLOADED_DURING_EXECUTION_ERROR_MESSAGE = PAGE_UNLOADED_DURING_EXECUTION_ERROR_MESSAGE;
40
/**
 * `request` event handler that aborts requests for JavaScript files and lets
 * every other request continue.
 *
 * A request counts as a JavaScript request when its URL ends in `.js`,
 * optionally followed by a query string.
 *
 * @param {Object} interceptedRequest - intercepted request. Must expose
 *   `abort()`, `continue()` and `url` (either an accessor method or a plain
 *   string).
 */
function blockinterceptedRequests(interceptedRequest) {
  // `url` is an accessor method on Puppeteer request objects. Testing the regex
  // against the method itself would stringify its source code and never match,
  // so call it when it is a function; a plain string is still accepted.
  const requestUrl = typeof interceptedRequest.url === 'function'
    ? interceptedRequest.url()
    : interceptedRequest.url;
  const isJsRequest = /\.js(\?.*)?$/.test(requestUrl);

  if (isJsRequest) {
    interceptedRequest.abort();
  } else {
    interceptedRequest.continue();
  }
}
50
/**
 * Navigates `page` to `url` and resolves once the navigation finished, or
 * once `pageLoadSkipTimeout` elapsed, whichever comes first.
 *
 * @param {Object} page - browser page exposing `goto(url)`.
 * @param {string} url - address to load.
 * @param {number} timeout - not used by this function; kept for call-site compatibility.
 * @param {number} [pageLoadSkipTimeout] - when truthy, stop waiting for the
 *   navigation after this many milliseconds and carry on.
 * @param {number|RegExp|Function} [allowedResponseCode] - when defined, the
 *   navigation response is validated with `checkResponseStatus`.
 * @returns {Promise<void>} rejects when the navigation fails or the response
 *   status is not allowed.
 */
async function loadPage(page, url, timeout, pageLoadSkipTimeout, allowedResponseCode) {
  debuglog('page load start');
  let skipTimer;
  let loadPagePromise = page.goto(url);

  if (pageLoadSkipTimeout) {
    loadPagePromise = Promise.race([loadPagePromise, new Promise(resolve => {
      // _abort_ waiting for page load after X time,
      // in order to deal with spammy pages that keep sending non-critical requests
      // (tracking etc), which would otherwise never load.
      // With JS disabled it just shouldn't take that many seconds to load what's needed
      // for critical viewport.
      skipTimer = setTimeout(() => {
        debuglog('page load waiting ABORTED after ' + pageLoadSkipTimeout / 1000 + 's. ');
        resolve();
      }, pageLoadSkipTimeout);
    })]);
  }

  let response;

  try {
    response = await loadPagePromise;
  } finally {
    // Whatever the outcome, the skip timer has no further purpose: clearing it
    // avoids a stray timer (and a misleading "ABORTED" log) after the
    // navigation already settled.
    clearTimeout(skipTimer);
  }

  // When the skip timeout won the race there is no response object to inspect,
  // so the status check can only run when one is available.
  if (typeof allowedResponseCode !== 'undefined' && response != null) {
    checkResponseStatus(allowedResponseCode, response);
  }

  debuglog('page load DONE');
}
81
/**
 * Validates the status of a navigation response against `allowedResponseCode`.
 *
 * - number: the status must be strictly equal to it;
 * - RegExp: the status, converted to a string, must match it;
 * - function: it is called with `response` and must return a truthy value;
 * - anything else (including `null`): no validation is performed.
 *
 * @param {number|RegExp|Function|*} allowedResponseCode - expectation to check.
 * @param {Object} response - response object exposing `status()`.
 * @throws {Error} when the status does not satisfy the expectation.
 */
function checkResponseStatus(allowedResponseCode, response) {
  let isAllowed = true;
  let expectation = '';

  if (typeof allowedResponseCode === 'number') {
    isAllowed = response.status() === allowedResponseCode;
    expectation = `: ${allowedResponseCode}`;
  } else if (Object.prototype.toString.call(allowedResponseCode) === '[object RegExp]') {
    // Tag-based detection: safe for `null` and prototype-less objects, where
    // reading `.constructor.name` would throw a TypeError.
    isAllowed = response.status().toString().match(allowedResponseCode) !== null;
    expectation = `: ${allowedResponseCode.toString()}`;
  } else if (typeof allowedResponseCode === 'function') {
    isAllowed = Boolean(allowedResponseCode(response));
  }

  if (!isAllowed) {
    throw new Error(`Server response status ${response.status()} isn't matching allowedResponseCode${expectation}.`);
  }
}
97
/**
 * Registers `blockinterceptedRequests` as the page's `request` handler and
 * switches request interception on.
 *
 * @param {Object} page - page exposing `on()` and `setRequestInterception()`.
 * @returns {*} whatever `page.setRequestInterception(true)` returns.
 */
function setupBlockJsRequests(page) {
  const eventName = 'request';
  page.on(eventName, blockinterceptedRequests);

  const interceptionEnabled = page.setRequestInterception(true);
  return interceptionEnabled;
}
102
/**
 * Parses a CSS string into a css-tree AST.
 *
 * @param {Object} options
 * @param {string} options.cssString - CSS source to parse.
 * @param {boolean} [options.strict] - when exactly `true`, any parse error
 *   reported by css-tree makes the call reject.
 * @returns {Promise<Object>} the AST returned by `css-tree`'s `parse`.
 * @throws {Error} (as a rejection) in strict mode when parse errors were reported.
 */
async function astFromCss({
  cssString,
  strict
}) {
  // U+FFFF breaks puppeteer, so it is swapped for the CSS escape sequence
  // `\f042`. The backslash has to be escaped in the JS literal: an unescaped
  // '\f042' is a form feed character followed by "042", and a raw form feed
  // counts as a newline in CSS, which breaks any quoted string containing it.
  const css = cssString.replace(/\uFFFF/g, '\\f042');
  const parsingErrors = [];
  debuglog('parse ast START');

  const ast = _cssTree.default.parse(css, {
    onParseError: error => parsingErrors.push(error.formattedMessage)
  });

  debuglog(`parse ast DONE (with ${parsingErrors.length} errors)`);

  if (parsingErrors.length && strict === true) {
    // NOTE: only informing about first error, even if there were more than one.
    const parsingErrorMessage = parsingErrors[0];
    throw new Error(`AST parser (css-tree) found ${parsingErrors.length} errors in CSS.
 Breaking because in strict mode.
 The first error was:
 ` + parsingErrorMessage);
  }

  return ast;
}
129
/**
 * Waits for a browser page and configures it (viewport, user agent, extra
 * headers, cookies and, for a page that is not being reused, navigation
 * timeout, JS blocking and event listeners).
 *
 * Setting extra headers and cookies is best-effort: failures are logged and do
 * not fail the preparation.
 *
 * @param {Object} options
 * @param {Object} [options.page] - ignored on input; replaced by the page obtained from `pagePromise`.
 * @param {Promise<{page: Object, reused: boolean}>} options.pagePromise - resolves to the page and whether it is reused.
 * @param {number} options.width - wanted viewport width.
 * @param {number} options.height - wanted viewport height.
 * @param {Iterable<Object>} [options.cookies] - cookies passed to `page.setCookie`.
 * @param {string} options.userAgent - user agent passed to `page.setUserAgent`.
 * @param {Object} [options.customPageHeaders] - headers passed to `page.setExtraHTTPHeaders` when non-empty.
 * @param {boolean} [options.blockJSRequests] - when truthy (and the page is not reused), disable JS and block JS requests.
 * @param {Function} options.cleanupAndExit - called with `{ error }` when the page emits `error`.
 * @param {Function} options.getHasExited - returns true when the job already finished.
 * @returns {Promise<Object|undefined>} the configured page, or `undefined` when
 *   no page could be obtained or the job already exited.
 */
async function preparePage({
  page,
  pagePromise,
  width,
  height,
  cookies,
  userAgent,
  customPageHeaders,
  blockJSRequests,
  cleanupAndExit,
  getHasExited
}) {
  let reusedPage;

  try {
    const pagePromiseResult = await pagePromise;
    page = pagePromiseResult.page;
    reusedPage = pagePromiseResult.reused;
  } catch (e) {
    debuglog('unexpected: could not get an open browser page' + e);
    return;
  }

  // we already exited while page was opening, stop execution
  // (strict mode ast css parsing errors)
  if (getHasExited()) {
    return;
  }

  debuglog('open page ready in browser');

  // We set the viewport size in the browser when it launches,
  // and then re-use it for each page (to avoid extra work).
  // Only if later pages use a different viewport size do we need to
  // update it here.
  let setViewportPromise = Promise.resolve();
  const currentViewport = page.viewport();

  if (currentViewport.width !== width || currentViewport.height !== height) {
    setViewportPromise = page.setViewport({
      width,
      height
    }).then(() => debuglog('viewport size updated'));
  }

  const setUserAgentPromise = page.setUserAgent(userAgent).then(() => debuglog('userAgent set'));

  let setCustomPageHeadersPromise = Promise.resolve();

  if (customPageHeaders && Object.keys(customPageHeaders).length) {
    const logHeadersFailure = e => debuglog('failed setting extra http headers: ' + e);

    try {
      // The trailing .catch matters: a surrounding try/catch only sees
      // synchronous throws, not a rejection of the returned promise.
      setCustomPageHeadersPromise = page.setExtraHTTPHeaders(customPageHeaders)
        .then(() => debuglog('customPageHeaders set:' + JSON.stringify(customPageHeaders)))
        .catch(logHeadersFailure);
    } catch (e) {
      logHeadersFailure(e);
    }
  }

  let setCookiesPromise = Promise.resolve();

  if (cookies) {
    const logCookiesFailure = e => debuglog('failed to set cookies: ' + e);

    try {
      setCookiesPromise = page.setCookie(...cookies)
        .then(() => debuglog('cookie(s) set: ' + JSON.stringify(cookies)))
        .catch(logCookiesFailure);
    } catch (e) {
      logCookiesFailure(e);
    }
  }

  // assumes the page was already configured from previous call!
  if (reusedPage) {
    await Promise.all([setViewportPromise, setUserAgentPromise, setCustomPageHeadersPromise, setCookiesPromise]);
    debuglog('preparePage DONE');
    return page;
  }

  // disable Puppeteer navigation timeouts;
  // Penthouse tracks these internally instead.
  page.setDefaultNavigationTimeout(0);
  let blockJSRequestsPromise;

  if (blockJSRequests) {
    // NOTE: with JS disabled we cannot use JS timers inside page.evaluate
    // (setTimeout, setInterval), however requestAnimationFrame works.
    blockJSRequestsPromise = Promise.all([page.setJavaScriptEnabled(false), setupBlockJsRequests(page)]).then(() => {
      debuglog('blocking js requests DONE');
    });
  }

  page.on('error', error => {
    debuglog('page error: ' + error);
    cleanupAndExit({
      error
    });
  });
  page.on('console', msg => {
    // `msg` may expose its text as a method, as a property, or be the text itself
    const text = msg.text ? typeof msg.text === 'function' ? msg.text() : msg.text : msg;

    // pass through log messages
    // - the ones sent by penthouse for debugging has 'debug: ' prefix.
    if (/^debug: /.test(text)) {
      debuglog(text.replace(/^debug: /, ''));
    }
  });
  debuglog('page event listeners set');

  await Promise.all([setViewportPromise, setUserAgentPromise, setCustomPageHeadersPromise, setCookiesPromise, blockJSRequestsPromise]);
  debuglog('preparePage DONE');
  return page;
}
238
/**
 * Takes a screenshot of `page` and stores it at
 * `<screenshots.basePath>-<type><screenshotExtension>`.
 *
 * @param {Object} options
 * @param {string} options.type - label used in the file name and log messages.
 * @param {Object} options.page - page exposing `screenshot(options)`.
 * @param {Object} options.screenshots - screenshot options; all of them are
 *   forwarded to `page.screenshot`, with `path` overridden.
 * @param {string} options.screenshotExtension - file extension including the dot.
 * @param {Function} options.debuglog - logger to report progress to.
 * @returns {Promise<*>} resolves with the return value of the final `debuglog` call.
 */
async function grabPageScreenshot({
  type,
  page,
  screenshots,
  screenshotExtension,
  debuglog
}) {
  const path = screenshots.basePath + `-${type}` + screenshotExtension;
  debuglog(`take ${type} screenshot, START`);

  // copy the caller's options so the computed path does not leak back into them
  const screenshotOptions = _objectSpread({}, screenshots, {
    path
  });

  await page.screenshot(screenshotOptions);
  return debuglog(`take ${type} screenshot DONE, path: ${path}`);
}
252
/**
 * Generates critical CSS for one url/viewport combination.
 *
 * Steps: prepare a browser page, parse the CSS into an AST, strip non-matching
 * media queries, load the page, prune non-critical selectors inside the
 * browser, clean up the AST and serialise it back to CSS. Optionally takes
 * "before" and "after" screenshots.
 *
 * @param {Object} options
 * @param {Promise<Object>} options.pagePromise - forwarded to `preparePage`.
 * @param {string} options.url - page to load.
 * @param {string} options.cssString - full CSS to reduce.
 * @param {number} options.width - viewport width.
 * @param {number} options.height - viewport height.
 * @param {Array} [options.forceInclude] - forwarded to the selector profile builder (null when empty).
 * @param {Array} [options.forceExclude] - forwarded to the selector profile builder (null when empty).
 * @param {boolean} [options.strict] - forwarded to `astFromCss`.
 * @param {string} options.userAgent - forwarded to `preparePage`.
 * @param {number} options.renderWaitTime - milliseconds to wait after page load; also passed to the in-browser pruning script.
 * @param {number} options.timeout - milliseconds after which the whole job is rejected.
 * @param {number} [options.pageLoadSkipTimeout] - forwarded to `loadPage`.
 * @param {boolean} [options.blockJSRequests] - forwarded to `preparePage`.
 * @param {Object} [options.customPageHeaders] - forwarded to `preparePage`; reset on the page after success.
 * @param {Array<Object>} [options.cookies] - forwarded to `preparePage`; deleted from the page after success.
 * @param {Object} [options.screenshots] - screenshots are taken when `basePath` is set.
 * @param {Array} [options.propertiesToRemove] - forwarded to post-formatting.
 * @param {number} [options.maxEmbeddedBase64Length] - forwarded to post-formatting.
 * @param {boolean} [options.keepLargerMediaQueries] - forwarded to the media query remover.
 * @param {number} [options.maxElementsToCheckPerSelector] - passed to the in-browser pruning script.
 * @param {boolean} [options.unstableKeepBrowserAlive] - not used by this function.
 * @param {number|RegExp|Function} [options.allowedResponseCode] - forwarded to `loadPage`.
 * @returns {Promise<string>} the generated critical CSS.
 */
async function pruneNonCriticalCssLauncher({
  pagePromise,
  url,
  cssString,
  width,
  height,
  forceInclude,
  forceExclude,
  strict,
  userAgent,
  renderWaitTime,
  timeout,
  pageLoadSkipTimeout,
  blockJSRequests,
  customPageHeaders,
  cookies,
  screenshots,
  propertiesToRemove,
  maxEmbeddedBase64Length,
  keepLargerMediaQueries,
  maxElementsToCheckPerSelector,
  unstableKeepBrowserAlive,
  allowedResponseCode
}) {
  let _hasExited = false;

  // hacky to get around _hasExited only available in the scope of this function
  const getHasExited = () => _hasExited;

  const takeScreenshots = screenshots && screenshots.basePath;
  const screenshotExtension = takeScreenshots && screenshots.type === 'jpeg' ? '.jpg' : '.png';

  // Promises started early and awaited later (or possibly never, on an early
  // exit) get a no-op rejection handler so a failure in the meantime is not
  // reported as an unhandled rejection. The original promise is returned, so
  // awaiting it still surfaces the error.
  const markHandled = promise => {
    Promise.resolve(promise).catch(() => {});
    return promise;
  };

  return new Promise((resolve, reject) => {
    debuglog('Penthouse core start');
    let page;
    let killTimeout;

    // Settles the returned promise exactly once. On success the (re-used) page
    // gets its custom headers and cookies reset first; on error the promise is
    // rejected right away without touching the page.
    async function cleanupAndExit({
      error,
      returnValue
    }) {
      if (_hasExited) {
        return;
      }

      debuglog('cleanupAndExit');
      _hasExited = true;
      clearTimeout(killTimeout);

      if (error) {
        return reject(error);
      }

      if (page) {
        // reset page headers and cookies, since we re-use the page.
        // Both resets are best-effort: a failing reset is logged and must not
        // keep the result from being delivered (the kill timer is already
        // cleared at this point, so nothing else would settle the promise).
        const resetPromises = [];

        if (customPageHeaders && Object.keys(customPageHeaders).length) {
          const logHeadersResetFailure = e => debuglog('failed resetting extra http headers: ' + e);

          try {
            resetPromises.push(page.setExtraHTTPHeaders({}).then(() => debuglog('customPageHeaders reset')).catch(logHeadersResetFailure));
          } catch (e) {
            logHeadersResetFailure(e);
          }
        }

        if (cookies && cookies.length) {
          const logCookiesResetFailure = e => debuglog('failed to reset cookies: ' + e);

          try {
            resetPromises.push(page.deleteCookie(...cookies).then(() => debuglog('cookie(s) reset: ')).catch(logCookiesResetFailure));
          } catch (e) {
            logCookiesResetFailure(e);
          }
        }

        await Promise.all(resetPromises);
      }

      return resolve(returnValue);
    }

    killTimeout = setTimeout(() => {
      cleanupAndExit({
        error: new Error('Penthouse timed out after ' + timeout / 1000 + 's. ')
      });
    }, timeout);

    async function generateCriticalCss() {
      // 1. start preparing a browser page (tab) [NOT BLOCKING]
      const updatedPagePromise = markHandled(preparePage({
        page,
        pagePromise,
        width,
        height,
        userAgent,
        cookies,
        customPageHeaders,
        blockJSRequests,
        cleanupAndExit,
        getHasExited
      }));

      // 2. parse ast
      // -> [BLOCK FOR] AST parsing
      let ast;

      try {
        ast = await astFromCss({
          cssString,
          strict
        });
      } catch (e) {
        cleanupAndExit({
          error: e
        });
        return;
      }

      // 3. Further process the ast [BLOCKING]
      // Strip out non matching media queries.
      // Need to be done before buildSelectorProfile;
      // (very fast but could be done together/in parallel in future)
      (0, _nonMatchingMediaQueryRemover.default)(ast, width, height, keepLargerMediaQueries);
      debuglog('stripped out non matching media queries');

      // -> [BLOCK FOR] page preparation
      page = await updatedPagePromise;

      if (!page) {
        cleanupAndExit({
          error: new Error('Could not open page in browser')
        });
        return;
      }

      // load the page (slow) [NOT BLOCKING]
      const loadPagePromise = loadPage(page, url, timeout, pageLoadSkipTimeout, allowedResponseCode);

      // turn css to formatted selectorlist [NOT BLOCKING]
      debuglog('turn css to formatted selectorlist START');
      const buildSelectorProfilePromise = markHandled((0, _selectorsProfile.default)(ast, forceInclude && forceInclude.length ? forceInclude : null, forceExclude && forceExclude.length ? forceExclude : null).then(res => {
        debuglog('turn css to formatted selectorlist DONE');
        return res;
      }));

      // -> [BLOCK FOR] page load
      try {
        await loadPagePromise;
      } catch (e) {
        cleanupAndExit({
          error: e
        });
        return;
      }

      if (_hasExited) return;

      // Penthouse waits for the `load` event to fire
      // (before loadPagePromise resolves; except for very slow loading pages)
      // (via default puppeteer page.goto options.waitUntil setting,
      // https://github.com/GoogleChrome/puppeteer/blob/v1.8.0/docs/api.md#pagegotourl-options)
      // This means "all of the objects in the document are in the DOM, and all the images...
      // have finished loading".
      // This is necessary for Penthouse to know the correct layout of the critical viewport
      // (well really, we would only need to load the critical viewport.. not possible?)
      // However, @font-face's can be available later,
      // and for this reason it can be useful to delay further - if screenshots are used.
      // For this `renderWaitTime` can be used.
      // Note: `renderWaitTime` is not a very good name,
      // and just setting a time is also not the most effective solution to f.e. wait for fonts.
      // In future probably deprecate and allow for a custom function instead (returning a promise).
      // -> [BLOCK FOR] renderWaitTime - needs to be done before we take any screenshots
      await new Promise(resolveWait => {
        setTimeout(() => {
          debuglog('waited for renderWaitTime: ' + renderWaitTime);
          resolveWait();
        }, renderWaitTime);
      });

      // take before screenshot (optional) [NOT BLOCKING]
      const beforeScreenshotPromise = takeScreenshots ? markHandled(grabPageScreenshot({
        type: 'before',
        page,
        screenshots,
        screenshotExtension,
        debuglog
      })) : Promise.resolve();

      // -> [BLOCK FOR] css into formatted selectors list with "sourcemap"
      // latter used to map back to full css rule
      const {
        selectors,
        selectorNodeMap
      } = await buildSelectorProfilePromise;

      if (getHasExited()) {
        return;
      }

      // -> [BLOCK FOR] critical css selector pruning (in browser)
      let criticalSelectors;

      try {
        criticalSelectors = await page.evaluate(_pruneNonCriticalSelectors.default, {
          selectors,
          renderWaitTime,
          maxElementsToCheckPerSelector
        });
        debuglog('pruneNonCriticalSelectors done');
      } catch (err) {
        debuglog('pruneNonCriticalSelector threw an error: ' + err);
        const errorDueToPageUnloaded = PUPPETEER_PAGE_UNLOADED_DURING_EXECUTION_ERROR_REGEX.test(err);
        cleanupAndExit({
          error: errorDueToPageUnloaded ? new Error(PAGE_UNLOADED_DURING_EXECUTION_ERROR_MESSAGE) : err
        });
        return;
      }

      if (getHasExited()) {
        return;
      }

      // -> [BLOCK FOR] clean up final ast for critical css
      debuglog('AST cleanup START');

      // NOTE: this function mutates the AST
      (0, _postformatting.default)({
        ast,
        selectorNodeMap,
        criticalSelectors,
        propertiesToRemove,
        maxEmbeddedBase64Length
      });
      debuglog('AST cleanup DONE');

      // -> [BLOCK FOR] generate final critical css from critical ast
      const css = _cssTree.default.generate(ast);

      debuglog('generated CSS from AST');

      // take after screenshot (optional) [BLOCKING]
      if (takeScreenshots) {
        // wait for the before screenshot, before start modifying the page
        await beforeScreenshotPromise;
        debuglog('inline critical styles for after screenshot');
        await page.evaluate(_replacePageCss.default, {
          css
        });
        await grabPageScreenshot({
          type: 'after',
          page,
          screenshots,
          screenshotExtension,
          debuglog
        });
      }

      debuglog('generateCriticalCss DONE');
      cleanupAndExit({
        returnValue: css
      });
    }

    // Any failure not handled explicitly above (selector profiling, AST
    // post-processing, screenshots, ...) rejects the returned promise right
    // away instead of surfacing as an unhandled rejection and leaving the
    // caller waiting for the kill timeout.
    generateCriticalCss().catch(error => {
      debuglog('unexpected error during critical css generation: ' + error);
      cleanupAndExit({
        error: error || new Error('Critical css generation failed with an unspecified error')
      });
    });
  });
}
515
// The launcher function is this module's default export.
516var _default = pruneNonCriticalCssLauncher;
517exports.default = _default;
\No newline at end of file