UNPKG

1.88 kBJavaScriptView Raw
1/**
2 * Copyright 2017 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * @fileoverview Search developers.google.com/web for articles tagged
19 * "Headless Chrome" and scrape results from the results page.
20 */
21
22'use strict';
23
24const puppeteer = require('puppeteer');
25
// Entry point: searches developers.google.com/web for "Headless Chrome" and
// prints one "<title> - <url>" line per search result to stdout.
//
// The browser is closed in a `finally` block so that a failure in any step
// (navigation, a selector wait timing out, the in-page evaluation) does not
// skip the explicit shutdown. A rejection from the whole run is logged and
// turned into a non-zero exit code instead of being left as an unhandled
// promise rejection.
(async() => {
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();

    await page.goto('https://developers.google.com/web/');

    // Type into search box.
    await page.type('#searchbox input', 'Headless Chrome');

    // Wait for suggest overlay to appear and click "show all results".
    const allResultsSelector = '.devsite-suggest-all-results';
    await page.waitForSelector(allResultsSelector);
    await page.click(allResultsSelector);

    // Wait for the results page to load and display the results.
    const resultsSelector = '.gsc-results .gsc-thumbnail-inside a.gs-title';
    await page.waitForSelector(resultsSelector);

    // Extract the results from the page. The callback is evaluated inside the
    // page, so it must not reference variables from this scope; the selector
    // is handed over as an explicit argument instead.
    const links = await page.evaluate(resultsSelector => {
      const anchors = Array.from(document.querySelectorAll(resultsSelector));
      return anchors.map(anchor => {
        // Keep only the text before the first '|' in the anchor text.
        const title = anchor.textContent.split('|')[0].trim();
        return `${title} - ${anchor.href}`;
      });
    }, resultsSelector);
    console.log(links.join('\n'));
  } finally {
    // Runs on success and on failure alike.
    await browser.close();
  }
})().catch(error => {
  console.error(error);
  process.exitCode = 1;
});