1 | var q = require('q')
|
2 | var isCss = require('is-css')
|
3 | var isPresent = require('is-present')
|
4 | var isBlank = require('is-blank')
|
5 | var isUrl = require('is-url-superb')
|
6 | var fetch = require('node-fetch')
|
7 | var AbortController = require('abort-controller')
|
8 | var cheerio = require('cheerio')
|
9 | var normalizeUrl = require('normalize-url')
|
10 | var stripHtmlComments = require('strip-html-comments')
|
11 | var stripWaybackToolbar = require('strip-wayback-toolbar')
|
12 | var resolveCssImportUrls = require('resolve-css-import-urls')
|
13 | var ua = require('ua-string')
|
14 |
|
15 | var getLinkContents = require('./utils/get-link-contents')
|
16 | var createLink = require('./utils/create-link')
|
17 |
|
18 | module.exports = function (url, options, html) {
|
19 | var deferred = q.defer()
|
20 | options = options || {}
|
21 | options.timeout = options.timeout || 5000
|
22 |
|
23 | if (typeof url !== 'string' || isBlank(url) || !isUrl(url)) {
|
24 | throw new TypeError('get-css expected a url as a string')
|
25 | }
|
26 |
|
27 | url = normalizeUrl(url, { stripWWW: false })
|
28 |
|
29 | if (options.ignoreCerts) {
|
30 | process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'
|
31 | }
|
32 |
|
33 | var status = {
|
34 | parsed: 0,
|
35 | total: 0,
|
36 | }
|
37 |
|
38 | var result = {
|
39 | links: [],
|
40 | styles: [],
|
41 | css: '',
|
42 | }
|
43 |
|
44 | function handleResolve() {
|
45 | if (status.parsed >= status.total) {
|
46 | deferred.resolve(result)
|
47 | }
|
48 | }
|
49 |
|
50 | function parseHtml(html) {
|
51 | var stripWayback = options.stripWayback || false
|
52 | if (stripWayback) {
|
53 | html = stripWaybackToolbar(html)
|
54 | }
|
55 | var $ = cheerio.load(html)
|
56 | result.pageTitle = $('head > title').text()
|
57 | result.html = html
|
58 |
|
59 | $('[rel=stylesheet]').each(function () {
|
60 | var link = $(this).attr('href')
|
61 | if (isPresent(link)) {
|
62 | result.links.push(createLink(link, url))
|
63 | } else {
|
64 | result.styles.push(stripHtmlComments($(this).html()))
|
65 | }
|
66 | })
|
67 |
|
68 | $('style').each(function () {
|
69 | result.styles.push(stripHtmlComments($(this).html()))
|
70 | })
|
71 |
|
72 | status.total = result.links.length + result.styles.length
|
73 | if (!status.total) {
|
74 | handleResolve()
|
75 | }
|
76 |
|
77 | result.links.forEach(function (link) {
|
78 | getLinkContents(link.url, options)
|
79 | .then(function (css) {
|
80 | handleCssFromLink(link, css)
|
81 | })
|
82 | .catch(function (error) {
|
83 | link.error = error
|
84 | status.parsed++
|
85 | handleResolve()
|
86 | })
|
87 | })
|
88 |
|
89 | result.styles.forEach(function (css) {
|
90 | result.css += css
|
91 | status.parsed++
|
92 | handleResolve()
|
93 | })
|
94 | }
|
95 |
|
96 | function handleCssFromLink(link, css) {
|
97 | link.css += css
|
98 |
|
99 | parseCssForImports(link, css)
|
100 |
|
101 | status.parsed++
|
102 | handleResolve()
|
103 | }
|
104 |
|
105 |
|
106 | function parseCssForImports(link, css) {
|
107 | link.imports = resolveCssImportUrls(link.url, css)
|
108 | status.total += link.imports.length
|
109 | result.css += css
|
110 |
|
111 | link.imports.forEach(function (importUrl) {
|
112 | var importLink = createLink(importUrl, importUrl)
|
113 | result.links.push(importLink)
|
114 |
|
115 | getLinkContents(importLink.url, options)
|
116 | .then(function (css) {
|
117 | handleCssFromLink(importLink, css)
|
118 | })
|
119 | .catch(function (error) {
|
120 | link.error = error
|
121 | status.parsed++
|
122 | handleResolve()
|
123 | })
|
124 | })
|
125 | }
|
126 |
|
127 | function handleBody(body) {
|
128 | if (isCss(url)) {
|
129 | var link = createLink(url, url)
|
130 | result.links.push(link)
|
131 | handleCssFromLink(link, body)
|
132 | } else {
|
133 | parseHtml(body)
|
134 | }
|
135 | }
|
136 |
|
137 | if (html) {
|
138 | handleBody(html)
|
139 | } else {
|
140 | var controller = new AbortController()
|
141 |
|
142 | var options = Object.assign({}, options)
|
143 | options.headers = options.headers || {}
|
144 | options.headers['User-Agent'] = options.headers['User-Agent'] || ua
|
145 | options.signal = controller.signal
|
146 |
|
147 | var timeoutTimer = setTimeout(() => {
|
148 | controller.abort()
|
149 | }, options.timeout)
|
150 | fetch(url, options)
|
151 | .then((response) => {
|
152 | if (response && response.status != 200) {
|
153 | if (options.verbose) {
|
154 | console.log('Received a ' + response.status + ' from: ' + url)
|
155 | }
|
156 | deferred.reject({ url: url, status: response.status })
|
157 | return
|
158 | }
|
159 |
|
160 | return response.text()
|
161 | })
|
162 | .then((body) => {
|
163 | handleBody(body)
|
164 | })
|
165 | .catch((error) => {
|
166 | if (options.verbose) {
|
167 | console.log('Error from ' + url + ' ' + error)
|
168 | }
|
169 | deferred.reject(error)
|
170 | })
|
171 | .finally(() => {
|
172 | clearTimeout(timeoutTimer)
|
173 | })
|
174 | }
|
175 |
|
176 | return deferred.promise
|
177 | }
|