// Source retrieved via the UNPKG file viewer (4.47 kB, JavaScript).

var q = require('q')
var isCss = require('is-css')
var isPresent = require('is-present')
var isBlank = require('is-blank')
var isUrl = require('is-url-superb')
var fetch = require('node-fetch')
var AbortController = require('abort-controller')
var cheerio = require('cheerio')
var normalizeUrl = require('normalize-url')
var stripHtmlComments = require('strip-html-comments')
var stripWaybackToolbar = require('strip-wayback-toolbar')
var resolveCssImportUrls = require('resolve-css-import-urls')
var ua = require('ua-string')

var getLinkContents = require('./utils/get-link-contents')
var createLink = require('./utils/create-link')

18module.exports = function (url, options, html) {
19 var deferred = q.defer()
20 options = options || {}
21 options.timeout = options.timeout || 5000
22
23 if (typeof url !== 'string' || isBlank(url) || !isUrl(url)) {
24 throw new TypeError('get-css expected a url as a string')
25 }
26
27 url = normalizeUrl(url, { stripWWW: false })
28
29 if (options.ignoreCerts) {
30 process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'
31 }
32
33 var status = {
34 parsed: 0,
35 total: 0,
36 }
37
38 var result = {
39 links: [],
40 styles: [],
41 css: '',
42 }
43
44 function handleResolve() {
45 if (status.parsed >= status.total) {
46 deferred.resolve(result)
47 }
48 }
49
50 function parseHtml(html) {
51 var stripWayback = options.stripWayback || false
52 if (stripWayback) {
53 html = stripWaybackToolbar(html)
54 }
55 var $ = cheerio.load(html)
56 result.pageTitle = $('head > title').text()
57 result.html = html
58
59 $('[rel=stylesheet]').each(function () {
60 var link = $(this).attr('href')
61 if (isPresent(link)) {
62 result.links.push(createLink(link, url))
63 } else {
64 result.styles.push(stripHtmlComments($(this).html()))
65 }
66 })
67
68 $('style').each(function () {
69 result.styles.push(stripHtmlComments($(this).html()))
70 })
71
72 status.total = result.links.length + result.styles.length
73 if (!status.total) {
74 handleResolve()
75 }
76
77 result.links.forEach(function (link) {
78 getLinkContents(link.url, options)
79 .then(function (css) {
80 handleCssFromLink(link, css)
81 })
82 .catch(function (error) {
83 link.error = error
84 status.parsed++
85 handleResolve()
86 })
87 })
88
89 result.styles.forEach(function (css) {
90 result.css += css
91 status.parsed++
92 handleResolve()
93 })
94 }
95
96 function handleCssFromLink(link, css) {
97 link.css += css
98
99 parseCssForImports(link, css)
100
101 status.parsed++
102 handleResolve()
103 }
104
105 // Handle potential @import url(foo.css) statements in the CSS.
106 function parseCssForImports(link, css) {
107 link.imports = resolveCssImportUrls(link.url, css)
108 status.total += link.imports.length
109 result.css += css
110
111 link.imports.forEach(function (importUrl) {
112 var importLink = createLink(importUrl, importUrl)
113 result.links.push(importLink)
114
115 getLinkContents(importLink.url, options)
116 .then(function (css) {
117 handleCssFromLink(importLink, css)
118 })
119 .catch(function (error) {
120 link.error = error
121 status.parsed++
122 handleResolve()
123 })
124 })
125 }
126
127 function handleBody(body) {
128 if (isCss(url)) {
129 var link = createLink(url, url)
130 result.links.push(link)
131 handleCssFromLink(link, body)
132 } else {
133 parseHtml(body)
134 }
135 }
136
137 if (html) {
138 handleBody(html)
139 } else {
140 var controller = new AbortController()
141
142 var options = Object.assign({}, options)
143 options.headers = options.headers || {}
144 options.headers['User-Agent'] = options.headers['User-Agent'] || ua
145 options.signal = controller.signal
146
147 var timeoutTimer = setTimeout(() => {
148 controller.abort()
149 }, options.timeout)
150 fetch(url, options)
151 .then((response) => {
152 if (response && response.status != 200) {
153 if (options.verbose) {
154 console.log('Received a ' + response.status + ' from: ' + url)
155 }
156 deferred.reject({ url: url, status: response.status })
157 return
158 }
159
160 return response.text()
161 })
162 .then((body) => {
163 handleBody(body)
164 })
165 .catch((error) => {
166 if (options.verbose) {
167 console.log('Error from ' + url + ' ' + error)
168 }
169 deferred.reject(error)
170 })
171 .finally(() => {
172 clearTimeout(timeoutTimer)
173 })
174 }
175
176 return deferred.promise
177}