1 | 'use strict';
|
2 |
|
3 | var EventEmitter = require('events').EventEmitter,
|
4 | emits = require('emits'),
|
5 | html = require('htmlparser2'),
|
6 | domutils = require('domutils'),
|
7 | util = require('util'),
|
8 | uuid = require('uuid'),
|
9 | async = require('async'),
|
10 | url = require('url'),
|
11 | request = require('request-promise'),
|
12 | probeImageSize = require('probe-image-size'),
|
13 | _ = require('lodash'),
|
14 | sizeOf = require('image-size'),
|
15 | validator = require('validator'),
|
16 | helpers = require('./helpers'),
|
// Built-in configuration. The Amperize constructor deep-merges caller options
// over this object. Each `amp-*` entry holds the fallback attributes used for
// that element type when the source markup does not supply them.
DEFAULTS = {
    'amp-img': {layout: 'responsive', width: 600, height: 400},
    'amp-anim': {layout: 'responsive', width: 600, height: 400},
    'amp-iframe': {
        layout: 'responsive',
        width: 600,
        height: 400,
        // Applied only when the source iframe carries no sandbox attribute.
        sandbox: 'allow-scripts allow-same-origin'
    },
    'amp-youtube': {layout: 'responsive', width: 600, height: 400},
    // Forwarded as the `timeout` option of the image-size lookups
    // (presumably milliseconds -- confirm against the HTTP client in use).
    'request_timeout': 3000
};
|
41 |
|
42 |
|
// File extensions (lower-case, without the dot) whose dimensions are read by
// downloading the whole file and measuring it, instead of probing the URL.
// NOTE(review): presumably these are formats the probing library cannot
// handle -- confirm before extending the list.
const FETCH_ONLY_FORMATS = ['cur', 'icns', 'ico', 'dds'];
|
46 |
|
47 |
|
48 |
|
49 |
|
50 |
|
51 |
|
52 |
|
53 |
|
54 |
|
55 |
|
/**
 * Amperize constructor.
 *
 * Creates a converter instance that is also an EventEmitter. The instance
 * owns a single HTML parser whose DOM handler reports its result by emitting
 * the `read` event on this instance.
 *
 * @param {Object} [options] Overrides that are deep-merged over DEFAULTS.
 */
function Amperize(options) {
    // Run the parent constructor so the emitter state is initialised
    // explicitly rather than relying on EventEmitter's lazy set-up.
    EventEmitter.call(this);

    this.config = _.merge({}, DEFAULTS, options || {});
    this.emits = emits;

    // The handler callback is an emitter for `read`, so parser results
    // (error, dom) arrive as arguments of that event.
    this.htmlParser = new html.Parser(
        new html.DomHandler(this.emits('read'))
    );
}

util.inherits(Amperize, EventEmitter);
|
66 |
|
67 |
|
68 |
|
69 |
|
70 |
|
71 |
|
72 |
|
73 |
|
74 |
|
75 |
|
/**
 * Parses an HTML string and hands the amperized markup to `callback`.
 *
 * Each call gets its own uuid so the completion event (`parsed: <id>`) of
 * one call cannot be confused with another call's.
 *
 * @param {string} content HTML to convert.
 * @param {Function} callback Receives the arguments of the `parsed: <id>` event.
 * @throws {Error} When `callback` is not a function.
 */
Amperize.prototype.parse = function parse(content, callback) {
    if (typeof callback !== 'function') {
        throw new Error('No callback provided');
    }

    const parseId = uuid.v4();
    const onRead = this.amperizer.bind(this, parseId);

    // One-shot listeners: the parser result feeds the amperizer, and the
    // amperizer's completion event feeds the caller.
    this.once('read', onRead);
    this.once('parsed: ' + parseId, callback);

    this.htmlParser.parseComplete(content);
};
|
90 |
|
91 |
|
92 |
|
93 |
|
94 |
|
95 |
|
96 |
|
97 |
|
98 |
|
99 |
|
100 |
|
/**
 * `read` event handler: starts the AMP conversion of a freshly parsed DOM.
 *
 * @param {string} id Identifier of the parse call; the result is emitted as
 *   the `parsed: <id>` event.
 * @param {?Error} error Error reported by the HTML parser, if any.
 * @param {Array} dom Parsed DOM nodes handed over by the parser.
 * @throws {Error} 'Amperizer failed to parse DOM' when the parser reported an
 *   error; the parser's error is available as `cause`.
 */
Amperize.prototype.amperizer = function amperizer(id, error, dom) {
    var self = this;
    var eventName = 'parsed: ' + id;
    var failure, emitParsed, pending;

    if (error) {
        // Error's second constructor argument is an options bag, so passing
        // the parser error there directly would drop it. Attach it explicitly.
        failure = new Error('Amperizer failed to parse DOM');
        failure.cause = error;
        throw failure;
    }

    emitParsed = this.emits(eventName);
    pending = this.traverse(dom, '', emitParsed);

    // traverse is async: without a handler a rejection would leave the
    // caller's callback waiting forever.
    if (pending && typeof pending.catch === 'function') {
        pending.catch(function onTraverseRejected(err) {
            if (self.listenerCount(eventName) > 0) {
                emitParsed(err);
                return;
            }

            // The callback has already fired; keep the failure visible.
            throw err;
        });
    }
};
|
108 |
|
109 |
|
110 |
|
111 |
|
112 |
|
113 |
|
114 |
|
115 |
|
116 |
|
117 |
|
118 |
|
/**
 * Rewrites a DOM tree to AMP markup and serialises it to an HTML string.
 *
 * Runs in two phases:
 *  1. When `config['amp-img']` is truthy, every `img` with a `src` in `data`
 *     (including descendants) is turned into `amp-img`, or `amp-anim` when
 *     the src ends in `.gif`. Images whose src starts with `http` get their
 *     real dimensions looked up, at most 10 lookups at a time; any lookup
 *     failure leaves the element a plain `img`. Other images receive the
 *     configured default width and height.
 *  2. The nodes are serialised in order. On the way `iframe` becomes
 *     `amp-youtube` or `amp-iframe`, `audio` becomes `amp-audio`, and the
 *     `src` of iframes, audio elements and children of audio elements is
 *     upgraded to https.
 *
 * The image phase runs once per call. Serialising children goes through an
 * internal walker rather than re-entering this function, so subtrees are not
 * rescanned and failed lookups are not repeated per nesting level.
 *
 * @param {Array} data DOM nodes to convert (as handed over by the HTML parser).
 * @param {string} html Markup accumulated so far; the result is appended to it.
 * @param {Function} done Node-style callback `(error, html)`.
 * @returns {Promise} Settles once serialisation has been started; the result
 *   itself is delivered through `done`.
 */
Amperize.prototype.traverse = async function traverse(data, html, done) {
    var self = this;
    var imageSizeCache = {};

    // Shared by both image-size strategies.
    var requestOptions = {
        headers: {
            'User-Agent': 'Mozilla/5.0 Safari/537.36'
        },
        timeout: self.config['request_timeout'],
        encoding: null
    };

    /**
     * Sets `layout` unless already present: `fixed` for elements narrower
     * than 300, otherwise the configured layout for the element's name.
     */
    function setLayoutAttribute(element) {
        if (element.attribs.layout) {
            return;
        }

        element.attribs.layout = element.attribs.width < 300
            ? 'fixed'
            : self.config[element.name].layout;
    }

    /**
     * Upgrades `http://` and protocol-relative (`//`) src values to https.
     * Only the start of the value is inspected, so an `https://` occurring
     * later in the URL (e.g. in a query string) does not block the upgrade.
     */
    function useSecureSchema(element) {
        var src = element.attribs && element.attribs.src;

        if (!src || /^https:\/\//i.test(src)) {
            return;
        }

        if (/^http:\/\//i.test(src)) {
            element.attribs.src = src.replace(/^http:\/\//i, 'https://');
        } else if (src.indexOf('//') === 0) {
            element.attribs.src = 'https:' + src;
        }
    }

    /** Looks up the size by probing the URL; caches the result. */
    function _probeImageSize(imageUrl) {
        return probeImageSize(
            imageUrl,
            requestOptions
        ).then(function (result) {
            imageSizeCache[imageUrl] = result;
            return result;
        });
    }

    /** Looks up the size by downloading the file and measuring it; caches the result. */
    function _fetchImageSize(imageUrl) {
        return request(
            imageUrl,
            requestOptions
        ).then(function (response) {
            var result = sizeOf(response);
            imageSizeCache[imageUrl] = result;
            return result;
        });
    }

    /** Resolves with the dimensions of `imageUrl`, consulting the cache first. */
    function _getImageSize(imageUrl) {
        if (imageSizeCache[imageUrl]) {
            return Promise.resolve(imageSizeCache[imageUrl]);
        }

        // Extensions listed in FETCH_ONLY_FORMATS are downloaded in full,
        // everything else is probed.
        const extensionMatch = imageUrl.match(/(?:\.)([a-zA-Z]{3,4})(\?|$)/) || [];
        const extension = (extensionMatch[1] || '').toLowerCase();
        if (FETCH_ONLY_FORMATS.includes(extension)) {
            return _fetchImageSize(imageUrl);
        }

        return _probeImageSize(imageUrl);
    }

    /**
     * Builds the async task that converts one `img` element in place.
     * The task never rejects for a bad image: on any failure the element is
     * left (or reset to) a plain `img`.
     */
    function amperizeImageElem(element) {
        return async function () {
            var src, dimensions;

            if (!element.attribs || !element.attribs.src) {
                return;
            }

            try {
                src = url.parse(element.attribs.src).href;
            } catch (err) {
                // Unparseable src: best effort, keep the element as `img`.
                return;
            }

            element.name = src.match(/(\.gif$)/) ? 'amp-anim' : 'amp-img';

            if (src.indexOf('http') === 0) {
                try {
                    if (!validator.isURL(src)) {
                        element.name = 'img';
                        return;
                    }

                    dimensions = await _getImageSize(src);

                    // Multi-image results: use the largest width and the
                    // largest height among the contained images.
                    if (dimensions.images) {
                        dimensions.width = _.maxBy(dimensions.images, function (w) {return w.width;}).width;
                        dimensions.height = _.maxBy(dimensions.images, function (h) {return h.height;}).height;
                    }

                    if (!dimensions.width || !dimensions.height) {
                        element.name = 'img';
                        return;
                    }

                    element.attribs.width = dimensions.width;
                    element.attribs.height = dimensions.height;
                } catch (err) {
                    element.name = 'img';
                    return;
                }
            } else {
                // No lookup for non-http sources; fall back to the defaults.
                element.attribs.width = self.config[element.name].width;
                element.attribs.height = self.config[element.name].height;
            }

            if (!element.attribs.layout) {
                setLayoutAttribute(element);
            }
        };
    }

    /**
     * Serialises `nodes` in order, appending to `markup`, and calls
     * `callback(error, html)` when finished. Recurses into children.
     */
    function walk(nodes, markup, callback) {
        async.reduce(nodes, markup, function reduce(html, element, step) {
            var children;
            var youtubeId;

            // NOTE(review): the pattern is unanchored, so any tag name that
            // merely contains one of these words is dropped as well.
            if (/(style|script|textarea|link)/.test(element.name)) {
                return step(null, html);
            }

            function close(error, html) {
                if (error) {
                    return step(error);
                }

                html += helpers.close(element);
                step(null, html);
            }

            function enter() {
                children = element.children;
                html += helpers[element.type](element);

                if (!children || !children.length) {
                    return close(null, html);
                }

                // Descend on a fresh tick (setImmediate) instead of recursing inline.
                setImmediate(function delay() {
                    walk(children, html, close);
                });
            }

            if (element.name === 'iframe') {
                if (!element.attribs.src) {
                    return enter();
                }

                youtubeId = element.attribs.src.match(/^.*(youtu.be\/|youtube(-nocookie)?.com\/(v\/|.*u\/\w\/|embed\/|.*v=))([\w-]{11}).*/);
                useSecureSchema(element);

                if (youtubeId) {
                    element.name = 'amp-youtube';
                    element.attribs['data-videoid'] = youtubeId[4];
                    delete element.attribs.src;
                    delete element.attribs.sandbox;
                    delete element.attribs.allowfullscreen;
                    delete element.attribs.allow;
                    delete element.attribs.frameborder;
                } else {
                    element.name = 'amp-iframe';
                    element.attribs.sandbox = !element.attribs.sandbox ? self.config['amp-iframe'].sandbox : element.attribs.sandbox;
                }

                // Both iframe flavours take their size fallback from the
                // `amp-iframe` configuration.
                if (!element.attribs.width || !element.attribs.height || !element.attribs.layout) {
                    element.attribs.width = !element.attribs.width ? self.config['amp-iframe'].width : element.attribs.width;
                    element.attribs.height = !element.attribs.height ? self.config['amp-iframe'].height : element.attribs.height;
                    setLayoutAttribute(element);
                }
            }

            if (element.name === 'audio') {
                element.name = 'amp-audio';
                useSecureSchema(element);
            }

            if (element.attribs && element.attribs.src && element.parent && element.parent.name === 'amp-audio') {
                useSecureSchema(element);
            }

            return enter();
        }, callback);
    }

    if (self.config['amp-img']) {
        try {
            var imgElems = domutils.findAll(function (elem) {
                return elem.name === 'img' && elem.attribs.src;
            }, data);
            var imgTasks = imgElems.map(function (elem) {
                return amperizeImageElem(elem);
            });

            await async.parallelLimit(imgTasks, 10);
        } catch (err) {
            // Report through the callback; otherwise the caller would wait
            // forever on a rejected promise nobody observes.
            done(err, html);
            return;
        }
    }

    walk(data, html, done);
};
|
334 |
|
335 | module.exports = Amperize;
|