UNPKG

19.7 kBJavaScriptView Raw
1"use strict";
// Interop helper for `require`d modules: reuses an existing
// `this.__importDefault` when one is present, otherwise falls back to the
// function below.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // A module flagged with `__esModule` is handed back untouched; anything
    // else is wrapped so callers can always read it from `.default`.
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
5Object.defineProperty(exports, "__esModule", { value: true });
6exports.ObjectStreamToJSON = exports.parseSitemap = exports.XMLToSitemapItemStream = void 0;
7const sax_1 = __importDefault(require("sax"));
8const stream_1 = require("stream");
9const types_1 = require("./types");
/**
 * Reports whether `tagName` is one of the tag names declared in `TagNames`.
 *
 * Only the enum's own keys count. The `in` operator also matches inherited
 * properties, so names such as `toString` or `constructor` would otherwise
 * be treated as known sitemap tags.
 *
 * @param {string} tagName - element name as reported by the XML parser
 * @returns {boolean} true when `tagName` is an own key of `TagNames`
 */
function isValidTagName(tagName) {
    // This only works because the enum name and value are the same
    return Object.prototype.hasOwnProperty.call(types_1.TagNames, tagName);
}
/**
 * Builds a blank sitemap item: empty `img`, `video` and `links` lists and an
 * empty `url`. Each call returns a new object with new arrays.
 */
function tagTemplate() {
    const blankItem = { img: [], video: [], links: [], url: '' };
    return blankItem;
}
/**
 * Builds a blank video entry: an empty `tag` list plus empty
 * `thumbnail_loc`, `title` and `description` strings. Each call returns a
 * new object with a new `tag` array.
 */
function videoTemplate() {
    const blankVideo = { tag: [], thumbnail_loc: '', title: '', description: '' };
    return blankVideo;
}
// Blank image entry. It is shared by every parse, so it is frozen: callers
// must take a copy (`{ ...imageTemplate }`) before filling it in, and an
// accidental write to the template itself cannot leak into later copies.
const imageTemplate = Object.freeze({
    url: '',
});
// Blank alternate-link entry; frozen and copied the same way as
// `imageTemplate`.
const linkTemplate = Object.freeze({
    lang: '',
    url: '',
});
/**
 * Builds a blank news entry: a `publication` with empty `name` and
 * `language`, plus empty `publication_date` and `title`. Each call returns a
 * new object with a new nested `publication`.
 */
function newsTemplate() {
    const publication = { name: '', language: '' };
    const blankNews = { publication, publication_date: '', title: '' };
    return blankNews;
}
/**
 * Logger used when the caller supplies none: forwards the message parts to
 * the `console` method named by `level` and returns that method's result.
 */
const defaultLogger = (level, ...message) => {
    return console[level](...message);
};
// Options applied when a stream is constructed without an options argument.
const defaultStreamOpts = { logger: defaultLogger };
48// TODO does this need to end with `options`
/**
 * Takes a stream of xml and transforms it into a stream of SitemapItems
 * Use this to parse existing sitemaps into config options compatible with this library
 *
 * Incoming chunks are fed to a strict, namespace-aware sax stream. The sax
 * event handlers accumulate the item currently being read and push it
 * downstream whenever a `url` element closes.
 */
class XMLToSitemapItemStream extends stream_1.Transform {
    /**
     * @param {object} [opts] stream options, forwarded to `Transform`
     * @param {string} [opts.level] error level; defaults to `ErrorLevel.WARN`,
     *   and `ErrorLevel.SILENT` turns logging off
     * @param {Function|false} [opts.logger] called as `(level, ...message)`;
     *   `false` turns logging off, and a missing value falls back to the
     *   console-backed default logger
     */
    constructor(opts = defaultStreamOpts) {
        // Copy rather than assigning `opts.objectMode`, so neither the
        // caller's object nor the shared default options object is mutated.
        super({ ...opts, objectMode: true });
        this.saxStream = sax_1.default.createStream(true, {
            xmlns: true,
            strictEntities: true,
            trim: true,
        });
        this.level = opts.level || types_1.ErrorLevel.WARN;
        if (this.level !== types_1.ErrorLevel.SILENT && opts.logger !== false) {
            this.logger = opts.logger != null ? opts.logger : defaultLogger;
        }
        else {
            this.logger = () => undefined;
        }
        // Parse state shared by the sax handlers below.
        let currentItem = tagTemplate();
        let currentTag;
        let currentVideo = videoTemplate();
        let currentImage = { ...imageTemplate };
        let currentLink = { ...linkTemplate };
        let dontpushCurrentLink = false;
        // Returns the news entry of the current item, creating it on demand.
        const ensureNews = () => {
            if (!currentItem.news) {
                currentItem.news = newsTemplate();
            }
            return currentItem.news;
        };
        // Handles the tags whose content may arrive as text and/or cdata and
        // is therefore appended rather than assigned. Returns true when
        // `currentTag` was one of them.
        const appendCharacterData = (text) => {
            switch (currentTag) {
                case types_1.TagNames['video:title']:
                    currentVideo.title += text;
                    return true;
                case types_1.TagNames['video:description']:
                    currentVideo.description += text;
                    return true;
                case types_1.TagNames['news:name']:
                    ensureNews().publication.name += text;
                    return true;
                case types_1.TagNames['news:title']:
                    ensureNews().title += text;
                    return true;
                case types_1.TagNames['image:caption']:
                    currentImage.caption = currentImage.caption
                        ? currentImage.caption + text
                        : text;
                    return true;
                case types_1.TagNames['image:title']:
                    currentImage.title = currentImage.title
                        ? currentImage.title + text
                        : text;
                    return true;
                default:
                    return false;
            }
        };
        this.saxStream.on('opentagstart', (tag) => {
            currentTag = tag.name;
            if (currentTag.startsWith('news:') && !currentItem.news) {
                currentItem.news = newsTemplate();
            }
        });
        this.saxStream.on('opentag', (tag) => {
            if (!isValidTagName(tag.name)) {
                this.logger('warn', 'unhandled tag', tag.name);
                return;
            }
            if (tag.name !== 'xhtml:link') {
                return;
            }
            const { rel, href, hreflang } = tag.attributes;
            if (typeof rel === 'string' || typeof href === 'string') {
                return;
            }
            if (!rel || !href) {
                // Without both `rel` and `href` there is nothing to record.
                // Skip the link instead of dereferencing a missing attribute.
                dontpushCurrentLink = true;
                this.logger('log', 'unhandled attr for xhtml:link', tag.attributes);
                return;
            }
            if (rel.value === 'alternate' && hreflang) {
                currentLink.url = href.value;
                if (typeof hreflang === 'string') {
                    return;
                }
                currentLink.lang = hreflang.value;
            }
            else if (rel.value === 'alternate') {
                dontpushCurrentLink = true;
                currentItem.androidLink = href.value;
            }
            else if (rel.value === 'amphtml') {
                dontpushCurrentLink = true;
                currentItem.ampLink = href.value;
            }
            else {
                // Unknown `rel`: nothing was stored in `currentLink`, so do
                // not push an empty link entry for it.
                dontpushCurrentLink = true;
                this.logger('log', 'unhandled attr for xhtml:link', tag.attributes);
            }
        });
        this.saxStream.on('text', (text) => {
            if (appendCharacterData(text)) {
                return;
            }
            switch (currentTag) {
                case 'mobile:mobile':
                    break;
                case types_1.TagNames.loc:
                    currentItem.url = text;
                    break;
                case types_1.TagNames.changefreq:
                    if (types_1.isValidChangeFreq(text)) {
                        currentItem.changefreq = text;
                    }
                    break;
                case types_1.TagNames.priority:
                    currentItem.priority = parseFloat(text);
                    break;
                case types_1.TagNames.lastmod:
                    currentItem.lastmod = text;
                    break;
                case types_1.TagNames['video:thumbnail_loc']:
                    currentVideo.thumbnail_loc = text;
                    break;
                case types_1.TagNames['video:tag']:
                    currentVideo.tag.push(text);
                    break;
                case types_1.TagNames['video:duration']:
                    currentVideo.duration = parseInt(text, 10);
                    break;
                case types_1.TagNames['video:player_loc']:
                    currentVideo.player_loc = text;
                    break;
                case types_1.TagNames['video:content_loc']:
                    currentVideo.content_loc = text;
                    break;
                case types_1.TagNames['video:requires_subscription']:
                    if (types_1.isValidYesNo(text)) {
                        currentVideo.requires_subscription = text;
                    }
                    break;
                case types_1.TagNames['video:publication_date']:
                    currentVideo.publication_date = text;
                    break;
                case types_1.TagNames['video:id']:
                    currentVideo.id = text;
                    break;
                case types_1.TagNames['video:restriction']:
                    currentVideo.restriction = text;
                    break;
                case types_1.TagNames['video:view_count']:
                    currentVideo.view_count = parseInt(text, 10);
                    break;
                case types_1.TagNames['video:uploader']:
                    currentVideo.uploader = text;
                    break;
                case types_1.TagNames['video:family_friendly']:
                    if (types_1.isValidYesNo(text)) {
                        currentVideo.family_friendly = text;
                    }
                    break;
                case types_1.TagNames['video:expiration_date']:
                    currentVideo.expiration_date = text;
                    break;
                case types_1.TagNames['video:platform']:
                    currentVideo.platform = text;
                    break;
                case types_1.TagNames['video:price']:
                    currentVideo.price = text;
                    break;
                case types_1.TagNames['video:rating']:
                    currentVideo.rating = parseFloat(text);
                    break;
                case types_1.TagNames['video:category']:
                    currentVideo.category = text;
                    break;
                case types_1.TagNames['video:live']:
                    if (types_1.isValidYesNo(text)) {
                        currentVideo.live = text;
                    }
                    break;
                case types_1.TagNames['video:gallery_loc']:
                    currentVideo.gallery_loc = text;
                    break;
                case types_1.TagNames['image:loc']:
                    currentImage.url = text;
                    break;
                case types_1.TagNames['image:geo_location']:
                    currentImage.geoLocation = text;
                    break;
                case types_1.TagNames['image:license']:
                    currentImage.license = text;
                    break;
                case types_1.TagNames['news:access']:
                    ensureNews().access = text;
                    break;
                case types_1.TagNames['news:genres']:
                    ensureNews().genres = text;
                    break;
                case types_1.TagNames['news:publication_date']:
                    ensureNews().publication_date = text;
                    break;
                case types_1.TagNames['news:keywords']:
                    ensureNews().keywords = text;
                    break;
                case types_1.TagNames['news:stock_tickers']:
                    ensureNews().stock_tickers = text;
                    break;
                case types_1.TagNames['news:language']:
                    ensureNews().publication.language = text;
                    break;
                default:
                    this.logger('log', 'unhandled text for tag:', currentTag, `'${text}'`);
                    break;
            }
        });
        this.saxStream.on('cdata', (text) => {
            if (!appendCharacterData(text)) {
                this.logger('log', 'unhandled cdata for tag:', currentTag);
            }
        });
        this.saxStream.on('attribute', (attr) => {
            switch (currentTag) {
                case types_1.TagNames['urlset']:
                case types_1.TagNames['xhtml:link']:
                case types_1.TagNames['video:id']:
                    break;
                case types_1.TagNames['video:restriction']:
                    if (attr.name === 'relationship' && types_1.isAllowDeny(attr.value)) {
                        currentVideo['restriction:relationship'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr', currentTag, attr.name);
                    }
                    break;
                case types_1.TagNames['video:price']:
                    if (attr.name === 'type' && types_1.isPriceType(attr.value)) {
                        currentVideo['price:type'] = attr.value;
                    }
                    else if (attr.name === 'currency') {
                        currentVideo['price:currency'] = attr.value;
                    }
                    else if (attr.name === 'resolution' && types_1.isResolution(attr.value)) {
                        currentVideo['price:resolution'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:price', attr.name);
                    }
                    break;
                case types_1.TagNames['video:player_loc']:
                    if (attr.name === 'autoplay') {
                        currentVideo['player_loc:autoplay'] = attr.value;
                    }
                    else if (attr.name === 'allow_embed' && types_1.isValidYesNo(attr.value)) {
                        currentVideo['player_loc:allow_embed'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:player_loc', attr.name);
                    }
                    break;
                case types_1.TagNames['video:platform']:
                    if (attr.name === 'relationship' && types_1.isAllowDeny(attr.value)) {
                        currentVideo['platform:relationship'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:platform', attr.name, attr.value);
                    }
                    break;
                case types_1.TagNames['video:gallery_loc']:
                    if (attr.name === 'title') {
                        currentVideo['gallery_loc:title'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:gallery_loc', attr.name);
                    }
                    break;
                case types_1.TagNames['video:uploader']:
                    if (attr.name === 'info') {
                        currentVideo['uploader:info'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:uploader', attr.name);
                    }
                    break;
                default:
                    this.logger('log', 'unhandled attr', currentTag, attr.name);
            }
        });
        this.saxStream.on('closetag', (tag) => {
            switch (tag) {
                case types_1.TagNames.url:
                    this.push(currentItem);
                    currentItem = tagTemplate();
                    break;
                case types_1.TagNames['video:video']:
                    currentItem.video.push(currentVideo);
                    currentVideo = videoTemplate();
                    break;
                case types_1.TagNames['image:image']:
                    currentItem.img.push(currentImage);
                    currentImage = { ...imageTemplate };
                    break;
                case types_1.TagNames['xhtml:link']:
                    if (!dontpushCurrentLink) {
                        currentItem.links.push(currentLink);
                    }
                    currentLink = { ...linkTemplate };
                    // The skip flag belongs to the link that just closed.
                    // Left set, it would drop every later hreflang link.
                    dontpushCurrentLink = false;
                    break;
                default:
                    break;
            }
        });
    }
    /**
     * Feeds one chunk of XML to the sax stream; parsed items are pushed from
     * the sax handlers registered in the constructor.
     *
     * @param {Buffer|string} data chunk of the XML document
     * @param {string} encoding encoding passed along with the chunk
     * @param {Function} callback invoked once the chunk has been handed to
     *   the parser, with the error if the parser threw
     */
    _transform(data, encoding, callback) {
        try {
            this.saxStream.write(data, encoding);
        }
        catch (error) {
            // Report failures through the stream's normal error path
            // instead of letting them escape from `write()`.
            callback(error);
            return;
        }
        callback();
    }
}
422exports.XMLToSitemapItemStream = XMLToSitemapItemStream;
/**
  Read xml and resolve with the configuration that would produce it or reject with
  an error
  ```
  const { createReadStream } = require('fs')
  const { parseSitemap, createSitemap } = require('sitemap')
  parseSitemap(createReadStream('./example.xml')).then(
    // produces the same xml
    // you can, of course, more practically modify it or store it
    (xmlConfig) => console.log(createSitemap(xmlConfig).toString()),
    (err) => console.log(err)
  )
  ```
  @param {Readable} xml what to parse
  @return {Promise<SitemapItem[]>} resolves with list of sitemap items that can be fed into a SitemapStream. Rejects with an Error object, whether the error comes from the parser or from reading `xml` itself.
 */
async function parseSitemap(xml) {
    const urls = [];
    return new Promise((resolve, reject) => {
        // `pipe()` does not forward errors from the source to the
        // destination, so a failing input stream must be watched directly;
        // otherwise the promise would never settle.
        xml.on('error', reject);
        xml
            .pipe(new XMLToSitemapItemStream())
            .on('data', (smi) => urls.push(smi))
            .on('end', () => resolve(urls))
            .on('error', reject);
    });
}
453exports.parseSitemap = parseSitemap;
// Options applied when ObjectStreamToJSON is constructed without arguments.
const defaultObjectStreamOpts = {
    lineSeparated: false,
};
/**
 * A Transform that converts a stream of objects into a JSON Array or a line
 * separated stringified JSON
 *
 * In array mode the output is always a well-formed JSON array, including
 * `[]` when no object was written. Values that `JSON.stringify` cannot
 * represent are emitted as `null`, as they would be inside an array.
 *
 * @param [lineSeparated=false] whether to separate entries by a new line or comma
 */
class ObjectStreamToJSON extends stream_1.Transform {
    constructor(opts = defaultObjectStreamOpts) {
        // Copy rather than assigning `opts.writableObjectMode`, so neither
        // the caller's object nor the shared default is mutated.
        super({ ...opts, writableObjectMode: true });
        this.lineSeparated = opts.lineSeparated;
        this.firstWritten = false;
    }
    /**
     * Serializes one object and pushes it, preceded by the opening bracket
     * or the entry separator as appropriate.
     *
     * @param {*} chunk value to serialize
     * @param {string} encoding unused; chunks arrive in object mode
     * @param {Function} cb invoked when done, with the error if
     *   serialization failed
     */
    _transform(chunk, encoding, cb) {
        let json;
        try {
            // Serialize before pushing anything so a failure cannot leave a
            // dangling separator in the output.
            json = JSON.stringify(chunk);
        }
        catch (error) {
            cb(error);
            return;
        }
        if (!this.firstWritten) {
            this.firstWritten = true;
            if (!this.lineSeparated) {
                this.push('[');
            }
        }
        else {
            this.push(this.lineSeparated ? '\n' : ',');
        }
        // Falsy values such as 0, '' and false are valid entries and must be
        // written; skipping them after the separator would corrupt the JSON.
        this.push(json === undefined ? 'null' : json);
        cb();
    }
    /**
     * Closes the JSON array when not in line-separated mode.
     *
     * @param {Function} cb invoked once the closing text has been pushed
     */
    _flush(cb) {
        if (!this.lineSeparated) {
            // With no entries the opening bracket was never written.
            this.push(this.firstWritten ? ']' : '[]');
        }
        cb();
    }
}
494exports.ObjectStreamToJSON = ObjectStreamToJSON;