1 | "use strict";
|
// CommonJS/ES-module interop helper: reuses an already-installed helper found
// on `this` when there is one, otherwise defines it. A module flagged with
// `__esModule` is returned untouched; anything else is wrapped so that it can
// be reached through a `default` property.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
5 | Object.defineProperty(exports, "__esModule", { value: true });
|
6 | exports.ObjectStreamToJSON = exports.parseSitemap = exports.XMLToSitemapItemStream = void 0;
|
7 | const sax_1 = __importDefault(require("sax"));
|
8 | const stream_1 = require("stream");
|
9 | const types_1 = require("./types");
|
/**
 * Reports whether `tagName` is one of the tag names declared in `TagNames`.
 *
 * Only own properties count. The `in` operator would also accept names that
 * are merely inherited from `Object.prototype` (`toString`, `constructor`,
 * ...), which would let such element names pass as known sitemap tags.
 *
 * @param {string} tagName - Element name as reported by the XML parser.
 * @returns {boolean} `true` when `TagNames` has an own key equal to `tagName`.
 */
function isValidTagName(tagName) {
    return Object.prototype.hasOwnProperty.call(types_1.TagNames, tagName);
}
|
/**
 * Creates a blank sitemap item.
 *
 * A new object with new arrays is built on every call, so items never share
 * their `img`, `video` or `links` lists.
 *
 * @returns {{img: Array, video: Array, links: Array, url: string}}
 */
function tagTemplate() {
    const blankItem = { img: [], video: [], links: [], url: '' };
    return blankItem;
}
|
/**
 * Creates a blank video entry.
 *
 * The text fields start as empty strings and `tag` as a new empty array, so
 * each call yields an independent object.
 *
 * @returns {{tag: Array, thumbnail_loc: string, title: string, description: string}}
 */
function videoTemplate() {
    const blankVideo = { tag: [], thumbnail_loc: '', title: '', description: '' };
    return blankVideo;
}
|
// Blank image entry. Meant to be copied (e.g. with object spread) for each new
// image rather than used directly.
const imageTemplate = { url: '' };
|
// Blank alternate-link entry (language code plus URL). Meant to be copied
// (e.g. with object spread) for each new link rather than used directly.
const linkTemplate = { lang: '', url: '' };
|
/**
 * Creates a blank news entry.
 *
 * Every call builds a new object, including a new nested `publication`
 * record, with all text fields set to the empty string.
 *
 * @returns {{publication: {name: string, language: string}, publication_date: string, title: string}}
 */
function newsTemplate() {
    const publication = { name: '', language: '' };
    return { publication, publication_date: '', title: '' };
}
|
/**
 * Default logger: forwards the message parts to the `console` method named by
 * `level` (for example `'warn'` calls `console.warn`).
 *
 * @param {string} level - Name of the console method to invoke.
 * @param {...*} message - Values handed to that console method.
 * @returns {*} Whatever the console method returns.
 */
const defaultLogger = (level, ...message) => {
    return console[level](...message);
};
// Options used when the XML stream is constructed without arguments.
const defaultStreamOpts = { logger: defaultLogger };
|
48 |
|
49 |
|
50 |
|
51 |
|
52 |
|
/**
 * Transform stream that accepts sitemap XML and pushes one item object for
 * every closed `url` element.
 *
 * Incoming chunks are written to a strict, namespace-aware sax stream. The
 * parser events fill in a "current" item (plus the current video, image and
 * alternate link) held in closure variables; finished pieces are attached to
 * the item on their closing tag and the item itself is pushed when its `url`
 * element closes.
 */
class XMLToSitemapItemStream extends stream_1.Transform {
    /**
     * @param {object} [opts] - Transform options, plus:
     *   `level`  - error level; falls back to `ErrorLevel.WARN` when falsy.
     *   `logger` - `(level, ...message) => void`; pass `false`, or use
     *              `ErrorLevel.SILENT`, to disable logging.
     */
    constructor(opts = defaultStreamOpts) {
        // Copy the options instead of assigning `objectMode` onto them, so that
        // neither the caller's object nor the shared default is modified.
        super({ ...opts, objectMode: true });
        this.saxStream = sax_1.default.createStream(true, {
            xmlns: true,
            strictEntities: true,
            trim: true,
        });
        this.level = opts.level || types_1.ErrorLevel.WARN;
        if (this.level !== types_1.ErrorLevel.SILENT && opts.logger !== false) {
            this.logger = opts.logger !== null && opts.logger !== undefined
                ? opts.logger
                : defaultLogger;
        }
        else {
            this.logger = () => undefined;
        }
        let currentItem = tagTemplate();
        let currentTag;
        let currentVideo = videoTemplate();
        let currentImage = { ...imageTemplate };
        let currentLink = { ...linkTemplate };
        // True while the xhtml:link being processed must not be added to
        // `links`; cleared again when that link's closing tag is handled.
        let dontpushCurrentLink = false;
        // Returns the news record of the current item, creating it on first use.
        const ensureNews = () => {
            if (!currentItem.news) {
                currentItem.news = newsTemplate();
            }
            return currentItem.news;
        };
        // Appends character data for the tags that are handled identically for
        // plain text and CDATA. Returns false when the current tag is not one
        // of them, so the caller can log it.
        const appendCharacterData = (text) => {
            switch (currentTag) {
                case types_1.TagNames['video:title']:
                    currentVideo.title += text;
                    return true;
                case types_1.TagNames['video:description']:
                    currentVideo.description += text;
                    return true;
                case types_1.TagNames['news:name']:
                    ensureNews().publication.name += text;
                    return true;
                case types_1.TagNames['news:title']:
                    ensureNews().title += text;
                    return true;
                case types_1.TagNames['image:caption']:
                    currentImage.caption = (currentImage.caption || '') + text;
                    return true;
                case types_1.TagNames['image:title']:
                    currentImage.title = (currentImage.title || '') + text;
                    return true;
                default:
                    return false;
            }
        };
        this.saxStream.on('opentagstart', (tag) => {
            currentTag = tag.name;
            if (currentTag.startsWith('news:') && !currentItem.news) {
                currentItem.news = newsTemplate();
            }
        });
        this.saxStream.on('opentag', (tag) => {
            if (!isValidTagName(tag.name)) {
                this.logger('warn', 'unhandled tag', tag.name);
                return;
            }
            if (tag.name !== 'xhtml:link') {
                return;
            }
            const { rel, href, hreflang } = tag.attributes;
            if (typeof rel === 'string' || typeof href === 'string') {
                return;
            }
            if (!rel || !href) {
                // Without both `rel` and `href` there is nothing usable to
                // record; skip the link instead of dereferencing a missing
                // attribute.
                dontpushCurrentLink = true;
                this.logger('log', 'unhandled attr for xhtml:link', tag.attributes);
                return;
            }
            if (rel.value === 'alternate' && hreflang) {
                currentLink.url = href.value;
                if (typeof hreflang === 'string') {
                    return;
                }
                currentLink.lang = hreflang.value;
            }
            else if (rel.value === 'alternate') {
                dontpushCurrentLink = true;
                currentItem.androidLink = href.value;
            }
            else if (rel.value === 'amphtml') {
                dontpushCurrentLink = true;
                currentItem.ampLink = href.value;
            }
            else {
                this.logger('log', 'unhandled attr for xhtml:link', tag.attributes);
            }
        });
        this.saxStream.on('text', (text) => {
            switch (currentTag) {
                case 'mobile:mobile':
                    break;
                case types_1.TagNames.loc:
                    currentItem.url = text;
                    break;
                case types_1.TagNames.changefreq:
                    if (types_1.isValidChangeFreq(text)) {
                        currentItem.changefreq = text;
                    }
                    break;
                case types_1.TagNames.priority:
                    currentItem.priority = parseFloat(text);
                    break;
                case types_1.TagNames.lastmod:
                    currentItem.lastmod = text;
                    break;
                case types_1.TagNames['video:thumbnail_loc']:
                    currentVideo.thumbnail_loc = text;
                    break;
                case types_1.TagNames['video:tag']:
                    currentVideo.tag.push(text);
                    break;
                case types_1.TagNames['video:duration']:
                    currentVideo.duration = parseInt(text, 10);
                    break;
                case types_1.TagNames['video:player_loc']:
                    currentVideo.player_loc = text;
                    break;
                case types_1.TagNames['video:content_loc']:
                    currentVideo.content_loc = text;
                    break;
                case types_1.TagNames['video:requires_subscription']:
                    if (types_1.isValidYesNo(text)) {
                        currentVideo.requires_subscription = text;
                    }
                    break;
                case types_1.TagNames['video:publication_date']:
                    currentVideo.publication_date = text;
                    break;
                case types_1.TagNames['video:id']:
                    currentVideo.id = text;
                    break;
                case types_1.TagNames['video:restriction']:
                    currentVideo.restriction = text;
                    break;
                case types_1.TagNames['video:view_count']:
                    currentVideo.view_count = parseInt(text, 10);
                    break;
                case types_1.TagNames['video:uploader']:
                    currentVideo.uploader = text;
                    break;
                case types_1.TagNames['video:family_friendly']:
                    if (types_1.isValidYesNo(text)) {
                        currentVideo.family_friendly = text;
                    }
                    break;
                case types_1.TagNames['video:expiration_date']:
                    currentVideo.expiration_date = text;
                    break;
                case types_1.TagNames['video:platform']:
                    currentVideo.platform = text;
                    break;
                case types_1.TagNames['video:price']:
                    currentVideo.price = text;
                    break;
                case types_1.TagNames['video:rating']:
                    currentVideo.rating = parseFloat(text);
                    break;
                case types_1.TagNames['video:category']:
                    currentVideo.category = text;
                    break;
                case types_1.TagNames['video:live']:
                    if (types_1.isValidYesNo(text)) {
                        currentVideo.live = text;
                    }
                    break;
                case types_1.TagNames['video:gallery_loc']:
                    currentVideo.gallery_loc = text;
                    break;
                case types_1.TagNames['image:loc']:
                    currentImage.url = text;
                    break;
                case types_1.TagNames['image:geo_location']:
                    currentImage.geoLocation = text;
                    break;
                case types_1.TagNames['image:license']:
                    currentImage.license = text;
                    break;
                case types_1.TagNames['news:access']:
                    ensureNews().access = text;
                    break;
                case types_1.TagNames['news:genres']:
                    ensureNews().genres = text;
                    break;
                case types_1.TagNames['news:publication_date']:
                    ensureNews().publication_date = text;
                    break;
                case types_1.TagNames['news:keywords']:
                    ensureNews().keywords = text;
                    break;
                case types_1.TagNames['news:stock_tickers']:
                    ensureNews().stock_tickers = text;
                    break;
                case types_1.TagNames['news:language']:
                    ensureNews().publication.language = text;
                    break;
                default:
                    if (!appendCharacterData(text)) {
                        this.logger('log', 'unhandled text for tag:', currentTag, `'${text}'`);
                    }
                    break;
            }
        });
        this.saxStream.on('cdata', (text) => {
            if (!appendCharacterData(text)) {
                this.logger('log', 'unhandled cdata for tag:', currentTag);
            }
        });
        this.saxStream.on('attribute', (attr) => {
            switch (currentTag) {
                case types_1.TagNames['urlset']:
                case types_1.TagNames['xhtml:link']:
                case types_1.TagNames['video:id']:
                    break;
                case types_1.TagNames['video:restriction']:
                    if (attr.name === 'relationship' && types_1.isAllowDeny(attr.value)) {
                        currentVideo['restriction:relationship'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr', currentTag, attr.name);
                    }
                    break;
                case types_1.TagNames['video:price']:
                    if (attr.name === 'type' && types_1.isPriceType(attr.value)) {
                        currentVideo['price:type'] = attr.value;
                    }
                    else if (attr.name === 'currency') {
                        currentVideo['price:currency'] = attr.value;
                    }
                    else if (attr.name === 'resolution' && types_1.isResolution(attr.value)) {
                        currentVideo['price:resolution'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:price', attr.name);
                    }
                    break;
                case types_1.TagNames['video:player_loc']:
                    if (attr.name === 'autoplay') {
                        currentVideo['player_loc:autoplay'] = attr.value;
                    }
                    else if (attr.name === 'allow_embed' && types_1.isValidYesNo(attr.value)) {
                        currentVideo['player_loc:allow_embed'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:player_loc', attr.name);
                    }
                    break;
                case types_1.TagNames['video:platform']:
                    if (attr.name === 'relationship' && types_1.isAllowDeny(attr.value)) {
                        currentVideo['platform:relationship'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:platform', attr.name, attr.value);
                    }
                    break;
                case types_1.TagNames['video:gallery_loc']:
                    if (attr.name === 'title') {
                        currentVideo['gallery_loc:title'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:galler_loc', attr.name);
                    }
                    break;
                case types_1.TagNames['video:uploader']:
                    if (attr.name === 'info') {
                        currentVideo['uploader:info'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:uploader', attr.name);
                    }
                    break;
                default:
                    this.logger('log', 'unhandled attr', currentTag, attr.name);
            }
        });
        this.saxStream.on('closetag', (tag) => {
            switch (tag) {
                case types_1.TagNames.url:
                    this.push(currentItem);
                    currentItem = tagTemplate();
                    break;
                case types_1.TagNames['video:video']:
                    currentItem.video.push(currentVideo);
                    currentVideo = videoTemplate();
                    break;
                case types_1.TagNames['image:image']:
                    currentItem.img.push(currentImage);
                    currentImage = { ...imageTemplate };
                    break;
                case types_1.TagNames['xhtml:link']:
                    if (!dontpushCurrentLink) {
                        currentItem.links.push(currentLink);
                    }
                    currentLink = { ...linkTemplate };
                    // The skip flag belongs to the link that just closed. If it
                    // stayed set, every later alternate link (in this and all
                    // following url entries) would be dropped.
                    dontpushCurrentLink = false;
                    break;
                default:
                    break;
            }
        });
    }
    /**
     * Feeds one chunk to the sax stream.
     *
     * Anything thrown synchronously while the chunk is written is handed to
     * `callback`, so it reaches consumers as a stream error instead of
     * escaping from `_transform`.
     *
     * @param {*} data - Chunk to parse.
     * @param {string} encoding - Encoding passed along with the chunk.
     * @param {Function} callback - Transform callback; receives the error, if any.
     */
    _transform(data, encoding, callback) {
        let failure = null;
        try {
            this.saxStream.write(data, encoding);
        }
        catch (err) {
            failure = err;
        }
        callback(failure);
    }
}
|
422 | exports.XMLToSitemapItemStream = XMLToSitemapItemStream;
|
423 |
|
424 |
|
425 |
|
426 |
|
427 |
|
428 |
|
429 |
|
430 |
|
431 |
|
432 |
|
433 |
|
434 |
|
435 |
|
436 |
|
437 |
|
438 |
|
/**
 * Reads a sitemap from a readable stream of XML and collects every parsed
 * item into an array.
 *
 * @param {import('stream').Readable} xml - Stream producing the sitemap XML.
 * @returns {Promise<Array>} Resolves with the items, in the order they were
 *   emitted, once the parser stream ends. Rejects when either the source
 *   stream or the parser stream emits an error.
 */
async function parseSitemap(xml) {
    const urls = [];
    return new Promise((resolve, reject) => {
        // `pipe()` does not forward errors from the source to the destination,
        // so the source needs its own listener; otherwise a failing source
        // would leave this promise pending (or crash on an unhandled 'error').
        xml.on('error', reject);
        xml
            .pipe(new XMLToSitemapItemStream())
            .on('data', (smi) => urls.push(smi))
            .on('end', () => {
                resolve(urls);
            })
            .on('error', reject);
    });
}
|
453 | exports.parseSitemap = parseSitemap;
|
// Options used when ObjectStreamToJSON is constructed without arguments:
// `lineSeparated` is off by default.
const defaultObjectStreamOpts = { lineSeparated: false };
|
457 |
|
458 |
|
459 |
|
460 |
|
461 |
|
/**
 * Transform stream that takes objects on its writable side and emits JSON
 * text on its readable side.
 *
 * With `lineSeparated` falsy the output is a single JSON array
 * (`[a,b,c]`, or `[]` when nothing was written). With `lineSeparated` truthy
 * each value is serialized on its own, separated by `\n`, with no brackets.
 */
class ObjectStreamToJSON extends stream_1.Transform {
    /**
     * @param {object} [opts] - Transform options plus `lineSeparated`.
     */
    constructor(opts = defaultObjectStreamOpts) {
        // Copy the options instead of assigning `writableObjectMode` onto them,
        // so that neither the caller's object nor the shared default is
        // modified.
        super({ ...opts, writableObjectMode: true });
        this.lineSeparated = opts.lineSeparated;
        this.firstWritten = false;
    }
    /**
     * Serializes one value, preceded by the opening bracket or a separator.
     *
     * @param {*} chunk - Value to serialize.
     * @param {string} encoding - Unused; values arrive in object mode.
     * @param {Function} cb - Transform callback; receives a serialization error, if any.
     */
    _transform(chunk, encoding, cb) {
        let serialized;
        try {
            // Serialize before emitting anything so that a failure (e.g. a
            // circular structure) does not leave a dangling separator behind.
            serialized = JSON.stringify(chunk);
        }
        catch (err) {
            cb(err);
            return;
        }
        if (!this.firstWritten) {
            this.firstWritten = true;
            if (!this.lineSeparated) {
                this.push('[');
            }
        }
        else {
            this.push(this.lineSeparated ? '\n' : ',');
        }
        // Every chunk yields a value, including falsy ones such as 0 or '';
        // skipping them after the separator was written would corrupt the
        // output. Values JSON cannot represent become `null`, as they do
        // inside a JSON array.
        this.push(serialized === undefined ? 'null' : serialized);
        cb();
    }
    /**
     * Closes the JSON array (array mode only).
     *
     * @param {Function} cb - Flush callback.
     */
    _flush(cb) {
        if (!this.lineSeparated) {
            // When nothing was written the opening bracket is still missing.
            this.push(this.firstWritten ? ']' : '[]');
        }
        cb();
    }
}
|
494 | exports.ObjectStreamToJSON = ObjectStreamToJSON;
|