1 | import { AtomCategory, AtomContent, AtomLink, AtomLinkRelType, AtomPerson, AtomText, AtomTextType } from './AtomCommon';
|
2 | import { AtomEntry, AtomSource } from './AtomEntry';
|
3 | import { AtomFeed } from './AtomFeed';
|
4 |
|
5 | import { sanitize } from 'dompurify';
|
6 |
|
7 |
|
/**
 * Returns the first element of `nodes` whose `nodeName` equals `tagName`
 * (strict, case-sensitive string comparison), or `undefined` when no element
 * matches.
 *
 * @param nodes - Any iterable or array-like collection of elements.
 * @param tagName - The exact node name to look for.
 */
const findByTag = (nodes: Iterable<Element> | ArrayLike<Element> | HTMLCollection, tagName: string): Element | undefined => {
  for (const candidate of Array.from(nodes)) {
    if (candidate.nodeName === tagName) {
      return candidate;
    }
  }
  return undefined;
};
|
9 |
|
/**
 * Collects, in their original order, every element of `nodes` whose
 * `nodeName` equals `tagName` (strict, case-sensitive string comparison).
 * Returns an empty array when nothing matches.
 *
 * @param nodes - Any iterable or array-like collection of elements.
 * @param tagName - The exact node name to keep.
 */
const filterByTag = (nodes: Iterable<Element> | ArrayLike<Element> | HTMLCollection, tagName: string): Element[] => {
  const matches: Element[] = [];
  for (const candidate of Array.from(nodes)) {
    if (candidate.nodeName === tagName) {
      matches.push(candidate);
    }
  }
  return matches;
};
|
11 |
|
/**
 * Looks only at the direct element children of `parent` (its `children`
 * collection) and returns the first one named `tagName`, or `undefined`.
 */
const findChildTag = (parent: Document | Element, tagName: string) => {
  const { children } = parent;
  return findByTag(children, tagName);
};
|
13 |
|
/**
 * Looks only at the direct element children of `parent` (its `children`
 * collection) and returns all of them that are named `tagName`.
 */
const filterChildTags = (parent: Document | Element, tagName: string) => {
  const { children } = parent;
  return filterByTag(children, tagName);
};
|
15 |
|
16 |
|
/**
 * Parses a string containing an Atom XML document into an `AtomFeed`.
 *
 * The string is parsed with `DOMParser` as `text/xml`; the `<feed>` element is
 * looked up among the document's direct children and its direct children are
 * mapped onto the feed fields.
 *
 * - `id` and the generator `value` fall back to `''` when their element is absent.
 * - `updated` is `new Date(0)` when `<updated>` is absent or has no text content.
 * - The `generator` object is always present, even without a `<generator>` element.
 *
 * @param data - Raw XML text of the feed.
 * @returns The parsed feed.
 * @throws Error when the document has no top-level `<feed>` element.
 */
export function parseAtomFeed(data: string): AtomFeed {
  const xml = new DOMParser().parseFromString(data, 'text/xml');
  const feed = findChildTag(xml, 'feed');

  if (!feed) {
    throw Error('No <feed> tag found.');
  }

  // Looked up once and reused for the three generator fields below.
  const generatorNode = findChildTag(feed, 'generator');
  const updatedText = findChildTag(feed, 'updated')?.textContent;

  return {
    id: sanitizeTextContent(findChildTag(feed, 'id')) ?? '',
    title: parseAtomText(findChildTag(feed, 'title')),
    updated: new Date(updatedText ?? 0),
    entries: filterChildTags(feed, 'entry').map(entryNode => parseAtomEntry(entryNode)),
    author: filterChildTags(feed, 'author').map(personNode => parseAtomPerson(personNode)),
    link: filterChildTags(feed, 'link').map(linkNode => parseAtomLink(linkNode)),
    category: filterChildTags(feed, 'category').map(categoryNode => parseAtomCategory(categoryNode)),
    contributor: filterChildTags(feed, 'contributor').map(personNode => parseAtomPerson(personNode)),
    generator: {
      value: sanitizeTextContent(generatorNode) ?? '',
      uri: sanitizeTextAttribute(generatorNode, 'uri'),
      version: sanitizeTextAttribute(generatorNode, 'version'),
    },
    icon: sanitizeTextContent(findChildTag(feed, 'icon')),
    logo: sanitizeTextContent(findChildTag(feed, 'logo')),
    rights: parseAtomText(findChildTag(feed, 'rights')),
    subtitle: sanitizeTextContent(findChildTag(feed, 'subtitle')),
  };
}
|
44 |
|
/**
 * Converts an `<entry>` element into an `AtomEntry` by reading its direct
 * children.
 *
 * - `id` falls back to `''` when `<id>` is absent.
 * - `updated` is `new Date(0)` when `<updated>` is absent or has no text content.
 * - `published` is `undefined` when there is no `<published>` child.
 *
 * @param entry - The `<entry>` element to read.
 * @returns The parsed entry.
 */
export function parseAtomEntry(entry: Element): AtomEntry {
  const updatedText = findChildTag(entry, 'updated')?.textContent;
  const publishedNode = findChildTag(entry, 'published');

  return {
    id: sanitizeTextContent(findChildTag(entry, 'id')) ?? '',
    title: parseAtomText(findChildTag(entry, 'title')),
    updated: new Date(updatedText ?? 0),
    author: filterChildTags(entry, 'author').map(personNode => parseAtomPerson(personNode)),
    content: parseAtomContent(findChildTag(entry, 'content')),
    link: filterChildTags(entry, 'link').map(linkNode => parseAtomLink(linkNode)),
    summary: parseAtomText(findChildTag(entry, 'summary')),
    category: filterChildTags(entry, 'category').map(categoryNode => parseAtomCategory(categoryNode)),
    contributor: filterChildTags(entry, 'contributor').map(personNode => parseAtomPerson(personNode)),
    published: publishedNode ? new Date(publishedNode.textContent ?? 0) : undefined,
    rights: parseAtomText(findChildTag(entry, 'rights')),
    source: parseAtomSource(findChildTag(entry, 'source')),
  };
}
|
61 |
|
62 |
|
/**
 * Extracts the payload of an Atom text/content construct and passes it through
 * DOMPurify's `sanitize`.
 *
 * How the payload is read depends on `type`:
 * - `'xhtml'` — the element's `innerHTML` (serialized child markup).
 * - `'html'`  — the element's `textContent` (the escaped markup, decoded by the
 *   XML parser).
 * - `'text'`  — the element's `innerHTML`.
 *
 * A missing `type` is treated as `'text'`: RFC 4287 §3.1.1 requires processors
 * to behave as if `type="text"` were present when the attribute is absent.
 * Previously a missing type fell through every branch and produced `''`, which
 * dropped the text of every construct that omitted the attribute.
 *
 * @param type - The construct's `type` attribute, or `undefined` when absent.
 * @param element - The element holding the content, or `undefined`.
 * @returns The sanitized string; `''` when `element` is `undefined` or `type`
 *          is a value other than the three handled above.
 */
export function safelyDecodeAtomText(type: AtomTextType | undefined, element: Element | undefined): string {
  if (element === undefined) {
    return '';
  }

  // Absent type attribute => "text" (RFC 4287 §3.1.1).
  const effectiveType = type ?? 'text';

  switch (effectiveType) {
    case 'xhtml':
    case 'text':
      return sanitize(element.innerHTML);
    case 'html':
      return sanitize(element.textContent ?? '');
    default:
      // Unrecognised types (e.g. media types on <content>) are not decoded.
      return '';
  }
}
|
78 |
|
79 |
|
/**
 * Returns the sanitized text content of `element`.
 *
 * @param element - Element to read, or `undefined`.
 * @returns `undefined` when `element` is `undefined`; otherwise the result of
 *          `sanitize` applied to its `textContent` (or to `''` when the text
 *          content is `null`).
 */
export function sanitizeTextContent(element: Element | undefined): string | undefined {
  if (element === undefined) {
    return undefined;
  }
  const rawText = element.textContent ?? '';
  return sanitize(rawText);
}
|
83 |
|
84 |
|
/**
 * Returns the sanitized value of attribute `attributeName` on `element`.
 *
 * The type parameter only re-labels the returned string for the caller (for
 * example as a string-literal union); no runtime validation against `T` is
 * performed.
 *
 * @param element - Element to read, or `undefined`.
 * @param attributeName - Name of the attribute to read.
 * @returns `undefined` when `element` is `undefined` or the attribute is not
 *          present; otherwise the sanitized attribute value.
 */
export function sanitizeTextAttribute<T = string>(element: Element | undefined, attributeName: string): T | undefined {
  if (element === undefined) {
    return undefined;
  }

  const rawValue = element.getAttribute(attributeName);
  if (rawValue === null) {
    return undefined;
  }

  return sanitize(rawValue) as unknown as T;
}
|
88 |
|
/**
 * Builds an `AtomContent` from a `<content>` element.
 *
 * `type` and `src` are taken from the element's attributes (each `undefined`
 * when missing); `value` is produced by `safelyDecodeAtomText` using that type.
 *
 * @param content - The `<content>` element, or `undefined` when absent.
 */
export function parseAtomContent(content: Element | undefined): AtomContent {
  const type = sanitizeTextAttribute<AtomTextType>(content, 'type');
  const decoded = safelyDecodeAtomText(type, content);

  return {
    type,
    src: sanitizeTextAttribute(content, 'src'),
    value: decoded,
  };
}
|
97 |
|
98 |
|
/**
 * Builds an `AtomText` from a text-construct element such as `<title>`,
 * `<summary>` or `<rights>`.
 *
 * `type` is the element's `type` attribute (`undefined` when missing); `value`
 * is produced by `safelyDecodeAtomText` using that type.
 *
 * @param text - The text-construct element, or `undefined` when absent.
 */
export function parseAtomText(text: Element | undefined): AtomText {
  const type = sanitizeTextAttribute<AtomTextType>(text, 'type');
  const decoded = safelyDecodeAtomText(type, text);

  return {
    type,
    value: decoded,
  };
}
|
106 |
|
/**
 * Builds an `AtomPerson` from a person-construct element (`<author>` or
 * `<contributor>`), reading its direct `<name>`, `<uri>` and `<email>` children.
 *
 * `name` is always a string (the sanitized text, using `''` as input when
 * `<name>` is absent); `uri` and `email` are `undefined` when their element is
 * absent.
 *
 * @param person - The person-construct element.
 */
export function parseAtomPerson(person: Element): AtomPerson {
  const nameNode = findChildTag(person, 'name');
  const rawName = nameNode?.textContent ?? '';

  return {
    name: sanitize(rawName),
    uri: sanitizeTextContent(findChildTag(person, 'uri')),
    email: sanitizeTextContent(findChildTag(person, 'email')),
  };
}
|
114 |
|
/**
 * Builds an `AtomLink` from a `<link>` element by reading its attributes.
 *
 * `href` falls back to `''` when missing; every other field is `undefined`
 * when its attribute is not present. All values are sanitized strings.
 *
 * @param link - The `<link>` element.
 */
export function parseAtomLink(link: Element): AtomLink {
  return {
    href: sanitizeTextAttribute(link, 'href') ?? '',
    // The link relation lives in the `rel` attribute (RFC 4287 §4.2.7.2).
    // This previously read the non-existent `ref` attribute, so `rel` was
    // always undefined for conforming feeds.
    rel: sanitizeTextAttribute<AtomLinkRelType>(link, 'rel'),
    type: sanitizeTextAttribute(link, 'type'),
    hreflang: sanitizeTextAttribute(link, 'hreflang'),
    title: sanitizeTextAttribute(link, 'title'),
    length: sanitizeTextAttribute(link, 'length'),
  };
}
|
125 |
|
/**
 * Builds an `AtomCategory` from a `<category>` element by reading its
 * attributes.
 *
 * `term` falls back to `''` when missing; `scheme` and `label` are `undefined`
 * when their attribute is not present.
 *
 * @param category - The `<category>` element.
 */
export function parseAtomCategory(category: Element): AtomCategory {
  const parsed: AtomCategory = {
    term: sanitizeTextAttribute(category, 'term') ?? '',
    scheme: sanitizeTextAttribute(category, 'scheme'),
    label: sanitizeTextAttribute(category, 'label'),
  };
  return parsed;
}
|
133 |
|
/**
 * Builds an `AtomSource` from an entry's `<source>` element.
 *
 * `id` and `title` fall back to `''` when their element is absent. `updated`
 * is `new Date(0)` when `<updated>` is absent or has no text content.
 *
 * @param source - The `<source>` element, or `undefined` when the entry has none.
 * @returns The parsed source, or `undefined` when `source` is `undefined`.
 */
export function parseAtomSource(source: Element | undefined): AtomSource | undefined {
  if (source === undefined) {
    return undefined;
  }

  return {
    id: sanitizeTextContent(findChildTag(source, 'id')) ?? '',
    title: sanitizeTextContent(findChildTag(source, 'title')) ?? '',
    // Read the timestamp from <updated>. This previously read <title>, which
    // produced an Invalid Date (or an unrelated date) for every source.
    updated: new Date(findChildTag(source, 'updated')?.textContent ?? 0),
  };
}
\ | No newline at end of file |