UNPKG

6.89 kBPlain TextView Raw
1import { AtomCategory, AtomContent, AtomLink, AtomLinkRelType, AtomPerson, AtomText, AtomTextType } from './AtomCommon';
2import { AtomEntry, AtomSource } from './AtomEntry';
3import { AtomFeed } from './AtomFeed';
4
5import { sanitize } from 'dompurify';
6
/**
 * Returns the first element of `nodes` whose `nodeName` equals `tagName`, or `undefined`.
 * Only the supplied list is inspected; descendants are never searched.
 */
function findByTag(nodes: Iterable<Element> | ArrayLike<Element> | HTMLCollection, tagName: string): Element | undefined {
  for (const node of Array.from(nodes)) {
    if (node.nodeName === tagName) {
      return node;
    }
  }
  return undefined;
}

/**
 * Returns every element of `nodes` whose `nodeName` equals `tagName`, in list order.
 * Only the supplied list is inspected; descendants are never searched.
 */
function filterByTag(nodes: Iterable<Element> | ArrayLike<Element> | HTMLCollection, tagName: string): Element[] {
  const matches: Element[] = [];
  for (const node of Array.from(nodes)) {
    if (node.nodeName === tagName) {
      matches.push(node);
    }
  }
  return matches;
}

/** Applies `findByTag()` to the direct children of `parent`. */
function findChildTag(parent: Document | Element, tagName: string): Element | undefined {
  return findByTag(parent.children, tagName);
}

/** Applies `filterByTag()` to the direct children of `parent`. */
function filterChildTags(parent: Document | Element, tagName: string): Element[] {
  return filterByTag(parent.children, tagName);
}
15
/**
 * Parses an Atom document given as an XML string into an `AtomFeed`.
 *
 * Only direct children of the top-level `<feed>` element are read. Text and
 * attribute values go through the sanitizing helpers of this module.
 *
 * @param data the XML source of the feed
 * @returns the parsed feed
 * @throws Error when the document has no top-level `<feed>` element
 */
export function parseAtomFeed(data: string): AtomFeed {
  const xml = new DOMParser().parseFromString(data, 'text/xml');
  const feed = findChildTag(xml, 'feed');
  if (feed === undefined) {
    throw Error('No <feed> tag found.');
  }

  // Looked up once; value, uri and version are all read from the same element.
  const generator = findChildTag(feed, 'generator');
  // A missing <updated> element falls back to 0, i.e. the Unix epoch.
  const updatedSource = findChildTag(feed, 'updated')?.textContent ?? 0;

  return {
    id: sanitizeTextContent(findChildTag(feed, 'id')) ?? '',
    title: parseAtomText(findChildTag(feed, 'title')),
    updated: new Date(updatedSource),
    entries: filterChildTags(feed, 'entry').map(entryNode => parseAtomEntry(entryNode)),
    author: filterChildTags(feed, 'author').map(personNode => parseAtomPerson(personNode)),
    link: filterChildTags(feed, 'link').map(linkNode => parseAtomLink(linkNode)),
    category: filterChildTags(feed, 'category').map(categoryNode => parseAtomCategory(categoryNode)),
    contributor: filterChildTags(feed, 'contributor').map(personNode => parseAtomPerson(personNode)),
    generator: {
      value: sanitizeTextContent(generator) ?? '',
      uri: sanitizeTextAttribute(generator, 'uri'),
      version: sanitizeTextAttribute(generator, 'version'),
    },
    icon: sanitizeTextContent(findChildTag(feed, 'icon')),
    logo: sanitizeTextContent(findChildTag(feed, 'logo')),
    rights: parseAtomText(findChildTag(feed, 'rights')),
    subtitle: sanitizeTextContent(findChildTag(feed, 'subtitle')),
  };
}
44
/**
 * Converts an `<entry>` element into an `AtomEntry`.
 *
 * Only direct children of `entry` are read. `published` is `undefined` when
 * the entry has no `<published>` child.
 *
 * @param entry the `<entry>` element to read
 * @returns the parsed entry
 */
export function parseAtomEntry(entry: Element): AtomEntry {
  // A missing <updated> element falls back to 0, i.e. the Unix epoch.
  const updatedSource = findChildTag(entry, 'updated')?.textContent ?? 0;
  const publishedTag = findChildTag(entry, 'published');

  return {
    id: sanitizeTextContent(findChildTag(entry, 'id')) ?? '',
    title: parseAtomText(findChildTag(entry, 'title')),
    updated: new Date(updatedSource),
    author: filterChildTags(entry, 'author').map(personNode => parseAtomPerson(personNode)),
    content: parseAtomContent(findChildTag(entry, 'content')),
    link: filterChildTags(entry, 'link').map(linkNode => parseAtomLink(linkNode)),
    summary: parseAtomText(findChildTag(entry, 'summary')),
    category: filterChildTags(entry, 'category').map(categoryNode => parseAtomCategory(categoryNode)),
    contributor: filterChildTags(entry, 'contributor').map(personNode => parseAtomPerson(personNode)),
    published: publishedTag ? new Date(publishedTag.textContent ?? 0) : undefined,
    rights: parseAtomText(findChildTag(entry, 'rights')),
    source: parseAtomSource(findChildTag(entry, 'source')),
  };
}
61
/**
 * Safely decodes the content of an Atom text construct into a sanitized string.
 *
 * A missing `type` is treated as `"text"`, which is the default required by
 * RFC 4287 section 3.1.1. Previously an element without a `type` attribute
 * decoded to an empty string.
 *
 * @param type the value of the element's `type` attribute, or `undefined` when absent
 * @param element the element to decode, or `undefined` when it does not exist
 * @returns the sanitized content; `''` when `element` is `undefined` or `type`
 *          is not one of `text`, `html`, `xhtml`
 */
export function safelyDecodeAtomText(type: AtomTextType | undefined, element: Element | undefined): string {
  if (element === undefined) {
    return '';
  }

  switch (type ?? 'text') {
    case 'xhtml':
      // type="xhtml": the element contains inline xhtml, wrapped in a div element,
      // so the existing `.innerHTML` is ready to be sanitized.
      return sanitize(element.innerHTML);
    case 'html':
      // type="html": the element contains entity-escaped html;
      // `.textContent` un-escapes it before sanitizing.
      return sanitize(element.textContent ?? '');
    case 'text':
      // type="text" (or no type): plain text with no entity-escaped html.
      // `.innerHTML` is **intended** to be safe, but it is sanitized anyway
      // so no attack vector is left open.
      return sanitize(element.innerHTML);
    default:
      // Any other type value (e.g. a media type on <content>) is not decoded.
      return '';
  }
}
78
/**
 * Sanitizes the `.textContent` of an element.
 *
 * @param element the element to read, or `undefined` when it does not exist
 * @returns the sanitized text (a `null` text content is treated as `''`),
 *          or `undefined` when `element` is `undefined`
 */
export function sanitizeTextContent(element: Element | undefined): string | undefined {
  if (element === undefined) {
    return undefined;
  }
  const rawText = element.textContent ?? '';
  return sanitize(rawText);
}
83
/**
 * Sanitizes the value of an attribute of an element.
 *
 * @typeParam T the type the sanitized string is presented as; this is an
 *              unchecked cast, no runtime validation takes place
 * @param element the element to read, or `undefined` when it does not exist
 * @param attributeName the name of the attribute to read
 * @returns the sanitized attribute value, or `undefined` when `element` is
 *          `undefined` or the attribute is not present
 */
export function sanitizeTextAttribute<T = string>(element: Element | undefined, attributeName: string): T | undefined {
  if (element === undefined) {
    return undefined;
  }
  const rawValue = element.getAttribute(attributeName);
  if (rawValue === null) {
    return undefined;
  }
  return sanitize(rawValue) as unknown as T;
}
88
/**
 * Converts a `<content>` element into an `AtomContent`.
 *
 * @param content the `<content>` element, or `undefined` when the entry has none
 * @returns the `type` and `src` attributes (sanitized) plus the decoded value
 */
export function parseAtomContent(content: Element | undefined): AtomContent {
  // `undefined` at runtime when the attribute (or the element) is absent.
  const declaredType = sanitizeTextAttribute(content, 'type') as AtomTextType;
  const src = sanitizeTextAttribute(content, 'src');
  const value = safelyDecodeAtomText(declaredType, content);
  return { type: declaredType, src, value };
}
97
98
/**
 * Converts an Atom text construct element (e.g. `<title>`, `<summary>`,
 * `<rights>`) into an `AtomText`.
 *
 * @param text the element to read, or `undefined` when it does not exist
 * @returns the sanitized `type` attribute plus the decoded value
 */
export function parseAtomText(text: Element | undefined): AtomText {
  // `undefined` at runtime when the attribute (or the element) is absent.
  const declaredType = sanitizeTextAttribute(text, 'type') as AtomTextType;
  const value = safelyDecodeAtomText(declaredType, text);
  return { type: declaredType, value };
}
106
/**
 * Converts a person construct element (`<author>` / `<contributor>`) into an `AtomPerson`.
 *
 * @param person the person element to read
 * @returns the sanitized `name` (`sanitize('')` when there is no `<name>` child)
 *          plus the optional sanitized `uri` and `email`
 */
export function parseAtomPerson(person: Element): AtomPerson {
  const nameTag = findChildTag(person, 'name');
  const uriTag = findChildTag(person, 'uri');
  const emailTag = findChildTag(person, 'email');

  return {
    name: sanitize(nameTag?.textContent ?? ''),
    uri: sanitizeTextContent(uriTag),
    email: sanitizeTextContent(emailTag),
  };
}
114
/**
 * Converts a `<link>` element into an `AtomLink`.
 *
 * All values are read from attributes of the element and sanitized.
 * `href` falls back to `''` when absent; every other field is `undefined`
 * when its attribute is not present.
 *
 * @param link the `<link>` element to read
 * @returns the parsed link
 */
export function parseAtomLink(link: Element): AtomLink {
  return {
    href: sanitizeTextAttribute(link, 'href') ?? '',
    // The Atom attribute is named "rel" (RFC 4287 section 4.2.7.2); reading
    // "ref" always yielded `undefined`.
    rel: sanitizeTextAttribute<AtomLinkRelType>(link, 'rel'),
    type: sanitizeTextAttribute(link, 'type'),
    hreflang: sanitizeTextAttribute(link, 'hreflang'),
    title: sanitizeTextAttribute(link, 'title'),
    length: sanitizeTextAttribute(link, 'length'),
  };
}
125
/**
 * Converts a `<category>` element into an `AtomCategory`.
 *
 * @param category the `<category>` element to read
 * @returns the sanitized `term` (`''` when absent) plus the optional
 *          sanitized `scheme` and `label` attributes
 */
export function parseAtomCategory(category: Element): AtomCategory {
  const term = sanitizeTextAttribute(category, 'term') ?? '';
  // Both are already `undefined` when the attribute is absent.
  const scheme = sanitizeTextAttribute(category, 'scheme');
  const label = sanitizeTextAttribute(category, 'label');
  return { term, scheme, label };
}
133
/**
 * Converts a `<source>` element into an `AtomSource`.
 *
 * @param source the `<source>` element, or `undefined` when the entry has none
 * @returns the parsed source, or `undefined` when `source` is `undefined`
 */
export function parseAtomSource(source: Element | undefined): AtomSource | undefined {
  if (source === undefined) {
    return undefined;
  }

  return {
    id: sanitizeTextContent(findChildTag(source, 'id')) ?? '',
    title: sanitizeTextContent(findChildTag(source, 'title')) ?? '',
    // Read the timestamp from <updated>; it was previously parsed from
    // <title>, which gave an Invalid Date for any real title text.
    // A missing <updated> element falls back to 0, i.e. the Unix epoch.
    updated: new Date(findChildTag(source, 'updated')?.textContent ?? 0),
  };
}
\No newline at end of file