UNPKG

10.7 kBJavaScriptView Raw
1import Document, { AdjacentBoundaryBehaviour, ParseAnnotation, } from "@atjson/document";
2import HTMLSource from "@atjson/source-html";
3import * as entities from "entities";
4import * as sax from "sax";
5import { Article, Description, Media, Message, Title } from "./annotations";
/**
 * Recursively rewrites attribute keys into the `-namespace-key` form.
 *
 * - Arrays are mapped element by element.
 * - Plain objects get every own key rewritten: a key written as
 *   `namespace:key` keeps its own namespace, while a key without a colon
 *   falls back to `vendorPrefix`. Values are processed recursively.
 * - Anything else (strings, numbers, null, undefined, ...) is returned as is.
 *
 * @param {string} vendorPrefix - Namespace used for keys that have none.
 * @param {*} attributes - Attribute value, object or array to rewrite.
 * @returns {*} A new array/object with prefixed keys, or the input itself
 *   when it is neither an array nor an object.
 */
function prefix(vendorPrefix, attributes) {
  if (Array.isArray(attributes)) {
    return attributes.map((item) => prefix(vendorPrefix, item));
  }
  if (typeof attributes !== "object" || attributes == null) {
    return attributes;
  }

  const prefixedAttributes = {};
  // Own keys only, so inherited enumerable properties never leak in.
  for (const namespacedKey of Object.keys(attributes)) {
    let [namespace, key] = namespacedKey.split(":");
    if (key == null) {
      // No colon in the key: the single segment is the key itself.
      key = namespace;
      namespace = vendorPrefix;
    }
    // Read the value with the ORIGINAL key. Looking it up with the stripped
    // `key` returned undefined for every namespaced attribute.
    prefixedAttributes[`-${namespace}-${key}`] = prefix(
      vendorPrefix,
      attributes[namespacedKey]
    );
  }
  return prefixedAttributes;
}
/**
 * Returns the namespace part of a `namespace:tag` name.
 *
 * @param {string} tagName - Tag name, optionally written as `namespace:tag`.
 * @returns {string} The text before the first colon, or "html" when the
 *   name contains no colon.
 */
function getVendorPrefix(tagName) {
  const segments = tagName.split(":");
  // A missing second segment means the name had no colon at all.
  return segments[1] == null ? "html" : segments[0];
}
/**
 * Returns the local part of a tag name, i.e. the text after its last colon.
 *
 * @param {string} tagName - Tag name, optionally written as `namespace:tag`.
 * @returns {string} The last colon-separated segment; the whole name when
 *   it contains no colon.
 */
function getType(tagName) {
  // split() always yields at least one segment, so pop() never misses.
  return tagName.split(":").pop();
}
/**
 * Document subclass whose `fromRaw` turns XML markup into a document with
 * one annotation per element plus parse annotations covering the markup.
 */
export default class PRISMSource extends Document {
  /**
   * Builds a document from raw XML text.
   *
   * The text is kept as the document content. Every element becomes an
   * annotation typed `-<vendor>-<type>` spanning the element, and every
   * tag (as well as the `<?xml ... ?>` declaration, when present) is
   * covered by a ParseAnnotation. Finally the numeric and the five named
   * XML entities found in the content are replaced by their decoded text.
   *
   * @param {string} xml - Raw XML markup.
   * @returns {PRISMSource} The populated document (an instance of `this`).
   * @throws {Error} When a closing tag is seen while no opening tag is
   *   pending.
   */
  static fromRaw(xml) {
    // First argument `false` presumably selects sax's non-strict mode;
    // `position: true` is needed for the offsets read below.
    const parser = sax.parser(false, {
      trim: false,
      normalize: false,
      lowercase: true,
      xmlns: false,
      position: true,
    });
    const annotations = [];

    // Cover the XML declaration, if any, with a parse annotation.
    const declarationStart = xml.indexOf("<?xml");
    const declarationEnd = xml.indexOf("?>", declarationStart) + 2;
    if (declarationStart > -1 && declarationEnd > 1) {
      annotations.push(
        new ParseAnnotation({
          start: declarationStart,
          end: declarationEnd,
          attributes: {
            reason: "<?xml>",
          },
        })
      );
    }

    // Parse annotation spanning the tag the parser has just finished.
    const currentTagToken = (reason) =>
      new ParseAnnotation({
        start: parser.startTagPosition - 1,
        end: parser.position,
        attributes: {
          reason,
        },
      });

    // Stack of elements that were opened but not closed yet.
    const openElements = [];

    parser.onopentag = (node) => {
      const vendor = getVendorPrefix(node.name);
      const annotationType = `-${vendor}-${getType(node.name)}`;

      if (!node.isSelfClosing) {
        // The end offset is filled in when the matching close tag arrives.
        openElements.push({
          type: annotationType,
          start: parser.startTagPosition - 1,
          attributes: prefix(vendor, node.attributes),
        });
        annotations.push(currentTagToken(`<${node.name}>`));
        return;
      }

      // Self-closing: the element and its markup share the same span.
      annotations.push(
        {
          type: annotationType,
          start: parser.startTagPosition - 1,
          end: parser.position,
          attributes: prefix(vendor, node.attributes),
        },
        currentTagToken(`<${node.name}/>`)
      );
    };

    parser.onclosetag = (tagName) => {
      const pending = openElements.pop();
      if (pending == null) {
        throw new Error("Expected there to be an annotation from the opening tag, but got none.");
      }

      const closingType = `-${getVendorPrefix(tagName)}-${getType(tagName)}`;
      if (pending.type !== closingType) {
        // Not the element on top of the stack: put it back and ignore
        // this close tag.
        openElements.push(pending);
        return;
      }

      pending.end = parser.position;
      annotations.push(pending, currentTagToken(`</${tagName}>`));
    };

    parser.write(xml).close();

    const prism = new this({
      content: xml,
      annotations,
    });

    // Matches are walked last-to-first, presumably so that edits never
    // shift the offsets of matches still waiting to be processed.
    const entityMatches = prism
      .match(/(&((#[\d]+)|(#x[\da-f]+)|(amp)|(quot)|(apos)|(lt)|(gt));)/gi)
      .reverse();
    for (const match of entityMatches) {
      const decoded = entities.decodeXML(match.matches[0]);
      // Insert the decoded text first, then remove the original entity,
      // which now sits `decoded.length` characters further along.
      prism.insertText(match.start, decoded, AdjacentBoundaryBehaviour.preserve);
      prism.deleteText(match.start + decoded.length, match.end + decoded.length);
    }

    return prism;
  }
}
// Static metadata attached after the class body.
PRISMSource.contentType = "application/vnd.atjson+prism";
// Schema = a copy of the HTML source schema followed by the annotation
// classes imported from ./annotations.
PRISMSource.schema = [
  ...HTMLSource.schema,
  Article,
  Description,
  Media,
  Message,
  Title,
];
138//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoic291cmNlLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vc3JjL3NvdXJjZS50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQSxPQUFPLFFBQVEsRUFBRSxFQUNmLHlCQUF5QixFQUd6QixlQUFlLEdBQ2hCLE1BQU0sa0JBQWtCLENBQUM7QUFDMUIsT0FBTyxVQUFVLE1BQU0scUJBQXFCLENBQUM7QUFDN0MsT0FBTyxLQUFLLFFBQVEsTUFBTSxVQUFVLENBQUM7QUFDckMsT0FBTyxLQUFLLEdBQUcsTUFBTSxLQUFLLENBQUM7QUFDM0IsT0FBTyxFQUFFLE9BQU8sRUFBRSxXQUFXLEVBQUUsS0FBSyxFQUFFLE9BQU8sRUFBRSxLQUFLLEVBQUUsTUFBTSxlQUFlLENBQUM7QUFFNUUsU0FBUyxNQUFNLENBQUMsWUFBb0IsRUFBRSxVQUFlO0lBQ25ELElBQUksS0FBSyxDQUFDLE9BQU8sQ0FBQyxVQUFVLENBQUMsRUFBRTtRQUM3QixPQUFPLFVBQVUsQ0FBQyxHQUFHLENBQUMsU0FBUyxpQkFBaUIsQ0FBQyxJQUFTO1lBQ3hELE9BQU8sTUFBTSxDQUFDLFlBQVksRUFBRSxJQUFJLENBQUMsQ0FBQztRQUNwQyxDQUFDLENBQUMsQ0FBQztLQUNKO1NBQU0sSUFBSSxPQUFPLFVBQVUsS0FBSyxRQUFRLElBQUksVUFBVSxJQUFJLElBQUksRUFBRTtRQUMvRCxJQUFJLGtCQUFrQixHQUFRLEVBQUUsQ0FBQztRQUNqQyxLQUFLLElBQUksYUFBYSxJQUFJLFVBQVUsRUFBRTtZQUNwQyxJQUFJLENBQUMsU0FBUyxFQUFFLEdBQUcsQ0FBQyxHQUFHLGFBQWEsQ0FBQyxLQUFLLENBQUMsR0FBRyxDQUFDLENBQUM7WUFDaEQsSUFBSSxHQUFHLElBQUksSUFBSSxFQUFFO2dCQUNmLEdBQUcsR0FBRyxTQUFTLENBQUM7Z0JBQ2hCLFNBQVMsR0FBRyxZQUFZLENBQUM7YUFDMUI7WUFDRCxrQkFBa0IsQ0FBQyxJQUFJLFNBQVMsSUFBSSxHQUFHLEVBQUUsQ0FBQyxHQUFHLE1BQU0sQ0FDakQsWUFBWSxFQUNaLFVBQVUsQ0FBQyxHQUFHLENBQUMsQ0FDaEIsQ0FBQztTQUNIO1FBQ0QsT0FBTyxrQkFBa0IsQ0FBQztLQUMzQjtTQUFNO1FBQ0wsT0FBTyxVQUFVLENBQUM7S0FDbkI7QUFDSCxDQUFDO0FBRUQsU0FBUyxlQUFlLENBQUMsT0FBZTtJQUN0QyxJQUFJLENBQUMsU0FBUyxFQUFFLEdBQUcsQ0FBQyxHQUFHLE9BQU8sQ0FBQyxLQUFLLENBQUMsR0FBRyxDQUFDLENBQUM7SUFDMUMsSUFBSSxHQUFHLElBQUksSUFBSSxFQUFFO1FBQ2YsT0FBTyxNQUFNLENBQUM7S0FDZjtTQUFNO1FBQ0wsT0FBTyxTQUFTLENBQUM7S0FDbEI7QUFDSCxDQUFDO0FBRUQsU0FBUyxPQUFPLENBQUMsT0FBZTtJQUM5QixJQUFJLEtBQUssR0FBRyxPQUFPLENBQUMsS0FBSyxDQUFDLEdBQUcsQ0FBQyxDQUFDO0lBQy9CLE9BQU8sS0FBSyxDQUFDLEtBQUssQ0FBQyxNQUFNLEdBQUcsQ0FBQyxDQUFDLENBQUM7QUFDakMsQ0FBQztBQUVELE1BQU0sQ0FBQyxPQUFPLE9BQU8sV0FBWSxTQUFRLFFBQVE7SUFVL0MsTUFBTSxDQUFDLE9BQU8sQ0FBQyxHQUFXO1FBQ3hCLElBQUk
sTUFBTSxHQUFHLEdBQUcsQ0FBQyxNQUFNLENBQUMsS0FBSyxFQUFFO1lBQzdCLElBQUksRUFBRSxLQUFLO1lBQ1gsU0FBUyxFQUFFLEtBQUs7WUFDaEIsU0FBUyxFQUFFLElBQUk7WUFDZixLQUFLLEVBQUUsS0FBSztZQUNaLFFBQVEsRUFBRSxJQUFJO1NBQ2YsQ0FBQyxDQUFDO1FBRUgsSUFBSSxPQUFPLEdBQUcsR0FBRyxDQUFDO1FBQ2xCLElBQUksV0FBVyxHQUF1QyxFQUFFLENBQUM7UUFFekQsSUFBSSxRQUFRLEdBQUcsR0FBRyxDQUFDLE9BQU8sQ0FBQyxPQUFPLENBQUMsQ0FBQztRQUNwQyxJQUFJLE1BQU0sR0FBRyxHQUFHLENBQUMsT0FBTyxDQUFDLElBQUksRUFBRSxRQUFRLENBQUMsR0FBRyxDQUFDLENBQUM7UUFDN0MsSUFBSSxRQUFRLEdBQUcsQ0FBQyxDQUFDLElBQUksTUFBTSxHQUFHLENBQUMsRUFBRTtZQUMvQixXQUFXLENBQUMsSUFBSSxDQUNkLElBQUksZUFBZSxDQUFDO2dCQUNsQixLQUFLLEVBQUUsUUFBUTtnQkFDZixHQUFHLEVBQUUsTUFBTTtnQkFDWCxVQUFVLEVBQUU7b0JBQ1YsTUFBTSxFQUFFLFFBQVE7aUJBQ2pCO2FBQ0YsQ0FBQyxDQUNILENBQUM7U0FDSDtRQUVELElBQUksa0JBQWtCLEdBQW1DLEVBQUUsQ0FBQztRQUU1RCxNQUFNLENBQUMsU0FBUyxHQUFHLFNBQVMsU0FBUyxDQUFDLElBQUk7WUFDeEMsSUFBSSxZQUFZLEdBQUcsZUFBZSxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsQ0FBQztZQUM5QyxJQUFJLElBQUksR0FBRyxPQUFPLENBQUMsSUFBSSxDQUFDLElBQUksQ0FBQyxDQUFDO1lBQzlCLElBQUksSUFBSSxDQUFDLGFBQWEsRUFBRTtnQkFDdEIsV0FBVyxDQUFDLElBQUksQ0FDZDtvQkFDRSxJQUFJLEVBQUUsSUFBSSxZQUFZLElBQUksSUFBSSxFQUFFO29CQUNoQyxLQUFLLEVBQUUsTUFBTSxDQUFDLGdCQUFnQixHQUFHLENBQUM7b0JBQ2xDLEdBQUcsRUFBRSxNQUFNLENBQUMsUUFBUTtvQkFDcEIsVUFBVSxFQUFFLE1BQU0sQ0FBQyxZQUFZLEVBQUUsSUFBSSxDQUFDLFVBQVUsQ0FBQztpQkFDbEQsRUFDRCxJQUFJLGVBQWUsQ0FBQztvQkFDbEIsS0FBSyxFQUFFLE1BQU0sQ0FBQyxnQkFBZ0IsR0FBRyxDQUFDO29CQUNsQyxHQUFHLEVBQUUsTUFBTSxDQUFDLFFBQVE7b0JBQ3BCLFVBQVUsRUFBRTt3QkFDVixNQUFNLEVBQUUsSUFBSSxJQUFJLENBQUMsSUFBSSxJQUFJO3FCQUMxQjtpQkFDRixDQUFDLENBQ0gsQ0FBQzthQUNIO2lCQUFNO2dCQUNMLGtCQUFrQixDQUFDLElBQUksQ0FBQztvQkFDdEIsSUFBSSxFQUFFLElBQUksWUFBWSxJQUFJLElBQUksRUFBRTtvQkFDaEMsS0FBSyxFQUFFLE1BQU0sQ0FBQyxnQkFBZ0IsR0FBRyxDQUFDO29CQUNsQyxVQUFVLEVBQUUsTUFBTSxDQUFDLFlBQVksRUFBRSxJQUFJLENBQUMsVUFBVSxDQUFDO2lCQUNsRCxDQUFDLENBQUM7Z0JBQ0gsV0FBVyxDQUFDLElBQUksQ0FDZCxJQUFJLGVBQWUsQ0FBQztvQkFDbEIsS0FBSyxFQUFFLE1BQU0sQ0FBQyxnQkFBZ0IsR0FBRyxDQUFDO29CQUNsQyxHQUFHLEVBQUUsTUFBTSxDQUFDLFFBQVE7b0JBQ3BCLFVBQVU
sRUFBRTt3QkFDVixNQUFNLEVBQUUsSUFBSSxJQUFJLENBQUMsSUFBSSxHQUFHO3FCQUN6QjtpQkFDRixDQUFDLENBQ0gsQ0FBQzthQUNIO1FBQ0gsQ0FBQyxDQUFDO1FBRUYsTUFBTSxDQUFDLFVBQVUsR0FBRyxTQUFTLFVBQVUsQ0FBQyxPQUFPO1lBQzdDLElBQUksVUFBVSxHQUFHLGtCQUFrQixDQUFDLEdBQUcsRUFBRSxDQUFDO1lBQzFDLElBQUksVUFBVSxJQUFJLElBQUksRUFBRTtnQkFDdEIsTUFBTSxJQUFJLEtBQUssQ0FDYix3RUFBd0UsQ0FDekUsQ0FBQzthQUNIO1lBR0QsSUFDRSxVQUFVLENBQUMsSUFBSSxLQUFLLElBQUksZUFBZSxDQUFDLE9BQU8sQ0FBQyxJQUFJLE9BQU8sQ0FBQyxPQUFPLENBQUMsRUFBRSxFQUN0RTtnQkFDQSxrQkFBa0IsQ0FBQyxJQUFJLENBQUMsVUFBVSxDQUFDLENBQUM7Z0JBQ3BDLE9BQU87YUFDUjtZQUVELFVBQVUsQ0FBQyxHQUFHLEdBQUcsTUFBTSxDQUFDLFFBQVEsQ0FBQztZQUVqQyxXQUFXLENBQUMsSUFBSSxDQUNkLFVBQTRCLEVBQzVCLElBQUksZUFBZSxDQUFDO2dCQUNsQixLQUFLLEVBQUUsTUFBTSxDQUFDLGdCQUFnQixHQUFHLENBQUM7Z0JBQ2xDLEdBQUcsRUFBRSxNQUFNLENBQUMsUUFBUTtnQkFDcEIsVUFBVSxFQUFFO29CQUNWLE1BQU0sRUFBRSxLQUFLLE9BQU8sR0FBRztpQkFDeEI7YUFDRixDQUFDLENBQ0gsQ0FBQztRQUNKLENBQUMsQ0FBQztRQUVGLE1BQU0sQ0FBQyxLQUFLLENBQUMsR0FBRyxDQUFDLENBQUMsS0FBSyxFQUFFLENBQUM7UUFFMUIsSUFBSSxLQUFLLEdBQUcsSUFBSSxJQUFJLENBQUM7WUFDbkIsT0FBTztZQUNQLFdBQVc7U0FDWixDQUFDLENBQUM7UUFFSCxJQUFJLE9BQU8sR0FBRyxLQUFLO2FBQ2hCLEtBQUssQ0FBQyw2REFBNkQsQ0FBQzthQUNwRSxPQUFPLEVBQUUsQ0FBQztRQUViLEtBQUssSUFBSSxFQUFFLEtBQUssRUFBRSxHQUFHLEVBQUUsT0FBTyxFQUFFLElBQUksT0FBTyxFQUFFO1lBQzNDLElBQUksTUFBTSxHQUFHLFFBQVEsQ0FBQyxTQUFTLENBQUMsT0FBTyxDQUFDLENBQUMsQ0FBQyxDQUFDLENBQUM7WUFDNUMsS0FBSyxDQUFDLFVBQVUsQ0FBQyxLQUFLLEVBQUUsTUFBTSxFQUFFLHlCQUF5QixDQUFDLFFBQVEsQ0FBQyxDQUFDO1lBQ3BFLEtBQUssQ0FBQyxVQUFVLENBQUMsS0FBSyxHQUFHLE1BQU0sQ0FBQyxNQUFNLEVBQUUsR0FBRyxHQUFHLE1BQU0sQ0FBQyxNQUFNLENBQUMsQ0FBQztTQUM5RDtRQUVELE9BQU8sS0FBSyxDQUFDO0lBQ2YsQ0FBQzs7QUExSE0sdUJBQVcsR0FBRyw4QkFBOEIsQ0FBQztBQUM3QyxrQkFBTSxHQUFHLENBQUMsR0FBRyxVQUFVLENBQUMsTUFBTSxDQUFDLENBQUMsTUFBTSxDQUFDO0lBQzVDLE9BQU87SUFDUCxXQUFXO0lBQ1gsS0FBSztJQUNMLE9BQU87SUFDUCxLQUFLO0NBQ04sQ0FBQyxDQUFDIn0=
\No newline at end of file