1 | import Document, { AdjacentBoundaryBehaviour, ParseAnnotation, } from "@atjson/document";
|
2 | import HTMLSource from "@atjson/source-html";
|
3 | import * as entities from "entities";
|
4 | import * as sax from "sax";
|
5 | import { Article, Description, Media, Message, Title } from "./annotations";
|
/**
 * Recursively rewrites attribute names into their vendor-prefixed form.
 *
 * - Arrays are mapped element by element.
 * - Non-null objects get every enumerable key renamed to
 *   `-<namespace>-<key>`. A key written as `namespace:key` keeps its own
 *   namespace; a key without a colon uses `vendorPrefix`. Values are
 *   prefixed recursively with the same `vendorPrefix`.
 * - Any other value (string, number, null, undefined, ...) is returned as is.
 *
 * @param {string} vendorPrefix - Namespace applied to keys that carry none.
 * @param {*} attributes - Value whose keys should be prefixed.
 * @returns {*} A new array/object with prefixed keys, or the input unchanged.
 */
function prefix(vendorPrefix, attributes) {
  if (Array.isArray(attributes)) {
    return attributes.map(function recurseWithVendor(item) {
      return prefix(vendorPrefix, item);
    });
  }
  else if (typeof attributes === "object" && attributes != null) {
    let prefixedAttributes = {};
    for (let namespacedKey in attributes) {
      let [namespace, key] = namespacedKey.split(":");
      if (key == null) {
        // No colon in the name: the whole name is the key.
        key = namespace;
        namespace = vendorPrefix;
      }
      // The value must be read under the ORIGINAL property name. Reading it
      // under the stripped `key` yields undefined for every `ns:name`
      // attribute, because no property called `name` exists on the input.
      prefixedAttributes[`-${namespace}-${key}`] = prefix(vendorPrefix, attributes[namespacedKey]);
    }
    return prefixedAttributes;
  }
  else {
    return attributes;
  }
}
|
/**
 * Returns the namespace part of a tag name.
 *
 * The name is split on ":". When there is no second segment the result is
 * "html"; otherwise it is the first segment.
 *
 * @param {string} tagName - Tag name, optionally written as `namespace:tag`.
 * @returns {string} The first segment, or "html" when the name has no colon.
 */
function getVendorPrefix(tagName) {
  const segments = tagName.split(":");
  return segments.length < 2 ? "html" : segments[0];
}
|
/**
 * Returns the part of a tag name that follows its last ":".
 *
 * A name without a colon is returned whole.
 *
 * @param {string} tagName - Tag name, optionally written as `namespace:tag`.
 * @returns {string} The text after the final colon.
 */
function getType(tagName) {
  // lastIndexOf yields -1 when there is no colon, so the slice starts at 0.
  return tagName.slice(tagName.lastIndexOf(":") + 1);
}
|
/**
 * Document subclass whose `fromRaw` builds a document from an XML string.
 * The XML text is kept as the document content and the markup is described
 * by annotations.
 */
export default class PRISMSource extends Document {
  /**
   * Parses `xml` with sax (non-strict mode, lowercased names, position
   * tracking on) and returns a new instance of this class.
   *
   * Annotations produced:
   * - a ParseAnnotation over the `<?xml ... ?>` declaration, when present;
   * - for each element, a plain `{ type, start, end, attributes }` object
   *   whose type is `-<vendor prefix>-<tag>` and whose attributes went
   *   through `prefix`;
   * - a ParseAnnotation over every opening, closing and self-closing tag.
   *
   * After construction, numeric character references and the entities
   * amp/quot/apos/lt/gt found by the document's `match` are replaced with
   * their decoded text.
   *
   * @param {string} xml - Raw XML source.
   * @returns {PRISMSource} The populated document.
   * @throws {Error} When a closing tag arrives while no element is open.
   */
  static fromRaw(xml) {
    const parser = sax.parser(false, {
      trim: false,
      normalize: false,
      lowercase: true,
      xmlns: false,
      position: true,
    });
    const annotations = [];
    // Elements whose closing tag has not been seen yet, used as a stack.
    const openElements = [];

    const typeOf = (tagName) => `-${getVendorPrefix(tagName)}-${getType(tagName)}`;
    // NOTE(review): presumably startTagPosition - 1 is the index of the
    // tag's "<" — confirm against sax's position semantics.
    const tagStart = () => parser.startTagPosition - 1;
    // ParseAnnotation covering the tag the parser has just reported.
    const tagMarkup = (reason) => new ParseAnnotation({
      start: tagStart(),
      end: parser.position,
      attributes: { reason },
    });

    const declarationStart = xml.indexOf("<?xml");
    // "+ 2" moves past "?>"; a missing "?>" therefore yields 1, not -1.
    const declarationEnd = xml.indexOf("?>", declarationStart) + 2;
    if (declarationStart > -1 && declarationEnd > 1) {
      annotations.push(new ParseAnnotation({
        start: declarationStart,
        end: declarationEnd,
        attributes: {
          reason: "<?xml>",
        },
      }));
    }

    parser.onopentag = (node) => {
      const type = typeOf(node.name);
      const start = tagStart();
      const attributes = prefix(getVendorPrefix(node.name), node.attributes);

      if (node.isSelfClosing) {
        // The element is complete already: emit it together with its markup.
        annotations.push(
          { type, start, end: parser.position, attributes },
          tagMarkup(`<${node.name}/>`)
        );
        return;
      }

      // The end is filled in once the matching closing tag is reported.
      openElements.push({ type, start, attributes });
      annotations.push(tagMarkup(`<${node.name}>`));
    };

    parser.onclosetag = (tagName) => {
      const innermost = openElements[openElements.length - 1];
      if (innermost == null) {
        throw new Error("Expected there to be an annotation from the opening tag, but got none.");
      }
      // A closing tag that does not match the innermost open element is
      // ignored, and that element stays on the stack.
      if (innermost.type !== typeOf(tagName)) {
        return;
      }

      openElements.pop();
      innermost.end = parser.position;
      annotations.push(innermost, tagMarkup(`</${tagName}>`));
    };

    parser.write(xml).close();

    const prism = new this({
      content: xml,
      annotations,
    });

    const entityPattern = /(&((#[\d]+)|(#x[\da-f]+)|(amp)|(quot)|(apos)|(lt)|(gt));)/gi;
    // Reversed so that the matches are processed in the opposite order to
    // the one `match` returned them in.
    const hits = prism.match(entityPattern).reverse();
    for (const hit of hits) {
      // Read everything needed from the match before the document is edited.
      const from = hit.start;
      const to = hit.end;
      const decoded = entities.decodeXML(hit.matches[0]);
      prism.insertText(from, decoded, AdjacentBoundaryBehaviour.preserve);
      // The raw entity now sits right after the inserted text; remove it.
      prism.deleteText(from + decoded.length, to + decoded.length);
    }

    return prism;
  }
}
|
// Content type string exposed as a static property of the source.
PRISMSource.contentType = "application/vnd.atjson+prism";
// Schema: a copy of HTMLSource's schema entries followed by the annotation
// classes imported from ./annotations.
PRISMSource.schema = [
  ...HTMLSource.schema,
  Article,
  Description,
  Media,
  Message,
  Title,
];
|
138 | //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoic291cmNlLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vc3JjL3NvdXJjZS50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQSxPQUFPLFFBQVEsRUFBRSxFQUNmLHlCQUF5QixFQUd6QixlQUFlLEdBQ2hCLE1BQU0sa0JBQWtCLENBQUM7QUFDMUIsT0FBTyxVQUFVLE1BQU0scUJBQXFCLENBQUM7QUFDN0MsT0FBTyxLQUFLLFFBQVEsTUFBTSxVQUFVLENBQUM7QUFDckMsT0FBTyxLQUFLLEdBQUcsTUFBTSxLQUFLLENBQUM7QUFDM0IsT0FBTyxFQUFFLE9BQU8sRUFBRSxXQUFXLEVBQUUsS0FBSyxFQUFFLE9BQU8sRUFBRSxLQUFLLEVBQUUsTUFBTSxlQUFlLENBQUM7QUFFNUUsU0FBUyxNQUFNLENBQUMsWUFBb0IsRUFBRSxVQUFlO0lBQ25ELElBQUksS0FBSyxDQUFDLE9BQU8sQ0FBQyxVQUFVLENBQUMsRUFBRTtRQUM3QixPQUFPLFVBQVUsQ0FBQyxHQUFHLENBQUMsU0FBUyxpQkFBaUIsQ0FBQyxJQUFTO1lBQ3hELE9BQU8sTUFBTSxDQUFDLFlBQVksRUFBRSxJQUFJLENBQUMsQ0FBQztRQUNwQyxDQUFDLENBQUMsQ0FBQztLQUNKO1NBQU0sSUFBSSxPQUFPLFVBQVUsS0FBSyxRQUFRLElBQUksVUFBVSxJQUFJLElBQUksRUFBRTtRQUMvRCxJQUFJLGtCQUFrQixHQUFRLEVBQUUsQ0FBQztRQUNqQyxLQUFLLElBQUksYUFBYSxJQUFJLFVBQVUsRUFBRTtZQUNwQyxJQUFJLENBQUMsU0FBUyxFQUFFLEdBQUcsQ0FBQyxHQUFHLGFBQWEsQ0FBQyxLQUFLLENBQUMsR0FBRyxDQUFDLENBQUM7WUFDaEQsSUFBSSxHQUFHLElBQUksSUFBSSxFQUFFO2dCQUNmLEdBQUcsR0FBRyxTQUFTLENBQUM7Z0JBQ2hCLFNBQVMsR0FBRyxZQUFZLENBQUM7YUFDMUI7WUFDRCxrQkFBa0IsQ0FBQyxJQUFJLFNBQVMsSUFBSSxHQUFHLEVBQUUsQ0FBQyxHQUFHLE1BQU0sQ0FDakQsWUFBWSxFQUNaLFVBQVUsQ0FBQyxHQUFHLENBQUMsQ0FDaEIsQ0FBQztTQUNIO1FBQ0QsT0FBTyxrQkFBa0IsQ0FBQztLQUMzQjtTQUFNO1FBQ0wsT0FBTyxVQUFVLENBQUM7S0FDbkI7QUFDSCxDQUFDO0FBRUQsU0FBUyxlQUFlLENBQUMsT0FBZTtJQUN0QyxJQUFJLENBQUMsU0FBUyxFQUFFLEdBQUcsQ0FBQyxHQUFHLE9BQU8sQ0FBQyxLQUFLLENBQUMsR0FBRyxDQUFDLENBQUM7SUFDMUMsSUFBSSxHQUFHLElBQUksSUFBSSxFQUFFO1FBQ2YsT0FBTyxNQUFNLENBQUM7S0FDZjtTQUFNO1FBQ0wsT0FBTyxTQUFTLENBQUM7S0FDbEI7QUFDSCxDQUFDO0FBRUQsU0FBUyxPQUFPLENBQUMsT0FBZTtJQUM5QixJQUFJLEtBQUssR0FBRyxPQUFPLENBQUMsS0FBSyxDQUFDLEdBQUcsQ0FBQyxDQUFDO0lBQy9CLE9BQU8sS0FBSyxDQUFDLEtBQUssQ0FBQyxNQUFNLEdBQUcsQ0FBQyxDQUFDLENBQUM7QUFDakMsQ0FBQztBQUVELE1BQU0sQ0FBQyxPQUFPLE9BQU8sV0FBWSxTQUFRLFFBQVE7SUFVL0MsTUFBTSxDQUFDLE9BQU8sQ0FBQyxHQUFXO1FBQ3hCLElB
QUksTUFBTSxHQUFHLEdBQUcsQ0FBQyxNQUFNLENBQUMsS0FBSyxFQUFFO1lBQzdCLElBQUksRUFBRSxLQUFLO1lBQ1gsU0FBUyxFQUFFLEtBQUs7WUFDaEIsU0FBUyxFQUFFLElBQUk7WUFDZixLQUFLLEVBQUUsS0FBSztZQUNaLFFBQVEsRUFBRSxJQUFJO1NBQ2YsQ0FBQyxDQUFDO1FBRUgsSUFBSSxPQUFPLEdBQUcsR0FBRyxDQUFDO1FBQ2xCLElBQUksV0FBVyxHQUF1QyxFQUFFLENBQUM7UUFFekQsSUFBSSxRQUFRLEdBQUcsR0FBRyxDQUFDLE9BQU8sQ0FBQyxPQUFPLENBQUMsQ0FBQztRQUNwQyxJQUFJLE1BQU0sR0FBRyxHQUFHLENBQUMsT0FBTyxDQUFDLElBQUksRUFBRSxRQUFRLENBQUMsR0FBRyxDQUFDLENBQUM7UUFDN0MsSUFBSSxRQUFRLEdBQUcsQ0FBQyxDQUFDLElBQUksTUFBTSxHQUFHLENBQUMsRUFBRTtZQUMvQixXQUFXLENBQUMsSUFBSSxDQUNkLElBQUksZUFBZSxDQUFDO2dCQUNsQixLQUFLLEVBQUUsUUFBUTtnQkFDZixHQUFHLEVBQUUsTUFBTTtnQkFDWCxVQUFVLEVBQUU7b0JBQ1YsTUFBTSxFQUFFLFFBQVE7aUJBQ2pCO2FBQ0YsQ0FBQyxDQUNILENBQUM7U0FDSDtRQUVELElBQUksa0JBQWtCLEdBQW1DLEVBQUUsQ0FBQztRQUU1RCxNQUFNLENBQUMsU0FBUyxHQUFHLFNBQVMsU0FBUyxDQUFDLElBQUk7WUFDeEMsSUFBSSxZQUFZLEdBQUcsZUFBZSxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsQ0FBQztZQUM5QyxJQUFJLElBQUksR0FBRyxPQUFPLENBQUMsSUFBSSxDQUFDLElBQUksQ0FBQyxDQUFDO1lBQzlCLElBQUksSUFBSSxDQUFDLGFBQWEsRUFBRTtnQkFDdEIsV0FBVyxDQUFDLElBQUksQ0FDZDtvQkFDRSxJQUFJLEVBQUUsSUFBSSxZQUFZLElBQUksSUFBSSxFQUFFO29CQUNoQyxLQUFLLEVBQUUsTUFBTSxDQUFDLGdCQUFnQixHQUFHLENBQUM7b0JBQ2xDLEdBQUcsRUFBRSxNQUFNLENBQUMsUUFBUTtvQkFDcEIsVUFBVSxFQUFFLE1BQU0sQ0FBQyxZQUFZLEVBQUUsSUFBSSxDQUFDLFVBQVUsQ0FBQztpQkFDbEQsRUFDRCxJQUFJLGVBQWUsQ0FBQztvQkFDbEIsS0FBSyxFQUFFLE1BQU0sQ0FBQyxnQkFBZ0IsR0FBRyxDQUFDO29CQUNsQyxHQUFHLEVBQUUsTUFBTSxDQUFDLFFBQVE7b0JBQ3BCLFVBQVUsRUFBRTt3QkFDVixNQUFNLEVBQUUsSUFBSSxJQUFJLENBQUMsSUFBSSxJQUFJO3FCQUMxQjtpQkFDRixDQUFDLENBQ0gsQ0FBQzthQUNIO2lCQUFNO2dCQUNMLGtCQUFrQixDQUFDLElBQUksQ0FBQztvQkFDdEIsSUFBSSxFQUFFLElBQUksWUFBWSxJQUFJLElBQUksRUFBRTtvQkFDaEMsS0FBSyxFQUFFLE1BQU0sQ0FBQyxnQkFBZ0IsR0FBRyxDQUFDO29CQUNsQyxVQUFVLEVBQUUsTUFBTSxDQUFDLFlBQVksRUFBRSxJQUFJLENBQUMsVUFBVSxDQUFDO2lCQUNsRCxDQUFDLENBQUM7Z0JBQ0gsV0FBVyxDQUFDLElBQUksQ0FDZCxJQUFJLGVBQWUsQ0FBQztvQkFDbEIsS0FBSyxFQUFFLE1BQU0sQ0FBQyxnQkFBZ0IsR0FBRyxDQUFDO29CQUNsQyxHQUFHLEVBQUUsTUFBTSxDQUFDLFFBQVE7b0JBQ3BCLFVB
QVUsRUFBRTt3QkFDVixNQUFNLEVBQUUsSUFBSSxJQUFJLENBQUMsSUFBSSxHQUFHO3FCQUN6QjtpQkFDRixDQUFDLENBQ0gsQ0FBQzthQUNIO1FBQ0gsQ0FBQyxDQUFDO1FBRUYsTUFBTSxDQUFDLFVBQVUsR0FBRyxTQUFTLFVBQVUsQ0FBQyxPQUFPO1lBQzdDLElBQUksVUFBVSxHQUFHLGtCQUFrQixDQUFDLEdBQUcsRUFBRSxDQUFDO1lBQzFDLElBQUksVUFBVSxJQUFJLElBQUksRUFBRTtnQkFDdEIsTUFBTSxJQUFJLEtBQUssQ0FDYix3RUFBd0UsQ0FDekUsQ0FBQzthQUNIO1lBR0QsSUFDRSxVQUFVLENBQUMsSUFBSSxLQUFLLElBQUksZUFBZSxDQUFDLE9BQU8sQ0FBQyxJQUFJLE9BQU8sQ0FBQyxPQUFPLENBQUMsRUFBRSxFQUN0RTtnQkFDQSxrQkFBa0IsQ0FBQyxJQUFJLENBQUMsVUFBVSxDQUFDLENBQUM7Z0JBQ3BDLE9BQU87YUFDUjtZQUVELFVBQVUsQ0FBQyxHQUFHLEdBQUcsTUFBTSxDQUFDLFFBQVEsQ0FBQztZQUVqQyxXQUFXLENBQUMsSUFBSSxDQUNkLFVBQTRCLEVBQzVCLElBQUksZUFBZSxDQUFDO2dCQUNsQixLQUFLLEVBQUUsTUFBTSxDQUFDLGdCQUFnQixHQUFHLENBQUM7Z0JBQ2xDLEdBQUcsRUFBRSxNQUFNLENBQUMsUUFBUTtnQkFDcEIsVUFBVSxFQUFFO29CQUNWLE1BQU0sRUFBRSxLQUFLLE9BQU8sR0FBRztpQkFDeEI7YUFDRixDQUFDLENBQ0gsQ0FBQztRQUNKLENBQUMsQ0FBQztRQUVGLE1BQU0sQ0FBQyxLQUFLLENBQUMsR0FBRyxDQUFDLENBQUMsS0FBSyxFQUFFLENBQUM7UUFFMUIsSUFBSSxLQUFLLEdBQUcsSUFBSSxJQUFJLENBQUM7WUFDbkIsT0FBTztZQUNQLFdBQVc7U0FDWixDQUFDLENBQUM7UUFFSCxJQUFJLE9BQU8sR0FBRyxLQUFLO2FBQ2hCLEtBQUssQ0FBQyw2REFBNkQsQ0FBQzthQUNwRSxPQUFPLEVBQUUsQ0FBQztRQUViLEtBQUssSUFBSSxFQUFFLEtBQUssRUFBRSxHQUFHLEVBQUUsT0FBTyxFQUFFLElBQUksT0FBTyxFQUFFO1lBQzNDLElBQUksTUFBTSxHQUFHLFFBQVEsQ0FBQyxTQUFTLENBQUMsT0FBTyxDQUFDLENBQUMsQ0FBQyxDQUFDLENBQUM7WUFDNUMsS0FBSyxDQUFDLFVBQVUsQ0FBQyxLQUFLLEVBQUUsTUFBTSxFQUFFLHlCQUF5QixDQUFDLFFBQVEsQ0FBQyxDQUFDO1lBQ3BFLEtBQUssQ0FBQyxVQUFVLENBQUMsS0FBSyxHQUFHLE1BQU0sQ0FBQyxNQUFNLEVBQUUsR0FBRyxHQUFHLE1BQU0sQ0FBQyxNQUFNLENBQUMsQ0FBQztTQUM5RDtRQUVELE9BQU8sS0FBSyxDQUFDO0lBQ2YsQ0FBQzs7QUExSE0sdUJBQVcsR0FBRyw4QkFBOEIsQ0FBQztBQUM3QyxrQkFBTSxHQUFHLENBQUMsR0FBRyxVQUFVLENBQUMsTUFBTSxDQUFDLENBQUMsTUFBTSxDQUFDO0lBQzVDLE9BQU87SUFDUCxXQUFXO0lBQ1gsS0FBSztJQUNMLE9BQU87SUFDUCxLQUFLO0NBQ04sQ0FBQyxDQUFDIn0= |
\ | No newline at end of file |