/**
 * Applies typographic clean-up to the text nodes found under `dom.body`.
 *
 * Every text node with a non-empty value that passes `acceptNode` has its
 * value rewritten by `quotes`, then `punctuation`, then `ligatures`.
 * The document is modified in place and nothing is returned.
 *
 * @param {Object} dom - Document-like object exposing `body`,
 *   `createTreeWalker` and `defaultView.NodeFilter`.
 * @param {*} data - Not used by this function.
 */
export default function(dom, data) {
  const walker = dom.createTreeWalker(
    dom.body,
    dom.defaultView.NodeFilter.SHOW_TEXT
  );

  while (walker.nextNode()) {
    const node = walker.currentNode;
    const original = node.nodeValue;

    // Skip empty text nodes and nodes whose parent opts out of typesetting.
    if (!original || !acceptNode(node)) {
      continue;
    }

    node.nodeValue = ligatures(punctuation(quotes(original)));
  }
}
|
18 |
|
// Parent node names whose text is never rewritten.
const UNTYPESET_PARENT_NAMES = new Set([
  "SCRIPT",
  "STYLE",
  "CODE",
  "PRE",
  "SPAN",
  "DT-HEADER",
  "DT-BYLINE",
  "DT-MATH",
  "DT-CODE",
  "DT-BIBLIOGRAPHY",
  "DT-FOOTER",
]);

/**
 * Decides whether a text node may be rewritten, based on its direct parent.
 *
 * A node is rejected when it has no parent element, when the parent's node
 * name is one of the excluded names, when the parent's `nodeType` is 8, or
 * when the parent's `class` attribute contains "katex" or "MathJax".
 *
 * @param {Object} node - Node whose `parentElement` is inspected.
 * @returns {boolean|null|undefined} `true` when the node is accepted, `false`
 *   when rejected; the falsy `parentElement` value itself when there is no
 *   parent.
 */
function acceptNode(node) {
  const parent = node.parentElement;
  if (!parent) {
    return parent;
  }

  // Math renderers are detected through the parent's class attribute.
  const className = parent.getAttribute ? parent.getAttribute("class") : null;
  const isMath = className
    ? className.includes("katex") || className.includes("MathJax")
    : false;

  if (UNTYPESET_PARENT_NAMES.has(parent.nodeName) || parent.nodeType === 8) {
    return false;
  }
  return !isMath;
}
|
37 |
|
38 |
|
39 |
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
45 |
|
46 |
|
47 |
|
48 |
|
/**
 * Replaces ASCII punctuation sequences with typographic equivalents.
 *
 * In order: `--` becomes an em dash, an em dash surrounded by spaces gets
 * thin spaces instead, `...` becomes an ellipsis character, and the space
 * after `«¿¡` or before `!?:;.,‽»` becomes a no-break space.
 *
 * @param {string} text - Text to transform.
 * @returns {string} The transformed text.
 */
function punctuation(text) {
  const NBSP = "\u00a0";

  return text
    // Dashes: "--" to em dash, then thin spaces around a spaced em dash.
    .replace(/--/g, '\u2014')
    .replace(/ \u2014 /g, "\u2009\u2014\u2009")
    // Ellipsis.
    .replace(/\.\.\./g, '…')
    // No-break space after opening and before closing punctuation.
    .replace(/([«¿¡]) /g, `$1${NBSP}`)
    .replace(/ ([\!\?:;\.,‽»])/g, `${NBSP}$1`);
}
|
68 |
|
/**
 * Converts straight quotes to curly quotes and primes.
 *
 * The replacements run in a fixed order; each one relies on the output of
 * the ones before it. A quote preceded by a backslash in the input is
 * restored to a straight quote (and the backslash removed) at the end.
 *
 * @param {string} text - Text to transform.
 * @returns {string} The transformed text.
 */
function quotes(text) {

  text = text
    // Opening double quote: at the start or after a non-word character, and
    // followed by something other than whitespace or closing punctuation.
    .replace(/(\W|^)"([^\s\!\?:;\.,‽»])/g, '$1\u201c$2')
    // Closing double quote that pairs with an opening one.
    .replace(/(\u201c[^"]*)"([^"]*$|[^\u201c"]*\u201c)/g, '$1\u201d$2')
    // Any remaining double quote not preceded by a digit closes.
    .replace(/([^0-9])"/g,'$1\u201d')
    // Opening single quote: at the start or after a non-word character.
    .replace(/(\W|^)'(\S)/g, '$1\u2018$2')
    // Apostrophe between two letters.
    .replace(/([a-z])'([a-z])/ig, '$1\u2019$2')
    // Closing single quote after an opening one or after a letter.
    .replace(/((\u2018[^']*)|[a-z])'([^0-9]|$)/ig, '$1\u2019$3')
    // NOTE(review): the next two rules appear to turn an opening single quote
    // into an apostrophe for abbreviated years and unpaired leading
    // apostrophes - confirm intent before changing them.
    .replace(/(\u2018)([0-9]{2}[^\u2019]*)(\u2018([^0-9]|$)|$|\u2019[a-z])/ig, '\u2019$2$3')
    .replace(/(\B|^)\u2018(?=([^\u2019]*\u2019\b)*([^\u2019\u2018]*\W[\u2019\u2018]\b|[^\u2019\u2018]*$))/ig, '$1\u2019')
    // Whatever straight quotes are left become primes.
    .replace(/'''/g, '\u2034')
    .replace(/("|'')/g, '\u2033')
    .replace(/'/g, '\u2032');

  // Restore backslash-escaped quotes. The `g` flag matters here: without it
  // only the first escaped quote of each kind is restored.
  text = text.replace(/\\“/g, '"');
  text = text.replace(/\\”/g, '"');
  text = text.replace(/\\’/g, '\'');
  text = text.replace(/\\‘/g, '\'');

  return text;
}
|
92 |
|
/**
 * Replaces the letter pairs "fi" and "fl" with single ligature characters.
 *
 * The ligatures are written as Unicode escapes so that the replacement
 * cannot collapse back into plain letters if the file is normalised or
 * re-encoded (which would make the function a no-op).
 *
 * @param {string} text - Text to transform.
 * @returns {string} The text with U+FB01 for "fi" and U+FB02 for "fl".
 */
function ligatures(text) {
  return text
    .replace(/fi/g, '\uFB01')
    .replace(/fl/g, '\uFB02');
}
|