1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 | var _ = require("./util");
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
/**
 * Extract the lower-cased tag name from a single tag's source text.
 *
 * The first character (expected to be "<") is always dropped. The name ends
 * at the first whitespace reported by `_.spaceIndex`, or just before the last
 * character when there is no whitespace. One leading "/" and one trailing "/"
 * are removed afterwards.
 *
 * @param {string} html source of one tag, e.g. `<a href="#">` or `</a>`
 * @returns {string} the tag name in lower case
 */
function getTagName(html) {
  var spaceAt = _.spaceIndex(html);
  // No whitespace: cut just before the final character; otherwise cut right
  // after the whitespace character (it is trimmed away below).
  var cutAt = spaceAt === -1 ? -1 : spaceAt + 1;
  var name = _.trim(html.slice(1, cutAt)).toLowerCase();

  if (name.charAt(0) === "/") {
    name = name.slice(1);
  }
  if (name.charAt(name.length - 1) === "/") {
    name = name.slice(0, -1);
  }
  return name;
}
|
28 |
|
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
34 |
|
/**
 * Tell whether a tag's source text is a closing tag.
 *
 * @param {string} html source of one tag
 * @returns {boolean} true when the text begins with "</"
 */
function isClosing(html) {
  return html.charAt(0) === "<" && html.charAt(1) === "/";
}
|
38 |
|
39 |
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
45 |
|
46 |
|
/**
 * Walk an HTML string, handing every tag to `onTag` and every stretch of
 * plain text to `escapeHtml`, and concatenate what they return.
 *
 * A tag starts at "<" and ends at the next ">" that is not inside a quoted
 * attribute value. A quote character only opens a quoted value when the
 * nearest non-whitespace character before it is "=". A "<" met while a tag is
 * already open (and not inside quotes) restarts the tag there; the text
 * before it is emitted through `escapeHtml`.
 *
 * @param {string} html input markup
 * @param {function(number, number, string, string, boolean): string} onTag
 *   called as onTag(sourcePosition, outputPosition, tagName, tagHtml, isClosing)
 * @param {function(string): string} escapeHtml called with each text segment
 * @returns {string} the assembled output
 */
function parseTag(html, onTag, escapeHtml) {
  "use strict";

  var output = "";
  var textFrom = 0; // start of the input not yet copied to the output
  var openAt = -1; // index of the "<" of the tag being scanned, -1 if none
  var quote = ""; // quote character of the open attribute value, "" if none
  var total = html.length;

  for (var pos = 0; pos < total; pos++) {
    var ch = html.charAt(pos);

    // Outside any tag: only "<" is interesting.
    if (openAt < 0) {
      if (ch === "<") {
        openAt = pos;
      }
      continue;
    }

    // Inside a quoted attribute value: wait for the matching quote.
    if (quote !== "") {
      if (ch === quote) {
        quote = "";
      }
      continue;
    }

    switch (ch) {
      case "<":
        // Another "<" before the tag closed: what came before is plain text.
        output += escapeHtml(html.slice(textFrom, pos));
        openAt = pos;
        textFrom = pos;
        break;

      case ">":
        output += escapeHtml(html.slice(textFrom, openAt));
        var tagHtml = html.slice(openAt, pos + 1);
        var tagName = getTagName(tagHtml);
        output += onTag(
          openAt,
          output.length,
          tagName,
          tagHtml,
          isClosing(tagHtml)
        );
        textFrom = pos + 1;
        openAt = -1;
        break;

      case '"':
      case "'":
        // Look back past whitespace; the scan always stops at the "<" that
        // opened the tag, if not earlier.
        var back = pos - 1;
        while (html.charAt(back).trim() === "") {
          back--;
        }
        if (html.charAt(back) === "=") {
          quote = ch;
        }
        break;
    }
  }

  // Whatever is left (including an unterminated tag) is treated as text.
  if (textFrom < html.length) {
    output += escapeHtml(html.slice(textFrom));
  }

  return output;
}
|
115 |
|
// Matches every character that is not an ASCII letter, digit, backslash,
// underscore, colon, dot or hyphen; such characters are removed from
// attribute names.
var REGEXP_ILLEGAL_ATTR_NAME = /[^a-zA-Z0-9\\_:.-]/gim;

/**
 * Split the attribute portion of a tag into name/value pairs, pass each pair
 * to `onAttr`, and join the truthy results with single spaces.
 *
 * Names are trimmed, stripped of characters matched by
 * REGEXP_ILLEGAL_ATTR_NAME and lower-cased; pairs whose name ends up empty
 * are skipped. Values may be double-quoted, single-quoted or bare; an
 * attribute without "=" is reported with the value "".
 *
 * @param {string} html attribute source, e.g. `href="#" target=_blank`
 * @param {function(string, string): string} onAttr called as
 *   onAttr(name, value); a falsy return drops the attribute
 * @returns {string} the collected results joined by " " and trimmed
 */
function parseAttr(html, onAttr) {
  "use strict";

  var WHITESPACE = /\s/;
  var WHITESPACE_ALL = /\s/g;

  var text = html; // working copy; whitespace is normalized lazily below
  var normalized = false;
  var lastPos = 0; // start of the piece (name or value) being collected
  var lastMarkPos = 0; // index of the quote opening the current value
  var retAttrs = [];
  var tmpName = false; // raw name once "=" was seen, false while reading a name
  var len = text.length;

  function addAttr(name, value) {
    name = _.trim(name);
    name = name.replace(REGEXP_ILLEGAL_ATTR_NAME, "").toLowerCase();
    if (name.length < 1) return;
    var ret = onAttr(name, value || "");
    if (ret) retAttrs.push(ret);
  }

  for (var i = 0; i < len; i++) {
    var c = text.charAt(i);
    var v, j;

    // "=" while reading a name: the name is complete, a value follows.
    if (tmpName === false && c === "=") {
      tmpName = text.slice(lastPos, i);
      lastPos = i + 1;
      lastMarkPos =
        text.charAt(lastPos) === '"' || text.charAt(lastPos) === "'"
          ? lastPos
          : findNextQuotationMark(text, i + 1);
      continue;
    }

    // Opening quote of the value: consume through the matching quote.
    if (tmpName !== false && i === lastMarkPos) {
      j = text.indexOf(c, i + 1);
      if (j === -1) {
        // Unterminated quote: the tail is handled after the loop.
        break;
      }
      v = _.trim(text.slice(lastMarkPos + 1, j));
      addAttr(tmpName, v);
      tmpName = false;
      i = j;
      lastPos = i + 1;
      continue;
    }

    if (WHITESPACE.test(c)) {
      // Turn every whitespace character into a plain space so the find*
      // helpers (which only skip " ") work. One pass is enough: afterwards
      // the text contains no other whitespace, so repeating it would be a
      // no-op. It is deliberately done here, at the first bare whitespace,
      // and not up front, so values consumed earlier are left untouched.
      if (!normalized) {
        text = text.replace(WHITESPACE_ALL, " ");
        normalized = true;
      }

      if (tmpName === false) {
        j = findNextEqual(text, i);
        if (j === -1) {
          // Name not followed by "=": attribute without a value.
          v = _.trim(text.slice(lastPos, i));
          addAttr(v);
          tmpName = false;
          lastPos = i + 1;
          continue;
        }
        // Jump so the next iteration lands on the "=".
        i = j - 1;
        continue;
      }

      j = findBeforeEqual(text, i - 1);
      if (j === -1) {
        // Whitespace after a bare value ends that value.
        v = _.trim(text.slice(lastPos, i));
        v = stripQuoteWrap(v);
        addAttr(tmpName, v);
        tmpName = false;
        lastPos = i + 1;
        continue;
      }
      // Whitespace directly after "=": the value has not started yet.
      continue;
    }
  }

  // Flush whatever is left after the last separator.
  if (lastPos < text.length) {
    if (tmpName === false) {
      addAttr(text.slice(lastPos));
    } else {
      addAttr(tmpName, stripQuoteWrap(_.trim(text.slice(lastPos))));
    }
  }

  return _.trim(retAttrs.join(" "));
}
|
209 |
|
/**
 * Starting at index `i`, skip space characters and report whether the first
 * other character is "=".
 *
 * @param {string} str text to scan
 * @param {number} i index to start from
 * @returns {number} index of the "=" if it is the first non-space character
 *   at or after `i`; -1 otherwise, including when only spaces (or nothing)
 *   remain
 */
function findNextEqual(str, i) {
  for (; i < str.length; i++) {
    var c = str[i];
    if (c === " ") continue;
    return c === "=" ? i : -1;
  }
  // Reached the end without meeting a non-space character. Return the same
  // "not found" sentinel as above instead of falling through to undefined.
  return -1;
}
|
218 |
|
/**
 * Starting at index `i`, skip space characters and report whether the first
 * other character is a single or double quote.
 *
 * @param {string} str text to scan
 * @param {number} i index to start from
 * @returns {number} index of the quote if it is the first non-space character
 *   at or after `i`; -1 otherwise, including when only spaces (or nothing)
 *   remain
 */
function findNextQuotationMark(str, i) {
  for (; i < str.length; i++) {
    var c = str[i];
    if (c === " ") continue;
    return c === "'" || c === '"' ? i : -1;
  }
  // Reached the end without meeting a non-space character. Return the same
  // "not found" sentinel as above instead of falling through to undefined.
  return -1;
}
|
227 |
|
/**
 * Starting at index `i` and moving towards the start of the string, skip
 * space characters and report whether the first other character is "=".
 *
 * @param {string} str text to scan
 * @param {number} i index to start from (scanned downwards, index 0 included)
 * @returns {number} index of the "=" if it is the first non-space character
 *   at or before `i`; -1 otherwise, including when only spaces (or nothing)
 *   precede
 */
function findBeforeEqual(str, i) {
  // `i >= 0` so that a character sitting at index 0 is inspected as well.
  for (; i >= 0; i--) {
    var c = str[i];
    if (c === " ") continue;
    return c === "=" ? i : -1;
  }
  // Ran past the start without meeting a non-space character.
  return -1;
}
|
236 |
|
/**
 * Tell whether a string begins and ends with the same quote character,
 * either `"` or `'`.
 *
 * @param {string} text candidate string
 * @returns {boolean} true when the first and last characters are the same
 *   quote character
 */
function isQuoteWrapString(text) {
  var opener = text[0];
  if (opener !== '"' && opener !== "'") {
    return false;
  }
  return text[text.length - 1] === opener;
}
|
247 |
|
/**
 * Remove one pair of surrounding quotes from a string when
 * `isQuoteWrapString` reports that it is quote-wrapped; otherwise return the
 * string unchanged.
 *
 * @param {string} text candidate string
 * @returns {string} the text without its first and last character, or the
 *   original text
 */
function stripQuoteWrap(text) {
  return isQuoteWrapString(text) ? text.slice(1, -1) : text;
}
|
255 |
|
256 | exports.parseTag = parseTag;
|
257 | exports.parseAttr = parseAttr;
|