// Source: UNPKG — 19.1 kB, JavaScript (raw view)
// All tokenizer regexes are sticky (`y`): the consumers below set `lastIndex`
// to the current input position before calling `exec`, so a match can only
// start exactly at that position.

// A complete `/* ... */` comment. Written so that comments ending in any
// number of stars (e.g. `/***/`, `/* a **/`) are recognised.
const commentRegEx = /\/\*[^*]*\*+(?:[^/*][^*]*\*+)*\//gmy;
// eslint-disable-next-line no-control-regex
const nameRegEx = /-?(?:(?:[a-zA-Z_]|[^\x00-\x7F]|\\(?:\$|\n|[0-9a-fA-F]{1,6}\s?))(?:[a-zA-Z_0-9\-]*|\\(?:\$|\n|[0-9a-fA-F]{1,6}\s?))*)/gmy;
const numberRegEx = /[\+\-]?(?:\d+\.\d+|\d+|\.\d+)(?:[eE][\+\-]?\d+)?/gmy;
// A double-quoted string, ending at the closing quote or at the end of the line/input.
// TODO: Escape sequences are not decoded yet.
const doubleQuoteStringRegEx = /"((?:[^\n\r\f\"]|\\(?:\$|\n|[0-9a-fA-F]{1,6}\s?))*)(?:"|$)/gmy;
const whitespaceRegEx = /[\s\t\n\r\f]*/gmy;
// A single-quoted string, ending at the closing quote or at the end of the line/input.
// TODO: Escape sequences are not decoded yet.
const singleQuoteStringRegEx = /'((?:[^\n\r\f\']|\\(?:\$|\n|[0-9a-fA-F]{1,6}\s?))*)(?:'|$)/gmy;
/**
 * CSS parser that follows, relatively closely:
 * CSS Syntax Module Level 3
 * https://www.w3.org/TR/css-syntax-3/
 *
 * Tokens are either plain strings (punctuation such as '{' or ':', a single
 * ' ' for any run of whitespace, '<!--', '-->', and match tokens such as '~=')
 * or objects of the form `{ type, text, ... }` where `type` is a numeric
 * TokenObjectType value.
 */
export class CSS3Parser {
    /**
     * @param {string} text - The CSS source to tokenize/parse.
     */
    constructor(text) {
        this.text = text;
        // Index into `text` of the next character to be consumed.
        this.nextInputCodePointIndex = 0;
        // One-token look-behind buffer, see reconsumeTheCurrentInputToken().
        this.reconsumedInputToken = null;
        // Set by parseAStylesheet(); controls how '<!--' and '-->' are treated.
        this.topLevelFlag = false;
    }
    /**
     * For testing purposes.
     * This method allows us to run and assert the proper working of the tokenizer.
     *
     * @returns {Array} Every token in order. The terminating falsy value
     *   returned by consumeAToken() at the end of the input is included as
     *   the last element.
     */
    tokenize() {
        const tokens = [];
        let inputToken;
        do {
            inputToken = this.consumeAToken();
            tokens.push(inputToken);
        } while (inputToken);
        return tokens;
    }
    /**
     * 4.3.1. Consume a token
     * https://www.w3.org/TR/css-syntax-3/#consume-a-token
     *
     * @returns {string|object|undefined} The next token, or `undefined` at
     *   the end of the input. A previously reconsumed token is returned first.
     * @throws {Error} When a unicode-range like `U+0026` is encountered.
     */
    consumeAToken() {
        if (this.reconsumedInputToken) {
            const result = this.reconsumedInputToken;
            this.reconsumedInputToken = null;
            return result;
        }
        const char = this.text[this.nextInputCodePointIndex];
        switch (char) {
            case '"':
            case "'":
                // Fall back to a delim token when the string cannot be matched
                // (e.g. a quote directly followed by a form feed). Returning
                // null here without advancing would stall every caller loop.
                return this.consumeAStringToken() || this.consumeADelimToken();
            case '(':
            case ')':
            case ',':
            case ':':
            case ';':
            case '[':
            case ']':
            case '{':
            case '}':
                this.nextInputCodePointIndex++;
                return char;
            case '#':
                return this.consumeAHashToken() || this.consumeADelimToken();
            case ' ':
            case '\t':
            case '\n':
            case '\r':
            case '\f':
                return this.consumeAWhitespace();
            case '@':
                return this.consumeAtKeyword() || this.consumeADelimToken();
            // TODO: Only if this is valid escape, otherwise it is a parse error
            case '\\':
                return this.consumeAnIdentLikeToken() || this.consumeADelimToken();
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                return this.consumeANumericToken();
            case 'u':
            case 'U':
                if (this.text[this.nextInputCodePointIndex + 1] === '+') {
                    const thirdChar = this.text[this.nextInputCodePointIndex + 2];
                    if ((thirdChar >= '0' && thirdChar <= '9') || thirdChar === '?') {
                        // TODO: Handle unicode stuff such as U+002B
                        throw new Error('Unicode tokens not supported!');
                    }
                }
                return this.consumeAnIdentLikeToken() || this.consumeADelimToken();
            case '$':
            case '*':
            case '^':
            case '|':
            case '~':
                return this.consumeAMatchToken() || this.consumeADelimToken();
            case '-':
                return this.consumeANumericToken() || this.consumeAnIdentLikeToken() || this.consumeCDC() || this.consumeADelimToken();
            case '+':
            case '.':
                return this.consumeANumericToken() || this.consumeADelimToken();
            case '/': {
                const start = this.nextInputCodePointIndex;
                const tokenAfterComment = this.consumeAComment();
                if (this.nextInputCodePointIndex !== start) {
                    // A comment was skipped. Return whatever followed it, which
                    // is `undefined` when the comment was the last thing in the
                    // input (no delim token must be fabricated in that case).
                    return tokenAfterComment;
                }
                return this.consumeADelimToken();
            }
            case '<':
                return this.consumeCDO() || this.consumeADelimToken();
            case undefined:
                return undefined;
            default:
                return this.consumeAnIdentLikeToken() || this.consumeADelimToken();
        }
    }
    /**
     * Consumes exactly one character and wraps it in a delim token.
     *
     * @returns {{type: number, text: string}} The delim token.
     */
    consumeADelimToken() {
        return {
            type: 2 /* TokenObjectType.delim */,
            text: this.text[this.nextInputCodePointIndex++],
        };
    }
    /**
     * Consumes a run of whitespace starting at the current position.
     *
     * @returns {string} Always a single space, regardless of what was consumed.
     */
    consumeAWhitespace() {
        whitespaceRegEx.lastIndex = this.nextInputCodePointIndex;
        whitespaceRegEx.exec(this.text);
        this.nextInputCodePointIndex = whitespaceRegEx.lastIndex;
        return ' ';
    }
    /**
     * Consumes `#name`. The current character is expected to be '#'.
     *
     * @returns {object|null} A hash token whose text includes the leading '#',
     *   or null (position unchanged) when no name follows the '#'.
     */
    consumeAHashToken() {
        this.nextInputCodePointIndex++;
        const hashName = this.consumeAName();
        if (hashName) {
            return { type: 12 /* TokenObjectType.hash */, text: '#' + hashName.text };
        }
        this.nextInputCodePointIndex--;
        return null;
    }
    /**
     * @returns {string|null} '<!--' when the input continues with it, otherwise null.
     */
    consumeCDO() {
        if (this.text.substr(this.nextInputCodePointIndex, 4) === '<!--') {
            this.nextInputCodePointIndex += 4;
            return '<!--';
        }
        return null;
    }
    /**
     * @returns {string|null} '-->' when the input continues with it, otherwise null.
     */
    consumeCDC() {
        if (this.text.substr(this.nextInputCodePointIndex, 3) === '-->') {
            this.nextInputCodePointIndex += 3;
            return '-->';
        }
        return null;
    }
    /**
     * Consumes a two character match token: the current character followed by '='
     * (the caller dispatches here for '$', '*', '^', '|' and '~').
     *
     * @returns {string|null} The two character token, or null when the next
     *   character is not '='.
     */
    consumeAMatchToken() {
        if (this.text[this.nextInputCodePointIndex + 1] === '=') {
            const token = this.text.substr(this.nextInputCodePointIndex, 2);
            this.nextInputCodePointIndex += 2;
            return token;
        }
        return null;
    }
    /**
     * 4.3.2. Consume a numeric token
     * https://www.w3.org/TR/css-syntax-3/#consume-a-numeric-token
     *
     * @returns {object|null} A percentage, dimension or number token, or null
     *   when no number starts at the current position.
     */
    consumeANumericToken() {
        numberRegEx.lastIndex = this.nextInputCodePointIndex;
        const result = numberRegEx.exec(this.text);
        if (!result) {
            return null;
        }
        this.nextInputCodePointIndex = numberRegEx.lastIndex;
        if (this.text[this.nextInputCodePointIndex] === '%') {
            // NOTE: The '%' itself is not consumed here, so it is emitted as a
            // separate delim token right after this one.
            return { type: 4 /* TokenObjectType.percentage */, text: result[0] }; // TODO: Push the actual number and unit here...
        }
        const name = this.consumeAName();
        if (name) {
            return {
                type: 5 /* TokenObjectType.dimension */,
                text: result[0] + name.text,
            };
        }
        return { type: 3 /* TokenObjectType.number */, text: result[0] };
    }
    /**
     * 4.3.3. Consume an ident-like token
     * https://www.w3.org/TR/css-syntax-3/#consume-an-ident-like-token
     *
     * @returns {object|null} An ident token, a function token (name followed
     *   by '('), the result of consumeAURLToken() for `url(`, or null when no
     *   name starts at the current position.
     */
    consumeAnIdentLikeToken() {
        const name = this.consumeAName();
        if (!name) {
            return null;
        }
        if (this.text[this.nextInputCodePointIndex] === '(') {
            this.nextInputCodePointIndex++;
            if (name.text.toLowerCase() === 'url') {
                return this.consumeAURLToken();
            }
            return {
                type: 8 /* TokenObjectType.functionToken */,
                name: name.text,
                text: name.text + '(',
            };
        }
        return name;
    }
    /**
     * 4.3.4. Consume a string token
     * https://www.w3.org/TR/css-syntax-3/#consume-a-string-token
     *
     * @returns {object|null} A string token whose text is the raw matched
     *   source including the quotes, or null (position unchanged) when the
     *   current character does not start a matchable string.
     */
    consumeAStringToken() {
        const char = this.text[this.nextInputCodePointIndex];
        let stringRegEx;
        if (char === "'") {
            stringRegEx = singleQuoteStringRegEx;
        }
        else if (char === '"') {
            stringRegEx = doubleQuoteStringRegEx;
        }
        else {
            return null;
        }
        stringRegEx.lastIndex = this.nextInputCodePointIndex;
        const result = stringRegEx.exec(this.text);
        if (!result) {
            return null;
        }
        this.nextInputCodePointIndex = stringRegEx.lastIndex;
        // TODO: Handle bad-string.
        // TODO: Perform string escaping.
        return { type: 1 /* TokenObjectType.string */, text: result[0] };
    }
    /**
     * 4.3.5. Consume a url token
     * https://www.w3.org/TR/css-syntax-3/#consume-a-url-token
     *
     * Called right after `url(` has been consumed.
     *
     * @returns {object|null} A url token, or null for a malformed url. For a
     *   quoted url the text is the whole source from `url(` to `)`; for an
     *   unquoted url the text is the characters between the parentheses
     *   (undefined when there are none).
     * @throws {Error} When an unquoted url contains a backslash.
     */
    consumeAURLToken() {
        const start = this.nextInputCodePointIndex - 3 /* url */ - 1; /* ( */
        const urlToken = {
            type: 7 /* TokenObjectType.url */,
            text: undefined,
        };
        this.consumeAWhitespace();
        if (this.nextInputCodePointIndex >= this.text.length) {
            return urlToken;
        }
        const nextInputCodePoint = this.text[this.nextInputCodePointIndex];
        if (nextInputCodePoint === '"' || nextInputCodePoint === "'") {
            const stringToken = this.consumeAStringToken();
            if (!stringToken) {
                // TODO: Handle bad-string / bad-url.
                return null;
            }
            // TODO: Set value instead.
            this.consumeAWhitespace();
            if (this.text[this.nextInputCodePointIndex] === ')' || this.nextInputCodePointIndex >= this.text.length) {
                this.nextInputCodePointIndex++;
                const end = this.nextInputCodePointIndex;
                urlToken.text = this.text.substring(start, end);
                return urlToken;
            }
            // TODO: Handle bad-url.
            return null;
        }
        while (this.nextInputCodePointIndex < this.text.length) {
            const char = this.text[this.nextInputCodePointIndex++];
            switch (char) {
                case ')':
                    return urlToken;
                case ' ':
                case '\t':
                case '\n':
                case '\r':
                case '\f':
                    this.consumeAWhitespace();
                    if (this.text[this.nextInputCodePointIndex] === ')') {
                        this.nextInputCodePointIndex++;
                        return urlToken;
                    }
                    // TODO: Bad url! Consume remnants.
                    return null;
                case '"':
                case "'":
                    // TODO: Parse error! Bad url! Consume remnants.
                    return null;
                case '\\':
                    // TODO: Escape!
                    throw new Error('Escaping not yet supported!');
                default:
                    // TODO: Non-printable chars - error.
                    // Start from the first character instead of appending to
                    // `undefined`, which would prefix the text with "undefined".
                    urlToken.text = urlToken.text === undefined ? char : urlToken.text + char;
            }
        }
        return urlToken;
    }
    /**
     * 4.3.11. Consume a name
     * https://www.w3.org/TR/css-syntax-3/#consume-a-name
     *
     * @returns {object|null} An ident token with the raw matched text, or
     *   null when no name starts at the current position.
     */
    consumeAName() {
        nameRegEx.lastIndex = this.nextInputCodePointIndex;
        const result = nameRegEx.exec(this.text);
        if (!result) {
            return null;
        }
        this.nextInputCodePointIndex = nameRegEx.lastIndex;
        // TODO: Perform string escaping.
        return { type: 6 /* TokenObjectType.ident */, text: result[0] };
    }
    /**
     * Consumes `@name`. The current character is expected to be '@'.
     *
     * @returns {object|null} An at-keyword token whose text is the name
     *   without the '@', or null (position unchanged) when no name follows.
     */
    consumeAtKeyword() {
        this.nextInputCodePointIndex++;
        const name = this.consumeAName();
        if (name) {
            return { type: 11 /* TokenObjectType.atKeyword */, text: name.text };
        }
        this.nextInputCodePointIndex--;
        return null;
    }
    /**
     * Skips a comment at the current position and returns the token after it.
     *
     * @returns {string|object|null|undefined} The token following the comment
     *   (undefined when the input ends right after it), or null with the
     *   position unchanged when no complete comment starts here.
     */
    consumeAComment() {
        if (this.text[this.nextInputCodePointIndex + 1] === '*') {
            commentRegEx.lastIndex = this.nextInputCodePointIndex;
            const result = commentRegEx.exec(this.text);
            if (!result) {
                return null; // TODO: Handle <bad-comment>
            }
            this.nextInputCodePointIndex = commentRegEx.lastIndex;
            // The CSS spec tokenizer does not emit comment tokens
            return this.consumeAToken();
        }
        return null;
    }
    /**
     * Pushes a token back so that the next consumeAToken() call returns it again.
     *
     * @param {string|object} currentInputToken - The token to hand out again.
     */
    reconsumeTheCurrentInputToken(currentInputToken) {
        this.reconsumedInputToken = currentInputToken;
    }
    /**
     * 5.3.1. Parse a stylesheet
     * https://www.w3.org/TR/css-syntax-3/#parse-a-stylesheet
     *
     * @returns {{rules: Array}} The stylesheet with its top-level rules.
     */
    parseAStylesheet() {
        this.topLevelFlag = true;
        return {
            rules: this.consumeAListOfRules(),
        };
    }
    /**
     * 5.4.1. Consume a list of rules
     * https://www.w3.org/TR/css-syntax-3/#consume-a-list-of-rules
     *
     * @returns {Array} The at-rules and qualified rules found, in source order.
     */
    consumeAListOfRules() {
        const rules = [];
        let inputToken;
        while ((inputToken = this.consumeAToken())) {
            if (inputToken === ' ') {
                continue;
            }
            if ((inputToken === '<!--' || inputToken === '-->') && this.topLevelFlag) {
                // At the top level CDO/CDC tokens are ignored.
                continue;
            }
            this.reconsumeTheCurrentInputToken(inputToken);
            // Only an at-keyword starts an at-rule. Everything else, including
            // a CDO/CDC token outside the top level, starts a qualified rule
            // as the specification prescribes.
            const rule = inputToken.type === 11 /* TokenObjectType.atKeyword */
                ? this.consumeAnAtRule()
                : this.consumeAQualifiedRule();
            if (rule) {
                rules.push(rule);
            }
        }
        return rules;
    }
    /**
     * 5.4.2. Consume an at-rule
     * https://www.w3.org/TR/css-syntax-3/#consume-an-at-rule
     *
     * The next token is expected to be the at-keyword token.
     *
     * @returns {object} The at-rule; `block` stays undefined when the rule
     *   ends with ';' or the input ends first.
     */
    consumeAnAtRule() {
        let inputToken = this.consumeAToken();
        const atRule = {
            type: 'at-rule',
            name: inputToken.text,
            prelude: [],
            block: undefined,
        };
        while ((inputToken = this.consumeAToken())) {
            if (inputToken === ';') {
                return atRule;
            }
            else if (inputToken === '{') {
                atRule.block = this.consumeASimpleBlock(inputToken);
                return atRule;
            }
            else if (inputToken.type === 9 /* TokenObjectType.simpleBlock */ && inputToken.associatedToken === '{') {
                atRule.block = inputToken;
                return atRule;
            }
            this.reconsumeTheCurrentInputToken(inputToken);
            const component = this.consumeAComponentValue();
            if (component) {
                atRule.prelude.push(component);
            }
        }
        return atRule;
    }
    /**
     * 5.4.3. Consume a qualified rule
     * https://www.w3.org/TR/css-syntax-3/#consume-a-qualified-rule
     *
     * @returns {object|null} The qualified rule, or null when the input ends
     *   before a '{' block is found.
     */
    consumeAQualifiedRule() {
        const qualifiedRule = {
            type: 'qualified-rule',
            prelude: [],
            block: undefined,
        };
        let inputToken;
        while ((inputToken = this.consumeAToken())) {
            if (inputToken === '{') {
                qualifiedRule.block = this.consumeASimpleBlock(inputToken);
                return qualifiedRule;
            }
            else if (inputToken.type === 9 /* TokenObjectType.simpleBlock */) {
                const simpleBlock = inputToken;
                if (simpleBlock.associatedToken === '{') {
                    qualifiedRule.block = simpleBlock;
                    return qualifiedRule;
                }
            }
            this.reconsumeTheCurrentInputToken(inputToken);
            const componentValue = this.consumeAComponentValue();
            if (componentValue) {
                qualifiedRule.prelude.push(componentValue);
            }
        }
        // TODO: This is a parse error, log parse errors!
        return null;
    }
    /**
     * 5.4.6. Consume a component value
     * https://www.w3.org/TR/css-syntax-3/#consume-a-component-value
     *
     * @returns {string|object|undefined} A simple block for an opening
     *   bracket, a function for a function token, otherwise the token itself.
     */
    consumeAComponentValue() {
        const inputToken = this.consumeAToken();
        switch (inputToken) {
            case '{':
            case '[':
            case '(':
                // consumeAToken() has already moved past the bracket, which is
                // the position consumeASimpleBlock() expects. Advancing again
                // here would drop the first character inside the block.
                return this.consumeASimpleBlock(inputToken);
        }
        if (typeof inputToken === 'object' && inputToken.type === 8 /* TokenObjectType.functionToken */) {
            return this.consumeAFunction(inputToken.name);
        }
        return inputToken;
    }
    /**
     * 5.4.7. Consume a simple block
     * https://www.w3.org/TR/css-syntax-3/#consume-a-simple-block
     *
     * Must be called with the position right after the opening bracket.
     *
     * @param {string} associatedToken - The opening bracket: '{', '[' or '('.
     * @returns {object} The block with its component values and its raw source
     *   text (up to the end of the input when the block is never closed).
     */
    consumeASimpleBlock(associatedToken) {
        const endianToken = {
            '[': ']',
            '{': '}',
            '(': ')',
        }[associatedToken];
        const start = this.nextInputCodePointIndex - 1;
        const block = {
            type: 9 /* TokenObjectType.simpleBlock */,
            text: undefined,
            associatedToken,
            values: [],
        };
        let nextInputToken;
        while ((nextInputToken = this.text[this.nextInputCodePointIndex])) {
            if (nextInputToken === endianToken) {
                this.nextInputCodePointIndex++;
                const end = this.nextInputCodePointIndex;
                block.text = this.text.substring(start, end);
                return block;
            }
            const value = this.consumeAComponentValue();
            if (value) {
                block.values.push(value);
            }
        }
        block.text = this.text.substring(start);
        return block;
    }
    /**
     * 5.4.8. Consume a function
     * https://www.w3.org/TR/css-syntax-3/#consume-a-function
     *
     * Must be called with the position right after the function's '('.
     *
     * @param {string} name - The function name, without the parenthesis.
     * @returns {object} The function with its components and raw source text
     *   (up to the end of the input when the function is never closed).
     */
    consumeAFunction(name) {
        const start = this.nextInputCodePointIndex;
        const funcToken = {
            type: 14 /* TokenObjectType.function */,
            name,
            text: undefined,
            components: [],
        };
        while (this.nextInputCodePointIndex < this.text.length) {
            if (this.text[this.nextInputCodePointIndex] === ')') {
                this.nextInputCodePointIndex++;
                const end = this.nextInputCodePointIndex;
                funcToken.text = name + '(' + this.text.substring(start, end);
                return funcToken;
            }
            const component = this.consumeAComponentValue();
            if (component) {
                funcToken.components.push(component);
            }
        }
        funcToken.text = name + '(' + this.text.substring(start);
        return funcToken;
    }
}
//# sourceMappingURL=CSS3Parser.js.map