// Retrieved from UNPKG (compiled output, ~5.01 kB); page-header text converted to a comment.
1"use strict";
2Object.defineProperty(exports, "__esModule", { value: true });
3const moo_1 = require("moo");
4const immutable_1 = require("immutable");
/**
 * Ambitious Goal: "invalid syntax" errors from the lexer are unclear. Can we take errors out of the lexer to
 * the point we can clearly enumerate all files that will _not_ be accepted by the lexer? This would also
 * facilitate clearer, lexer-independent error reporting.
 *
 * All UTF-8 strings should lex, unless they:
 * 1. Contain non-printable characters.
 * 2. Contain the character ` outside of a string/char/comment.
 * 3. Contain characters outside of the Unicode range.
 */
15const basicLexing = {
16 identifier: {
17 match: /[A-Za-z_][A-Za-z0-9_]*/,
18 keywords: {
19 keyword: [
20 "int",
21 "bool",
22 "string",
23 "char",
24 "void",
25 "struct",
26 "typedef",
27 "if",
28 "else",
29 "while",
30 "for",
31 "continue",
32 "break",
33 "return",
34 "assert",
35 "error",
36 "true",
37 "false",
38 "NULL",
39 "alloc",
40 "alloc_array"
41 ]
42 }
43 },
44 numeric_literal: { match: /(?:0[xX][0-9a-zA-Z]+)|(?:[1-9][0-9]*)|0/ },
45 char_delimiter: { match: /'/, push: "charComponents" },
46 string_delimiter: { match: /\"/, push: "stringComponents" },
47 logical_and: "&&",
48 symbol: /[!$%&\(\)*+,\-.\/:;<=>?\[\\\]^{\|}~]/,
49 unexpected_unicode_character: { match: /[\x00-\u{10FFFF}]/, lineBreaks: true },
50 invalid_character: { match: /./, lineBreaks: true },
51 type_identifier: "<placeholder>",
52 space: "<placeholder>"
53};
54exports.coreLexer = moo_1.states({
55 main: Object.assign({
56 newline: { match: /\r\n|\r|\n/, lineBreaks: true },
57 whitespace: { match: /[ \t\v\f]+/ },
58 anno_start: { match: "/*@", push: "multiLineAnno" },
59 comment_start: { match: "/*", push: "multiLineComment" },
60 anno_line_start: { match: "//@", push: "lineAnno" },
61 comment_line_start: { match: "//", push: "lineComment" },
62 pragma: /#.*/
63 }, basicLexing),
64 multiLineAnno: Object.assign({
65 newline: { match: /\r\n|\r|\n/, lineBreaks: true },
66 whitespace: { match: /[ \t\v\f]+/ },
67 anno_end: { match: "@*/", pop: 1 },
68 comment_start: { match: "/*", push: "multiLineComment" },
69 comment_line_start: { match: "//", push: "lineComment" },
70 annospace: { match: "@" }
71 }, basicLexing),
72 lineAnno: Object.assign({
73 anno_end: { match: /\r\n|\r|\n/, pop: 1, lineBreaks: true },
74 whitespace: { match: /[ \t\v\f]+/ },
75 comment_start: { match: "/*", push: "multiLineComment" },
76 comment_line_start: { match: "//", next: "lineComment" },
77 annospace: { match: "@" }
78 }, basicLexing),
79 stringComponents: {
80 string_delimiter: { match: /"/, pop: 1 },
81 characters: { match: /[^\\\n\r"]+/, lineBreaks: false },
82 special_character: { match: /\\[^\n\r]/, lineBreaks: false },
83 invalid_string_character: { match: /[\x00-xFF]/, lineBreaks: true }
84 },
85 charComponents: {
86 char_delimiter: { match: /'/, pop: 1 },
87 special_character: { match: /\\./, lineBreaks: true },
88 character: { match: /./, lineBreaks: false },
89 invalid_string_character: { match: /[\x00-xFF]/, lineBreaks: true, pop: 1 }
90 },
91 multiLineComment: {
92 comment_start: { match: "/*", push: "multiLineComment" },
93 comment_end: { match: "*/", pop: 1 },
94 comment: { match: /\*|\/|[^*\/\r\n]+/, lineBreaks: false },
95 newline: { match: /\n|\r|\r\n/, lineBreaks: true }
96 },
97 lineComment: {
98 comment: { match: /[^\n\r]/, lineBreaks: false },
99 comment_line_end: { match: /\n|\r|\r\n/, lineBreaks: true, pop: 1 }
100 }
101}, "main");
102class TypeLexer {
103 constructor(typeIds) {
104 this.typeIds = immutable_1.Set();
105 this.typeIds = typeIds ? typeIds : immutable_1.Set();
106 }
107 addIdentifier(typeIdentifier) {
108 this.typeIds = this.typeIds.add(typeIdentifier);
109 }
110 next() {
111 const tok = exports.coreLexer.next();
112 if (!tok)
113 return undefined;
114 else if (tok["type"] === "identifier" && this.typeIds.has(tok.value)) {
115 tok["type"] = "type_identifier";
116 return tok;
117 }
118 else if (tok["type"] === "identifier") {
119 return tok;
120 }
121 else {
122 return tok;
123 }
124 }
125 save() {
126 return exports.coreLexer.save();
127 }
128 reset(chunk, state) {
129 exports.coreLexer.reset(chunk, state);
130 }
131 formatError(token, message) {
132 return exports.coreLexer.formatError(token, message);
133 }
134 has(tokenType) {
135 return exports.coreLexer.has(tokenType);
136 }
137}
138exports.TypeLexer = TypeLexer;
139exports.lexer = new TypeLexer();
140//# sourceMappingURL=lex.js.map
\No newline at end of file