// Retrieved from UNPKG (compiled output, ~5.01 kB); page-header text converted to a comment.
1"use strict";
2Object.defineProperty(exports, "__esModule", { value: true });
3const moo_1 = require("moo");
4const immutable_1 = require("immutable");
/**
 * Ambitious Goal: "invalid syntax" errors from the lexer are unclear. Can we take errors out of the lexer to
 * the point we can clearly enumerate all files that will _not_ be accepted by the lexer? This would also
 * facilitate clearer, lexer-independent error reporting.
 *
 * All UTF-8 strings should lex, unless they:
 * 1. Contain non-printable characters.
 * 2. Contain the character ` outside of a string/char/comment.
 * 3. Contain characters outside of the Unicode range.
 */
15const basicLexing = {
16 identifier: {
17 match: /[A-Za-z_][A-Za-z0-9_]*/,
18 keywords: {
19 keyword: [
20 "int",
21 "bool",
22 "string",
23 "char",
24 "void",
25 "struct",
26 "typedef",
27 "if",
28 "else",
29 "while",
30 "for",
31 "continue",
32 "break",
33 "return",
34 "assert",
35 "error",
36 "true",
37 "false",
38 "NULL",
39 "alloc",
40 "alloc_array"
41 ]
42 }
43 },
44 numeric_literal: { match: /(?:0[xX][0-9a-zA-Z]+)|(?:[1-9][0-9]*)|0/ },
45 char_delimiter: { match: /'/, push: "charComponents" },
46 string_delimiter: { match: /\"/, push: "stringComponents" },
47 logical_and: "&&",
48 symbol: /[!$%&\(\)*+,\-.\/:;<=>?\[\\\]^{\|}~]/,
49 unexpected_unicode_character: { match: /[\x00-\u{10FFFF}]/, lineBreaks: true },
50 invalid_character: { match: /./, lineBreaks: true },
51 type_identifier: "<placeholder>",
52 space: "<placeholder>"
53};
54exports.coreLexer = moo_1.states({
55 main: Object.assign({
56 newline: { match: /\r\n|\r|\n/, lineBreaks: true },
57 whitespace: { match: /[ \t\v\f]+/ },
58 anno_start: { match: "/*@", push: "multiLineAnno" },
59 comment_start: { match: "/*", push: "multiLineComment" },
60 anno_line_start: { match: "//@", push: "lineAnno" },
61 comment_line_start: { match: "//", push: "lineComment" },
62 pragma: /#.*/
63 }, basicLexing),
64 multiLineAnno: Object.assign({
65 newline: { match: /\r\n|\r|\n/, lineBreaks: true },
66 whitespace: { match: /[ \t\v\f]+/ },
67 anno_end: { match: "@*/", pop: 1 },
68 comment_start: { match: "/*", push: "multiLineComment" },
69 comment_line_start: { match: "//", push: "lineComment" },
70 annospace: { match: "@" }
71 }, basicLexing),
72 lineAnno: Object.assign({
73 anno_end: { match: /\r\n|\r|\n/, pop: 1, lineBreaks: true },
74 whitespace: { match: /[ \t\v\f]+/ },
75 comment_start: { match: "/*", push: "multiLineComment" },
76 comment_line_start: { match: "//", next: "lineComment" },
77 annospace: { match: "@" }
78 }, basicLexing),
79 stringComponents: {
80 string_delimiter: { match: /"/, pop: 1 },
81 characters: { match: /[^\\\n\r"]+/, lineBreaks: false },
82 special_character: { match: /\\[^\n\r]/, lineBreaks: false },
83 invalid_string_character: { match: /[\x00-xFF]/, lineBreaks: true }
84 },
85 charComponents: {
86 char_delimiter: { match: /'/, pop: 1 },
87 special_character: { match: /\\./, lineBreaks: true },
88 character: { match: /./, lineBreaks: false },
89 invalid_string_character: { match: /[\x00-xFF]/, lineBreaks: true, pop: 1 }
90 },
91 multiLineComment: {
92 comment_start: { match: "/*", push: "multiLineComment" },
93 comment_end: { match: "*/", pop: 1 },
94 comment: { match: /\*|\/|[^*\/\r\n]+/, lineBreaks: false },
95 newline: { match: /\n|\r|\r\n/, lineBreaks: true }
96 },
97 lineComment: {
98 comment: { match: /[^\n\r]/, lineBreaks: false },
99 comment_line_end: { match: /\n|\r|\r\n/, lineBreaks: true, pop: 1 }
100 }
101}, "main");
102class TypeLexer {
103 constructor(typeIds) {
104 this.typeIds = immutable_1.Set();
105 this.typeIds = typeIds ? typeIds : immutable_1.Set();
106 }
107 addIdentifier(typeIdentifier) {
108 this.typeIds = this.typeIds.add(typeIdentifier);
109 }
110 next() {
111 const tok = exports.coreLexer.next();
112 if (!tok)
113 return undefined;
114 else if (tok["type"] === "identifier" && this.typeIds.has(tok.value)) {
115 tok["type"] = "type_identifier";
116 return tok;
117 }
118 else if (tok["type"] === "identifier") {
119 return tok;
120 }
121 else {
122 return tok;
123 }
124 }
125 save() {
126 return exports.coreLexer.save();
127 }
128 reset(chunk, state) {
129 exports.coreLexer.reset(chunk, state);
130 }
131 formatError(token, message) {
132 return exports.coreLexer.formatError(token, message);
133 }
134 has(tokenType) {
135 return exports.coreLexer.has(tokenType);
136 }
137}
138exports.TypeLexer = TypeLexer;
139exports.lexer = new TypeLexer();
140//# sourceMappingURL=lex.js.map
\No newline at end of file