1 | "use strict";
|
2 | Object.defineProperty(exports, "__esModule", { value: true });
|
3 | exports.Tokenizer = void 0;
|
4 | var TextRange_1 = require("./TextRange");
|
5 | var Token_1 = require("./Token");
|
/**
 * Splits lines of source text into a flat stream of Token objects.
 *
 * All state is static: the character-classification tables are built lazily
 * on first use and shared by every call.
 */
var Tokenizer = (function () {
    function Tokenizer() {
    }
    /**
     * Tokenizes each input line in order and terminates the stream with a
     * single EndOfInput token.
     *
     * @param lines - the line objects (TextRange-like) to tokenize
     * @returns an array of Token objects; the final element is always
     *   TokenKind.EndOfInput
     */
    Tokenizer.readTokens = function (lines) {
        Tokenizer._ensureInitialized();
        var result = [];
        var finalLine = undefined;
        for (var index = 0; index < lines.length; ++index) {
            var currentLine = lines[index];
            Tokenizer._pushTokensForLine(result, currentLine);
            finalLine = currentLine;
        }
        // Anchor the EndOfInput token to the last line when one exists;
        // with no input lines, fall back to an empty range.
        if (finalLine) {
            result.push(new Token_1.Token(Token_1.TokenKind.EndOfInput, finalLine.getNewRange(finalLine.end, finalLine.end), finalLine));
        }
        else {
            result.push(new Token_1.Token(Token_1.TokenKind.EndOfInput, TextRange_1.TextRange.empty, TextRange_1.TextRange.empty));
        }
        return result;
    };
    /**
     * Returns true if the given token kind is classified as punctuation
     * (any of the special single-character kinds, or OtherPunctuation).
     *
     * @param tokenKind - the TokenKind value to test
     */
    Tokenizer.isPunctuation = function (tokenKind) {
        Tokenizer._ensureInitialized();
        return Tokenizer._punctuationTokens[tokenKind] ? true : false;
    };
    /**
     * Scans one line and appends its tokens (plus a trailing Newline token)
     * to the supplied array.  Consecutive characters of the same kind are
     * merged into a single token when that kind supports multi-character
     * runs (see _isMultiCharacterToken).
     */
    Tokenizer._pushTokensForLine = function (tokens, line) {
        var text = line.buffer;
        var endIndex = line.end;
        var pos = line.pos;
        var activeKind = undefined;   // kind of the token currently being accumulated
        var activeStart = pos;        // start offset of that token
        while (pos < endIndex) {
            var code = text.charCodeAt(pos);
            var kindForChar = Tokenizer._charCodeMap[code];
            if (kindForChar === undefined) {
                kindForChar = Token_1.TokenKind.Other;
            }
            if (activeKind !== undefined &&
                kindForChar === activeKind &&
                Tokenizer._isMultiCharacterToken(activeKind)) {
                // Same kind and the kind may span characters: keep absorbing.
            }
            else {
                // Kind changed (or a single-character kind repeated):
                // flush the accumulated token and start a new one.
                if (activeKind !== undefined) {
                    tokens.push(new Token_1.Token(activeKind, line.getNewRange(activeStart, pos), line));
                }
                activeStart = pos;
                activeKind = kindForChar;
            }
            ++pos;
        }
        // Flush whatever token was still open when the line ended.
        if (activeKind !== undefined) {
            tokens.push(new Token_1.Token(activeKind, line.getNewRange(activeStart, pos), line));
        }
        // Every line is terminated by a zero-width Newline token.
        tokens.push(new Token_1.Token(Token_1.TokenKind.Newline, line.getNewRange(line.end, line.end), line));
    };
    /**
     * Returns true for kinds whose adjacent characters collapse into one
     * token (runs of spacing, word characters, or unclassified characters).
     */
    Tokenizer._isMultiCharacterToken = function (kind) {
        return (kind === Token_1.TokenKind.Spacing ||
            kind === Token_1.TokenKind.AsciiWord ||
            kind === Token_1.TokenKind.Other);
    };
    /**
     * Lazily builds the two lookup tables:
     *  - _charCodeMap: char code -> TokenKind classification
     *  - _punctuationTokens: TokenKind -> true for punctuation kinds
     * Subsequent calls are no-ops.
     */
    Tokenizer._ensureInitialized = function () {
        if (Tokenizer._charCodeMap) {
            return;
        }
        Tokenizer._charCodeMap = {};
        Tokenizer._punctuationTokens = {};
        // Default every CommonMark punctuation character to OtherPunctuation;
        // the specialMap below overrides the ones with dedicated kinds.
        var punctuationChars = Tokenizer._commonMarkPunctuationCharacters;
        for (var p = 0; p < punctuationChars.length; ++p) {
            Tokenizer._charCodeMap[punctuationChars.charCodeAt(p)] = Token_1.TokenKind.OtherPunctuation;
        }
        // Characters that get their own dedicated TokenKind.
        var specialMap = {
            '\\': Token_1.TokenKind.Backslash,
            '<': Token_1.TokenKind.LessThan,
            '>': Token_1.TokenKind.GreaterThan,
            '=': Token_1.TokenKind.Equals,
            "'": Token_1.TokenKind.SingleQuote,
            '"': Token_1.TokenKind.DoubleQuote,
            '/': Token_1.TokenKind.Slash,
            '-': Token_1.TokenKind.Hyphen,
            '@': Token_1.TokenKind.AtSign,
            '{': Token_1.TokenKind.LeftCurlyBracket,
            '}': Token_1.TokenKind.RightCurlyBracket,
            '`': Token_1.TokenKind.Backtick,
            '.': Token_1.TokenKind.Period,
            ':': Token_1.TokenKind.Colon,
            ',': Token_1.TokenKind.Comma,
            '[': Token_1.TokenKind.LeftSquareBracket,
            ']': Token_1.TokenKind.RightSquareBracket,
            '|': Token_1.TokenKind.Pipe,
            '(': Token_1.TokenKind.LeftParenthesis,
            ')': Token_1.TokenKind.RightParenthesis,
            '#': Token_1.TokenKind.PoundSymbol,
            '+': Token_1.TokenKind.Plus,
            '$': Token_1.TokenKind.DollarSign
        };
        Object.getOwnPropertyNames(specialMap).forEach(function (ch) {
            Tokenizer._charCodeMap[ch.charCodeAt(0)] = specialMap[ch];
            Tokenizer._punctuationTokens[specialMap[ch]] = true;
        });
        Tokenizer._punctuationTokens[Token_1.TokenKind.OtherPunctuation] = true;
        // ASCII letters, digits, and underscore form word tokens.
        var wordChars = Tokenizer._wordCharacters;
        for (var w = 0; w < wordChars.length; ++w) {
            Tokenizer._charCodeMap[wordChars.charCodeAt(w)] = Token_1.TokenKind.AsciiWord;
        }
        Tokenizer._charCodeMap[32] = Token_1.TokenKind.Spacing; // ' '
        Tokenizer._charCodeMap[9] = Token_1.TokenKind.Spacing;  // '\t'
    };
    // Punctuation set from the CommonMark spec (ASCII punctuation characters).
    Tokenizer._commonMarkPunctuationCharacters = '!"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~';
    // Characters classified as AsciiWord.
    Tokenizer._wordCharacters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
    return Tokenizer;
}());
|
// Expose Tokenizer as this module's public CommonJS export.
exports.Tokenizer = Tokenizer;
|
146 |
|
\ | No newline at end of file |