import { TextRange } from './TextRange';
import { Token, TokenKind } from './Token';
3 | var Tokenizer = (function () {
|
4 | function Tokenizer() {
|
5 | }
|
6 | |
7 |
|
8 |
|
9 |
|
10 | Tokenizer.readTokens = function (lines) {
|
11 | Tokenizer._ensureInitialized();
|
12 | var tokens = [];
|
13 | var lastLine = undefined;
|
14 | for (var _i = 0, lines_1 = lines; _i < lines_1.length; _i++) {
|
15 | var line = lines_1[_i];
|
16 | Tokenizer._pushTokensForLine(tokens, line);
|
17 | lastLine = line;
|
18 | }
|
19 | if (lastLine) {
|
20 | tokens.push(new Token(TokenKind.EndOfInput, lastLine.getNewRange(lastLine.end, lastLine.end), lastLine));
|
21 | }
|
22 | else {
|
23 | tokens.push(new Token(TokenKind.EndOfInput, TextRange.empty, TextRange.empty));
|
24 | }
|
25 | return tokens;
|
26 | };
|
27 | |
28 |
|
29 |
|
30 |
|
31 | Tokenizer.isPunctuation = function (tokenKind) {
|
32 | Tokenizer._ensureInitialized();
|
33 | return Tokenizer._punctuationTokens[tokenKind] || false;
|
34 | };
|
35 | Tokenizer._pushTokensForLine = function (tokens, line) {
|
36 | var buffer = line.buffer;
|
37 | var end = line.end;
|
38 | var bufferIndex = line.pos;
|
39 | var tokenKind = undefined;
|
40 | var tokenPos = bufferIndex;
|
41 | while (bufferIndex < end) {
|
42 |
|
43 | var charCode = buffer.charCodeAt(bufferIndex);
|
44 | var characterKind = Tokenizer._charCodeMap[charCode];
|
45 | if (characterKind === undefined) {
|
46 | characterKind = TokenKind.Other;
|
47 | }
|
48 |
|
49 |
|
50 |
|
51 |
|
52 | if (tokenKind !== undefined &&
|
53 | characterKind === tokenKind &&
|
54 | Tokenizer._isMultiCharacterToken(tokenKind)) {
|
55 |
|
56 | }
|
57 | else {
|
58 |
|
59 | if (tokenKind !== undefined) {
|
60 | tokens.push(new Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));
|
61 | }
|
62 | tokenPos = bufferIndex;
|
63 | tokenKind = characterKind;
|
64 | }
|
65 | ++bufferIndex;
|
66 | }
|
67 |
|
68 | if (tokenKind !== undefined) {
|
69 | tokens.push(new Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));
|
70 | }
|
71 | tokens.push(new Token(TokenKind.Newline, line.getNewRange(line.end, line.end), line));
|
72 | };
|
73 | |
74 |
|
75 |
|
76 | Tokenizer._isMultiCharacterToken = function (kind) {
|
77 | switch (kind) {
|
78 | case TokenKind.Spacing:
|
79 | case TokenKind.AsciiWord:
|
80 | case TokenKind.Other:
|
81 | return true;
|
82 | }
|
83 | return false;
|
84 | };
|
85 | Tokenizer._ensureInitialized = function () {
|
86 | if (Tokenizer._charCodeMap) {
|
87 | return;
|
88 | }
|
89 | Tokenizer._charCodeMap = {};
|
90 | Tokenizer._punctuationTokens = {};
|
91 |
|
92 | var punctuation = Tokenizer._commonMarkPunctuationCharacters;
|
93 | for (var i = 0; i < punctuation.length; ++i) {
|
94 | var charCode = punctuation.charCodeAt(i);
|
95 | Tokenizer._charCodeMap[charCode] = TokenKind.OtherPunctuation;
|
96 | }
|
97 |
|
98 |
|
99 | var specialMap = {
|
100 | '\\': TokenKind.Backslash,
|
101 | '<': TokenKind.LessThan,
|
102 | '>': TokenKind.GreaterThan,
|
103 | '=': TokenKind.Equals,
|
104 | "'": TokenKind.SingleQuote,
|
105 | '"': TokenKind.DoubleQuote,
|
106 | '/': TokenKind.Slash,
|
107 | '-': TokenKind.Hyphen,
|
108 | '@': TokenKind.AtSign,
|
109 | '{': TokenKind.LeftCurlyBracket,
|
110 | '}': TokenKind.RightCurlyBracket,
|
111 | '`': TokenKind.Backtick,
|
112 | '.': TokenKind.Period,
|
113 | ':': TokenKind.Colon,
|
114 | ',': TokenKind.Comma,
|
115 | '[': TokenKind.LeftSquareBracket,
|
116 | ']': TokenKind.RightSquareBracket,
|
117 | '|': TokenKind.Pipe,
|
118 | '(': TokenKind.LeftParenthesis,
|
119 | ')': TokenKind.RightParenthesis,
|
120 | '#': TokenKind.PoundSymbol,
|
121 | '+': TokenKind.Plus,
|
122 | $: TokenKind.DollarSign
|
123 | };
|
124 | for (var _i = 0, _a = Object.getOwnPropertyNames(specialMap); _i < _a.length; _i++) {
|
125 | var key = _a[_i];
|
126 | Tokenizer._charCodeMap[key.charCodeAt(0)] = specialMap[key];
|
127 | Tokenizer._punctuationTokens[specialMap[key]] = true;
|
128 | }
|
129 | Tokenizer._punctuationTokens[TokenKind.OtherPunctuation] = true;
|
130 | var word = Tokenizer._wordCharacters;
|
131 | for (var i = 0; i < word.length; ++i) {
|
132 | var charCode = word.charCodeAt(i);
|
133 | Tokenizer._charCodeMap[charCode] = TokenKind.AsciiWord;
|
134 | }
|
135 | Tokenizer._charCodeMap[' '.charCodeAt(0)] = TokenKind.Spacing;
|
136 | Tokenizer._charCodeMap['\t'.charCodeAt(0)] = TokenKind.Spacing;
|
137 | };
|
138 | Tokenizer._commonMarkPunctuationCharacters = '!"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~';
|
139 | Tokenizer._wordCharacters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
|
140 | return Tokenizer;
|
141 | }());
export { Tokenizer };