// Retrieved from the UNPKG file viewer (JavaScript, 5.8 kB).
1import { TextRange } from './TextRange';
2import { Token, TokenKind } from './Token';
var Tokenizer = /** @class */ (function () {
    /**
     * Splits lines of text into a flat list of Token objects.
     * Every member is static; nothing in this file instantiates the class.
     */
    function Tokenizer() {
    }
    /**
     * Given a list of input lines, this returns an array of extracted tokens.
     * The last token will always be TokenKind.EndOfInput.
     *
     * @param lines - the lines to tokenize, in order. Each line must provide `buffer`, `pos`,
     * `end` and `getNewRange(pos, end)`, which is what `_pushTokensForLine` reads.
     * @returns a new array holding the tokens of every line (each line is terminated by a
     * Newline token), followed by exactly one EndOfInput token.
     */
    Tokenizer.readTokens = function (lines) {
        Tokenizer._ensureInitialized();
        const tokens = [];
        let lastLine = undefined;
        for (const line of lines) {
            Tokenizer._pushTokensForLine(tokens, line);
            lastLine = line;
        }
        if (lastLine) {
            // Anchor the end-of-input marker as an empty range at the end of the final line
            tokens.push(new Token(TokenKind.EndOfInput, lastLine.getNewRange(lastLine.end, lastLine.end), lastLine));
        }
        else {
            // No lines at all: there is nothing to anchor to, so use the shared empty range
            tokens.push(new Token(TokenKind.EndOfInput, TextRange.empty, TextRange.empty));
        }
        return tokens;
    };
    /**
     * Returns true if the token is a CommonMark punctuation character.
     * These are basically all the ASCII punctuation characters.
     *
     * @param tokenKind - the token kind to test
     * @returns `true` for OtherPunctuation and for every kind registered in the special-symbol
     * table; `false` for anything else. The result is always a boolean.
     */
    Tokenizer.isPunctuation = function (tokenKind) {
        Tokenizer._ensureInitialized();
        // Compare against `true` so that an unknown key can never leak a non-boolean value
        return Tokenizer._punctuationTokens[tokenKind] === true;
    };
    /**
     * Appends the tokens for a single line to `tokens`, followed by a Newline token.
     *
     * @param tokens - the output array; it is mutated in place
     * @param line - the line to scan; characters from `line.pos` up to (not including)
     * `line.end` of `line.buffer` are examined
     */
    Tokenizer._pushTokensForLine = function (tokens, line) {
        const buffer = line.buffer;
        const end = line.end;
        // Kind and start offset of the token currently being accumulated (undefined = none yet)
        let tokenKind = undefined;
        let tokenPos = line.pos;
        let bufferIndex = line.pos;
        for (; bufferIndex < end; ++bufferIndex) {
            // Read a character and determine its kind; unmapped characters are "Other"
            const mappedKind = Tokenizer._charCodeMap[buffer.charCodeAt(bufferIndex)];
            const characterKind = mappedKind === undefined ? TokenKind.Other : mappedKind;
            // Can we append to an existing token? Yes if:
            // 1. There is an existing token, AND
            // 2. It is the same kind of token, AND
            // 3. It's not punctuation (which is always one character)
            if (tokenKind !== undefined &&
                characterKind === tokenKind &&
                Tokenizer._isMultiCharacterToken(tokenKind)) {
                continue;
            }
            // Otherwise flush the completed token (if any) and start a new one here
            if (tokenKind !== undefined) {
                tokens.push(new Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));
            }
            tokenPos = bufferIndex;
            tokenKind = characterKind;
        }
        // Flush the token that was still open when the line ended
        if (tokenKind !== undefined) {
            tokens.push(new Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));
        }
        // Every line, even an empty one, is terminated by a zero-length Newline token
        tokens.push(new Token(TokenKind.Newline, line.getNewRange(line.end, line.end), line));
    };
    /**
     * Returns true if the token can be comprised of multiple characters
     *
     * @param kind - the token kind to test
     * @returns `true` for Spacing, AsciiWord and Other; `false` for every other kind
     */
    Tokenizer._isMultiCharacterToken = function (kind) {
        switch (kind) {
            case TokenKind.Spacing:
            case TokenKind.AsciiWord:
            case TokenKind.Other:
                return true;
        }
        return false;
    };
    /**
     * Builds the character-code and punctuation lookup tables on first use.
     * Subsequent calls return immediately.
     */
    Tokenizer._ensureInitialized = function () {
        if (Tokenizer._charCodeMap) {
            return;
        }
        // Prototype-less objects: a lookup can only ever find keys that were stored below
        const charCodeMap = Object.create(null);
        const punctuationTokens = Object.create(null);
        // All Markdown punctuation characters
        const punctuation = Tokenizer._commonMarkPunctuationCharacters;
        for (let i = 0; i < punctuation.length; ++i) {
            charCodeMap[punctuation.charCodeAt(i)] = TokenKind.OtherPunctuation;
        }
        // Special symbols (these override the generic OtherPunctuation entries above)
        // !"#$%&\'()*+,\-.\/:;<=>?@[\\]^_`{|}~
        const specialMap = {
            '\\': TokenKind.Backslash,
            '<': TokenKind.LessThan,
            '>': TokenKind.GreaterThan,
            '=': TokenKind.Equals,
            "'": TokenKind.SingleQuote,
            '"': TokenKind.DoubleQuote,
            '/': TokenKind.Slash,
            '-': TokenKind.Hyphen,
            '@': TokenKind.AtSign,
            '{': TokenKind.LeftCurlyBracket,
            '}': TokenKind.RightCurlyBracket,
            '`': TokenKind.Backtick,
            '.': TokenKind.Period,
            ':': TokenKind.Colon,
            ',': TokenKind.Comma,
            '[': TokenKind.LeftSquareBracket,
            ']': TokenKind.RightSquareBracket,
            '|': TokenKind.Pipe,
            '(': TokenKind.LeftParenthesis,
            ')': TokenKind.RightParenthesis,
            '#': TokenKind.PoundSymbol,
            '+': TokenKind.Plus,
            $: TokenKind.DollarSign
        };
        for (const key of Object.keys(specialMap)) {
            charCodeMap[key.charCodeAt(0)] = specialMap[key];
            punctuationTokens[specialMap[key]] = true;
        }
        punctuationTokens[TokenKind.OtherPunctuation] = true;
        const word = Tokenizer._wordCharacters;
        for (let i = 0; i < word.length; ++i) {
            charCodeMap[word.charCodeAt(i)] = TokenKind.AsciiWord;
        }
        charCodeMap[' '.charCodeAt(0)] = TokenKind.Spacing;
        charCodeMap['\t'.charCodeAt(0)] = TokenKind.Spacing;
        // Publish only once both tables are complete. _charCodeMap is the "initialized" guard,
        // so it is assigned last; a failure above leaves the class uninitialized, not half-built.
        Tokenizer._punctuationTokens = punctuationTokens;
        Tokenizer._charCodeMap = charCodeMap;
    };
    // Characters mapped to punctuation kinds. '_' is absent here and is listed in
    // _wordCharacters instead, so it is tokenized as part of an AsciiWord.
    Tokenizer._commonMarkPunctuationCharacters = '!"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~';
    Tokenizer._wordCharacters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
    return Tokenizer;
}());
export { Tokenizer };
//# sourceMappingURL=Tokenizer.js.map