// UNPKG listing: 6.25 kB, JavaScript ("View Raw").
"use strict";
// Mark this CommonJS module as transpiled from an ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export up front; the real value is assigned after the class is defined.
exports.Tokenizer = void 0;
// Project-local dependencies used by the Tokenizer class.
var TextRange_1 = require("./TextRange");
var Token_1 = require("./Token");
var Tokenizer = /** @class */ (function () {
    /**
     * Splits input lines into tokens.  Every member is static, so the
     * constructor is empty and instances carry no state.
     */
    function Tokenizer() {
    }
    /**
     * Given a list of input lines, this returns an array of extracted tokens.
     * The last token will always be TokenKind.EndOfInput.
     *
     * @param lines - the lines to tokenize; each one must provide `buffer`,
     *   `pos`, `end` and `getNewRange(pos, end)`
     * @returns the tokens of every line (each line is terminated by a Newline
     *   token), followed by a single EndOfInput token
     */
    Tokenizer.readTokens = function (lines) {
        Tokenizer._ensureInitialized();
        var tokens = [];
        var lastLine = undefined;
        for (var i = 0; i < lines.length; i++) {
            var line = lines[i];
            Tokenizer._pushTokensForLine(tokens, line);
            lastLine = line;
        }
        if (lastLine) {
            // Anchor EndOfInput to an empty range at the end of the final line.
            tokens.push(new Token_1.Token(Token_1.TokenKind.EndOfInput, lastLine.getNewRange(lastLine.end, lastLine.end), lastLine));
        }
        else {
            // No lines were supplied, so there is nothing to anchor to.
            tokens.push(new Token_1.Token(Token_1.TokenKind.EndOfInput, TextRange_1.TextRange.empty, TextRange_1.TextRange.empty));
        }
        return tokens;
    };
    /**
     * Returns true if the token is a CommonMark punctuation character.
     * These are basically all the ASCII punctuation characters.
     *
     * @param tokenKind - the TokenKind value to test
     * @returns a strict boolean; unknown kinds yield false
     */
    Tokenizer.isPunctuation = function (tokenKind) {
        Tokenizer._ensureInitialized();
        // Compare against `true` so that the result is always a real boolean.
        return Tokenizer._punctuationTokens[tokenKind] === true;
    };
    /**
     * Appends the tokens for a single line to `tokens`, followed by a Newline
     * token positioned at the end of the line.
     *
     * @param tokens - output array that receives the new Token objects
     * @param line - the line to scan, from `line.pos` up to (not including) `line.end`
     */
    Tokenizer._pushTokensForLine = function (tokens, line) {
        var buffer = line.buffer;
        var end = line.end;
        var bufferIndex = line.pos;
        // Kind and start index of the token currently being accumulated.
        var tokenKind = undefined;
        var tokenPos = bufferIndex;
        while (bufferIndex < end) {
            // Read a character and determine its kind
            var charCode = buffer.charCodeAt(bufferIndex);
            var characterKind = Tokenizer._charCodeMap[charCode];
            if (characterKind === undefined) {
                // Anything without a table entry is lumped into "Other"
                characterKind = Token_1.TokenKind.Other;
            }
            // Can we append to an existing token?  Yes if:
            // 1. There is an existing token, AND
            // 2. It is the same kind of token, AND
            // 3. It's not punctuation (which is always one character)
            var canAppend = tokenKind !== undefined &&
                characterKind === tokenKind &&
                Tokenizer._isMultiCharacterToken(tokenKind);
            if (!canAppend) {
                // Is there a previous completed token to push?
                if (tokenKind !== undefined) {
                    tokens.push(new Token_1.Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));
                }
                // Start a new token at the current character
                tokenPos = bufferIndex;
                tokenKind = characterKind;
            }
            ++bufferIndex;
        }
        // Is there a previous completed token to push?
        if (tokenKind !== undefined) {
            tokens.push(new Token_1.Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));
        }
        tokens.push(new Token_1.Token(Token_1.TokenKind.Newline, line.getNewRange(line.end, line.end), line));
    };
    /**
     * Returns true if the token can be comprised of multiple characters
     *
     * @param kind - the TokenKind value to test
     */
    Tokenizer._isMultiCharacterToken = function (kind) {
        switch (kind) {
            case Token_1.TokenKind.Spacing:
            case Token_1.TokenKind.AsciiWord:
            case Token_1.TokenKind.Other:
                return true;
        }
        return false;
    };
    /**
     * Lazily builds the two lookup tables used by the tokenizer:
     * `_charCodeMap` (character code -> TokenKind) and `_punctuationTokens`
     * (TokenKind -> true for punctuation kinds).  Later calls are no-ops.
     */
    Tokenizer._ensureInitialized = function () {
        if (Tokenizer._charCodeMap) {
            return;
        }
        // Build into locals and publish only when complete, so that an exception
        // part-way through cannot leave a half-filled table that later calls
        // would mistake for a finished one.  The tables have no prototype, so
        // lookups can never hit an inherited property such as "toString".
        var charCodeMap = Object.create(null);
        var punctuationTokens = Object.create(null);
        // All Markdown punctuation characters
        var punctuation = Tokenizer._commonMarkPunctuationCharacters;
        for (var p = 0; p < punctuation.length; ++p) {
            charCodeMap[punctuation.charCodeAt(p)] = Token_1.TokenKind.OtherPunctuation;
        }
        // Special symbols
        // !"#$%&\'()*+,\-.\/:;<=>?@[\\]^_`{|}~
        var specialMap = {
            '\\': Token_1.TokenKind.Backslash,
            '<': Token_1.TokenKind.LessThan,
            '>': Token_1.TokenKind.GreaterThan,
            '=': Token_1.TokenKind.Equals,
            "'": Token_1.TokenKind.SingleQuote,
            '"': Token_1.TokenKind.DoubleQuote,
            '/': Token_1.TokenKind.Slash,
            '-': Token_1.TokenKind.Hyphen,
            '@': Token_1.TokenKind.AtSign,
            '{': Token_1.TokenKind.LeftCurlyBracket,
            '}': Token_1.TokenKind.RightCurlyBracket,
            '`': Token_1.TokenKind.Backtick,
            '.': Token_1.TokenKind.Period,
            ':': Token_1.TokenKind.Colon,
            ',': Token_1.TokenKind.Comma,
            '[': Token_1.TokenKind.LeftSquareBracket,
            ']': Token_1.TokenKind.RightSquareBracket,
            '|': Token_1.TokenKind.Pipe,
            '(': Token_1.TokenKind.LeftParenthesis,
            ')': Token_1.TokenKind.RightParenthesis,
            '#': Token_1.TokenKind.PoundSymbol,
            '+': Token_1.TokenKind.Plus,
            '$': Token_1.TokenKind.DollarSign
        };
        // These entries override the generic OtherPunctuation assigned above.
        var specialCharacters = Object.getOwnPropertyNames(specialMap);
        for (var s = 0; s < specialCharacters.length; s++) {
            var key = specialCharacters[s];
            charCodeMap[key.charCodeAt(0)] = specialMap[key];
            punctuationTokens[specialMap[key]] = true;
        }
        punctuationTokens[Token_1.TokenKind.OtherPunctuation] = true;
        // Word characters are assigned after punctuation, so they take precedence.
        var word = Tokenizer._wordCharacters;
        for (var w = 0; w < word.length; ++w) {
            charCodeMap[word.charCodeAt(w)] = Token_1.TokenKind.AsciiWord;
        }
        charCodeMap[' '.charCodeAt(0)] = Token_1.TokenKind.Spacing;
        charCodeMap['\t'.charCodeAt(0)] = Token_1.TokenKind.Spacing;
        // Publish the punctuation table first: `_charCodeMap` doubles as the
        // "already initialized" flag checked at the top of this function.
        Tokenizer._punctuationTokens = punctuationTokens;
        Tokenizer._charCodeMap = charCodeMap;
    };
    // Characters mapped to a punctuation TokenKind.  Underscore is absent here
    // and is listed in _wordCharacters instead.
    Tokenizer._commonMarkPunctuationCharacters = '!"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~';
    // Characters that make up an AsciiWord token.
    Tokenizer._wordCharacters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
    return Tokenizer;
}());
// Publish the Tokenizer class as this module's named export.
exports.Tokenizer = Tokenizer;
//# sourceMappingURL=Tokenizer.js.map