UNPKG

@microsoft/tsdoc/lib/parser/Tokenizer.js

Version:

5.8 kBJavaScriptView Raw

import { TextRange } from './TextRange';
import { Token, TokenKind } from './Token';
/**
 * Splits lines of text into a flat array of `Token` objects.
 *
 * Every member is static; the constructor exists only because of the
 * class-style emit and is not called anywhere in this file.
 */
var Tokenizer = /** @class */ (function () {
    function Tokenizer() {
    }
    /**
     * Given a list of input lines, this returns an array of extracted tokens.
     * The last token will always be TokenKind.EndOfInput.
     *
     * @param lines - array of line ranges; each one must expose `buffer`, `pos`,
     *   `end` and `getNewRange(pos, end)`
     * @returns the tokens of every line in order (each line is terminated by a
     *   zero-length Newline token), followed by a single EndOfInput token
     */
    Tokenizer.readTokens = function (lines) {
        Tokenizer._ensureInitialized();
        var tokens = [];
        var lastLine = undefined;
        for (var _i = 0, lines_1 = lines; _i < lines_1.length; _i++) {
            var line = lines_1[_i];
            Tokenizer._pushTokensForLine(tokens, line);
            lastLine = line;
        }
        if (lastLine) {
            // EndOfInput is a zero-length range positioned at the end of the last line
            tokens.push(new Token(TokenKind.EndOfInput, lastLine.getNewRange(lastLine.end, lastLine.end), lastLine));
        }
        else {
            // There were no lines at all, so there is nothing to anchor the token to
            tokens.push(new Token(TokenKind.EndOfInput, TextRange.empty, TextRange.empty));
        }
        return tokens;
    };
    /**
     * Returns true if the token is a CommonMark punctuation character.
     * These are basically all the ASCII punctuation characters.
     *
     * @param tokenKind - the TokenKind value to test
     * @returns `true` for OtherPunctuation and for every kind registered as a
     *   special symbol by `_ensureInitialized`; `false` for anything else
     */
    Tokenizer.isPunctuation = function (tokenKind) {
        Tokenizer._ensureInitialized();
        // Strict comparison guarantees a boolean result, whatever key is passed in
        return Tokenizer._punctuationTokens[tokenKind] === true;
    };
    /**
     * Tokenizes the characters of `line` in the range [line.pos, line.end) and
     * appends the resulting tokens to `tokens`, followed by a zero-length
     * Newline token located at `line.end`.
     *
     * @param tokens - output array, mutated in place
     * @param line - the line range to scan
     */
    Tokenizer._pushTokensForLine = function (tokens, line) {
        var buffer = line.buffer;
        var end = line.end;
        var bufferIndex = line.pos;
        // Kind and start offset of the token currently being accumulated;
        // tokenKind stays undefined until the first character has been read
        var tokenKind = undefined;
        var tokenPos = bufferIndex;
        while (bufferIndex < end) {
            // Read a character and determine its kind
            var charCode = buffer.charCodeAt(bufferIndex);
            var characterKind = Tokenizer._charCodeMap[charCode];
            if (characterKind === undefined) {
                // Anything without a table entry is classified as Other
                characterKind = TokenKind.Other;
            }
            // Can we append to an existing token?  Yes if:
            // 1. There is an existing token, AND
            // 2. It is the same kind of token, AND
            // 3. It's not punctuation (which is always one character)
            var canAppend = tokenKind !== undefined &&
                characterKind === tokenKind &&
                Tokenizer._isMultiCharacterToken(tokenKind);
            if (!canAppend) {
                // Is there a previous completed token to push?
                if (tokenKind !== undefined) {
                    tokens.push(new Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));
                }
                // Start a new token at the current character
                tokenPos = bufferIndex;
                tokenKind = characterKind;
            }
            ++bufferIndex;
        }
        // Is there a previous completed token to push?
        if (tokenKind !== undefined) {
            tokens.push(new Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));
        }
        tokens.push(new Token(TokenKind.Newline, line.getNewRange(line.end, line.end), line));
    };
    /**
     * Returns true if the token can be comprised of multiple characters
     *
     * @param kind - the TokenKind value to test
     * @returns `true` only for Spacing, AsciiWord and Other
     */
    Tokenizer._isMultiCharacterToken = function (kind) {
        switch (kind) {
            case TokenKind.Spacing:
            case TokenKind.AsciiWord:
            case TokenKind.Other:
                return true;
        }
        return false;
    };
    /**
     * Builds the lookup tables on first use; later calls return immediately.
     *
     * - `_charCodeMap` maps a UTF-16 code unit to its TokenKind.
     * - `_punctuationTokens` maps a TokenKind to `true` when it is punctuation.
     */
    Tokenizer._ensureInitialized = function () {
        if (Tokenizer._charCodeMap) {
            return;
        }
        // Build into locals and publish only when both tables are complete, so
        // that a failure part-way through cannot leave a half-initialized state
        // that the guard above would then treat as ready.
        // Object.create(null) keeps inherited Object.prototype members out of lookups.
        var charCodeMap = Object.create(null);
        var punctuationTokens = Object.create(null);
        var i;
        // All Markdown punctuation characters
        var punctuation = Tokenizer._commonMarkPunctuationCharacters;
        for (i = 0; i < punctuation.length; ++i) {
            charCodeMap[punctuation.charCodeAt(i)] = TokenKind.OtherPunctuation;
        }
        // Special symbols; these override the generic OtherPunctuation entries above
        // !"#$%&\'()*+,\-.\/:;<=>?@[\\]^_`{|}~
        var specialMap = {
            '\\': TokenKind.Backslash,
            '<': TokenKind.LessThan,
            '>': TokenKind.GreaterThan,
            '=': TokenKind.Equals,
            "'": TokenKind.SingleQuote,
            '"': TokenKind.DoubleQuote,
            '/': TokenKind.Slash,
            '-': TokenKind.Hyphen,
            '@': TokenKind.AtSign,
            '{': TokenKind.LeftCurlyBracket,
            '}': TokenKind.RightCurlyBracket,
            '`': TokenKind.Backtick,
            '.': TokenKind.Period,
            ':': TokenKind.Colon,
            ',': TokenKind.Comma,
            '[': TokenKind.LeftSquareBracket,
            ']': TokenKind.RightSquareBracket,
            '|': TokenKind.Pipe,
            '(': TokenKind.LeftParenthesis,
            ')': TokenKind.RightParenthesis,
            '#': TokenKind.PoundSymbol,
            '+': TokenKind.Plus,
            '$': TokenKind.DollarSign
        };
        var specialKeys = Object.getOwnPropertyNames(specialMap);
        for (i = 0; i < specialKeys.length; ++i) {
            var key = specialKeys[i];
            charCodeMap[key.charCodeAt(0)] = specialMap[key];
            punctuationTokens[specialMap[key]] = true;
        }
        punctuationTokens[TokenKind.OtherPunctuation] = true;
        // Word characters are assigned after punctuation, so they take precedence
        var word = Tokenizer._wordCharacters;
        for (i = 0; i < word.length; ++i) {
            charCodeMap[word.charCodeAt(i)] = TokenKind.AsciiWord;
        }
        charCodeMap[' '.charCodeAt(0)] = TokenKind.Spacing;
        charCodeMap['\t'.charCodeAt(0)] = TokenKind.Spacing;
        Tokenizer._punctuationTokens = punctuationTokens;
        // Assigned last because this is the property the guard checks
        Tokenizer._charCodeMap = charCodeMap;
    };
    // Characters classified as punctuation. The underscore is not listed here;
    // it appears in _wordCharacters below and is therefore tokenized as AsciiWord.
    Tokenizer._commonMarkPunctuationCharacters = '!"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~';
    // Characters that may be merged into a single AsciiWord token
    Tokenizer._wordCharacters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
    return Tokenizer;
}());
export { Tokenizer };
//# sourceMappingURL=Tokenizer.js.map
\No newline at end of file

1	`import { TextRange } from './TextRange';`
2	`import { Token, TokenKind } from './Token';`
3	`var Tokenizer = /** @class */ (function () {`
4	`function Tokenizer() {`
5	`}`
6	`/**`
7	`* Given a list of input lines, this returns an array of extracted tokens.`
8	`* The last token will always be TokenKind.EndOfInput.`
9	`*/`
10	`Tokenizer.readTokens = function (lines) {`
11	`Tokenizer._ensureInitialized();`
12	`var tokens = [];`
13	`var lastLine = undefined;`
14	`for (var _i = 0, lines_1 = lines; _i < lines_1.length; _i++) {`
15	`var line = lines_1[_i];`
16	`Tokenizer._pushTokensForLine(tokens, line);`
17	`lastLine = line;`
18	`}`
19	`if (lastLine) {`
20	`tokens.push(new Token(TokenKind.EndOfInput, lastLine.getNewRange(lastLine.end, lastLine.end), lastLine));`
21	`}`
22	`else {`
23	`tokens.push(new Token(TokenKind.EndOfInput, TextRange.empty, TextRange.empty));`
24	`}`
25	`return tokens;`
26	`};`
27	`/**`
28	`* Returns true if the token is a CommonMark punctuation character.`
29	`* These are basically all the ASCII punctuation characters.`
30	`*/`
31	`Tokenizer.isPunctuation = function (tokenKind) {`
32	`Tokenizer._ensureInitialized();`
33	`return Tokenizer._punctuationTokens[tokenKind] \|\| false;`
34	`};`
35	`Tokenizer._pushTokensForLine = function (tokens, line) {`
36	`var buffer = line.buffer;`
37	`var end = line.end;`
38	`var bufferIndex = line.pos;`
39	`var tokenKind = undefined;`
40	`var tokenPos = bufferIndex;`
41	`while (bufferIndex < end) {`
42	`// Read a character and determine its kind`
43	`var charCode = buffer.charCodeAt(bufferIndex);`
44	`var characterKind = Tokenizer._charCodeMap[charCode];`
45	`if (characterKind === undefined) {`
46	`characterKind = TokenKind.Other;`
47	`}`
48	`// Can we append to an existing token? Yes if:`
49	`// 1. There is an existing token, AND`
50	`// 2. It is the same kind of token, AND`
51	`// 3. It's not punctuation (which is always one character)`
52	`if (tokenKind !== undefined &&`
53	`characterKind === tokenKind &&`
54	`Tokenizer._isMultiCharacterToken(tokenKind)) {`
55	`// yes, append`
56	`}`
57	`else {`
58	`// Is there a previous completed token to push?`
59	`if (tokenKind !== undefined) {`
60	`tokens.push(new Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));`
61	`}`
62	`tokenPos = bufferIndex;`
63	`tokenKind = characterKind;`
64	`}`
65	`++bufferIndex;`
66	`}`
67	`// Is there a previous completed token to push?`
68	`if (tokenKind !== undefined) {`
69	`tokens.push(new Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));`
70	`}`
71	`tokens.push(new Token(TokenKind.Newline, line.getNewRange(line.end, line.end), line));`
72	`};`
73	`/**`
74	`* Returns true if the token can be comprised of multiple characters`
75	`*/`
76	`Tokenizer._isMultiCharacterToken = function (kind) {`
77	`switch (kind) {`
78	`case TokenKind.Spacing:`
79	`case TokenKind.AsciiWord:`
80	`case TokenKind.Other:`
81	`return true;`
82	`}`
83	`return false;`
84	`};`
85	`Tokenizer._ensureInitialized = function () {`
86	`if (Tokenizer._charCodeMap) {`
87	`return;`
88	`}`
89	`Tokenizer._charCodeMap = {};`
90	`Tokenizer._punctuationTokens = {};`
91	`// All Markdown punctuation characters`
92	`var punctuation = Tokenizer._commonMarkPunctuationCharacters;`
93	`for (var i = 0; i < punctuation.length; ++i) {`
94	`var charCode = punctuation.charCodeAt(i);`
95	`Tokenizer._charCodeMap[charCode] = TokenKind.OtherPunctuation;`
96	`}`
97	`// Special symbols`
98	`` // !"#$%&\'()*+,\-.\/:;<=>?@[\\]^_`{\|}~ ``
99	`var specialMap = {`
100	`'\\': TokenKind.Backslash,`
101	`'<': TokenKind.LessThan,`
102	`'>': TokenKind.GreaterThan,`
103	`'=': TokenKind.Equals,`
104	`"'": TokenKind.SingleQuote,`
105	`'"': TokenKind.DoubleQuote,`
106	`'/': TokenKind.Slash,`
107	`'-': TokenKind.Hyphen,`
108	`'@': TokenKind.AtSign,`
109	`'{': TokenKind.LeftCurlyBracket,`
110	`'}': TokenKind.RightCurlyBracket,`
111	`` '`': TokenKind.Backtick, ``
112	`'.': TokenKind.Period,`
113	`':': TokenKind.Colon,`
114	`',': TokenKind.Comma,`
115	`'[': TokenKind.LeftSquareBracket,`
116	`']': TokenKind.RightSquareBracket,`
117	`'\|': TokenKind.Pipe,`
118	`'(': TokenKind.LeftParenthesis,`
119	`')': TokenKind.RightParenthesis,`
120	`'#': TokenKind.PoundSymbol,`
121	`'+': TokenKind.Plus,`
122	`$: TokenKind.DollarSign`
123	`};`
124	`for (var _i = 0, _a = Object.getOwnPropertyNames(specialMap); _i < _a.length; _i++) {`
125	`var key = _a[_i];`
126	`Tokenizer._charCodeMap[key.charCodeAt(0)] = specialMap[key];`
127	`Tokenizer._punctuationTokens[specialMap[key]] = true;`
128	`}`
129	`Tokenizer._punctuationTokens[TokenKind.OtherPunctuation] = true;`
130	`var word = Tokenizer._wordCharacters;`
131	`for (var i = 0; i < word.length; ++i) {`
132	`var charCode = word.charCodeAt(i);`
133	`Tokenizer._charCodeMap[charCode] = TokenKind.AsciiWord;`
134	`}`
135	`Tokenizer._charCodeMap[' '.charCodeAt(0)] = TokenKind.Spacing;`
136	`Tokenizer._charCodeMap['\t'.charCodeAt(0)] = TokenKind.Spacing;`
137	`};`
138	`` Tokenizer._commonMarkPunctuationCharacters = '!"#$%&\'()*+,-./:;<=>?@[\\]^`{\|}~'; ``
139	`Tokenizer._wordCharacters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';`
140	`return Tokenizer;`
141	`}());`
142	`export { Tokenizer };`
143	`//# sourceMappingURL=Tokenizer.js.map`
\	No newline at end of file