UNPKG

10.4 kBSource Map (JSON)View Raw
1{"version":3,"file":"Tokenizer.js","sourceRoot":"","sources":["../../src/parser/Tokenizer.ts"],"names":[],"mappings":";;;AAAA,yCAAwC;AACxC,iCAA2C;AAE3C;IAAA;IAkKA,CAAC;IA1JC;;;OAGG;IACW,oBAAU,GAAxB,UAAyB,KAAkB;QACzC,SAAS,CAAC,kBAAkB,EAAE,CAAC;QAE/B,IAAM,MAAM,GAAY,EAAE,CAAC;QAE3B,IAAI,QAAQ,GAA0B,SAAS,CAAC;QAEhD,KAAmB,UAAK,EAAL,eAAK,EAAL,mBAAK,EAAL,IAAK,EAAE;YAArB,IAAM,IAAI,cAAA;YACb,SAAS,CAAC,kBAAkB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAC3C,QAAQ,GAAG,IAAI,CAAC;SACjB;QAED,IAAI,QAAQ,EAAE;YACZ,MAAM,CAAC,IAAI,CACT,IAAI,aAAK,CAAC,iBAAS,CAAC,UAAU,EAAE,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,GAAG,EAAE,QAAQ,CAAC,GAAG,CAAC,EAAE,QAAQ,CAAC,CAC5F,CAAC;SACH;aAAM;YACL,MAAM,CAAC,IAAI,CAAC,IAAI,aAAK,CAAC,iBAAS,CAAC,UAAU,EAAE,qBAAS,CAAC,KAAK,EAAE,qBAAS,CAAC,KAAK,CAAC,CAAC,CAAC;SAChF;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACW,uBAAa,GAA3B,UAA4B,SAAoB;QAC9C,SAAS,CAAC,kBAAkB,EAAE,CAAC;QAC/B,OAAO,SAAS,CAAC,kBAAkB,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC;IAC1D,CAAC;IAEc,4BAAkB,GAAjC,UAAkC,MAAe,EAAE,IAAe;QAChE,IAAM,MAAM,GAAW,IAAI,CAAC,MAAM,CAAC;QACnC,IAAM,GAAG,GAAW,IAAI,CAAC,GAAG,CAAC;QAE7B,IAAI,WAAW,GAAW,IAAI,CAAC,GAAG,CAAC;QACnC,IAAI,SAAS,GAA0B,SAAS,CAAC;QACjD,IAAI,QAAQ,GAAW,WAAW,CAAC;QAEnC,OAAO,WAAW,GAAG,GAAG,EAAE;YACxB,0CAA0C;YAC1C,IAAM,QAAQ,GAAW,MAAM,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;YACxD,IAAI,aAAa,GAA0B,SAAS,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;YAC5E,IAAI,aAAa,KAAK,SAAS,EAAE;gBAC/B,aAAa,GAAG,iBAAS,CAAC,KAAK,CAAC;aACjC;YAED,+CAA+C;YAC/C,qCAAqC;YACrC,uCAAuC;YACvC,0DAA0D;YAC1D,IACE,SAAS,KAAK,SAAS;gBACvB,aAAa,KAAK,SAAS;gBAC3B,SAAS,CAAC,sBAAsB,CAAC,SAAS,CAAC,EAC3C;gBACA,cAAc;aACf;iBAAM;gBACL,+CAA+C;gBAC/C,IAAI,SAAS,KAAK,SAAS,EAAE;oBAC3B,MAAM,CAAC,IAAI,CAAC,IAAI,aAAK,CAAC,SAAS,EAAE,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,WAAW,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;iBAClF;gBAED,QAAQ,GAAG,WAAW,CAAC;gBACvB,SAAS,GAAG,aAAa,CAAC;aAC3B;YAED,EAAE,WAAW,CAAC;SACf;QAED,+CAA+C;QAC/C,IAAI,SAAS,KAAK,SAAS,EAAE;YAC3B,MAAM,CAAC,IAAI,CAAC,IAAI,aAAK,CAAC,SAAS,EAAE,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,WAAW,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;SAClF;QAED,MAAM,CAAC,IAAI,CAAC,IAAI,aAAK,CAAC,iBAAS,CAAC,OAAO,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;IACxF,CAAC;IAED;;OAEG;IACY,gCAAsB,GAArC,UAAsC,IAAe;QACnD,QAAQ,IAAI,EAAE;YACZ,KAAK,iBAAS,CAAC,OAAO,CAAC;YACvB,KAAK,iBAAS,CAAC,SAAS,CAAC;YACzB,KAAK,iBAAS,CAAC,KAAK;gBAClB,OAAO,IAAI,CAAC;SACf;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAEc,4BAAkB,GAAjC;QACE,IAAI,SAAS,CAAC,YAAY,EAAE;YAC1B,OAAO;SACR;QAED,SAAS,CAAC,YAAY,GAAG,EAAE,CAAC;QAC5B,SAAS,CAAC,kBAAkB,GAAG,EAAE,CAAC;QAElC,sCAAsC;QACtC,IAAM,WAAW,GAAW,SAAS,CAAC,gCAAgC,CAAC;QACvE,KAAK,IAAI,CAAC,GAAW,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;YACnD,IAAM,QAAQ,GAAW,WAAW,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACnD,SAAS,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,iBAAS,CAAC,gBAAgB,CAAC;SAC/D;QAED,kBAAkB;QAElB,uCAAuC;QACvC,IAAM,UAAU,GAAuC;YACrD,IAAI,EAAE,iBAAS,CAAC,SAAS;YACzB,GAAG,EAAE,iBAAS,CAAC,QAAQ;YACvB,GAAG,EAAE,iBAAS,CAAC,WAAW;YAC1B,GAAG,EAAE,iBAAS,CAAC,MAAM;YACrB,GAAG,EAAE,iBAAS,CAAC,WAAW;YAC1B,GAAG,EAAE,iBAAS,CAAC,WAAW;YAC1B,GAAG,EAAE,iBAAS,CAAC,KAAK;YACpB,GAAG,EAAE,iBAAS,CAAC,MAAM;YACrB,GAAG,EAAE,iBAAS,CAAC,MAAM;YACrB,GAAG,EAAE,iBAAS,CAAC,gBAAgB;YAC/B,GAAG,EAAE,iBAAS,CAAC,iBAAiB;YAChC,GAAG,EAAE,iBAAS,CAAC,QAAQ;YACvB,GAAG,EAAE,iBAAS,CAAC,MAAM;YACrB,GAAG,EAAE,iBAAS,CAAC,KAAK;YACpB,GAAG,EAAE,iBAAS,CAAC,KAAK;YACpB,GAAG,EAAE,iBAAS,CAAC,iBAAiB;YAChC,GAAG,EAAE,iBAAS,CAAC,kBAAkB;YACjC,GAAG,EAAE,iBAAS,CAAC,IAAI;YACnB,GAAG,EAAE,iBAAS,CAAC,eAAe;YAC9B,GAAG,EAAE,iBAAS,CAAC,gBAAgB;YAC/B,GAAG,EAAE,iBAAS,CAAC,WAAW;YAC1B,GAAG,EAAE,iBAAS,CAAC,IAAI;YACnB,CAAC,EAAE,iBAAS,CAAC,UAAU;SACxB,CAAC;QACF,KAAkB,UAAsC,EAAtC,KAAA,MAAM,CAAC,mBAAmB,CAAC,UAAU,CAAC,EAAtC,cAAsC,EAAtC,IAAsC,EAAE;YAArD,IAAM,GAAG,SAAA;YACZ,SAAS,CAAC,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;YAC5D,SAAS,CAAC,kBAAkB,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC;SACtD;QAED,SAAS,CAAC,kBAAkB,CAAC,iBAAS,CAAC,gBAAgB,CAAC,GAAG,IAAI,CAAC;QAEhE,IAAM,IAAI,GAAW,SAAS,CAAC,eAAe,CAAC;QAC/C,KAAK,IAAI,CAAC,GAAW,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;YAC5C,IAAM,QAAQ,GAAW,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAC5C,SAAS,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,iBAAS,CAAC,SAAS,CAAC;SACxD;QACD,SAAS,CAAC,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,iBAAS,CAAC,OAAO,CAAC;QAC9D,SAAS,CAAC,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,iBAAS,CAAC,OAAO,CAAC;IACjE,CAAC;IAhKuB,0CAAgC,GAAW,mCAAmC,CAAC;IAC/E,yBAAe,GACrC,iEAAiE,CAAC;IA+JtE,gBAAC;CAAA,AAlKD,IAkKC;AAlKY,8BAAS","sourcesContent":["import { TextRange } from './TextRange';\r\nimport { Token, TokenKind } from './Token';\r\n\r\nexport class Tokenizer {\r\n private static readonly _commonMarkPunctuationCharacters: string = '!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^`{|}~';\r\n private static readonly _wordCharacters: string =\r\n 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';\r\n\r\n private static _charCodeMap: { [charCode: number]: TokenKind | undefined };\r\n private static _punctuationTokens: { [tokenKind: number]: boolean };\r\n\r\n /**\r\n * Given a list of input lines, this returns an array of extracted tokens.\r\n * The last token will always be TokenKind.EndOfInput.\r\n */\r\n public static readTokens(lines: TextRange[]): Token[] {\r\n Tokenizer._ensureInitialized();\r\n\r\n const tokens: Token[] = [];\r\n\r\n let lastLine: TextRange | undefined = undefined;\r\n\r\n for (const line of lines) {\r\n Tokenizer._pushTokensForLine(tokens, line);\r\n lastLine = line;\r\n }\r\n\r\n if (lastLine) {\r\n tokens.push(\r\n new Token(TokenKind.EndOfInput, lastLine.getNewRange(lastLine.end, lastLine.end), lastLine)\r\n );\r\n } else {\r\n tokens.push(new Token(TokenKind.EndOfInput, TextRange.empty, TextRange.empty));\r\n }\r\n\r\n return tokens;\r\n }\r\n\r\n /**\r\n * Returns true if the token is a CommonMark punctuation character.\r\n * These are basically all the ASCII punctuation characters.\r\n */\r\n public static isPunctuation(tokenKind: TokenKind): boolean {\r\n Tokenizer._ensureInitialized();\r\n return Tokenizer._punctuationTokens[tokenKind] || false;\r\n }\r\n\r\n private static _pushTokensForLine(tokens: Token[], line: TextRange): void {\r\n const buffer: string = line.buffer;\r\n const end: number = line.end;\r\n\r\n let bufferIndex: number = line.pos;\r\n let tokenKind: TokenKind | undefined = undefined;\r\n let tokenPos: number = bufferIndex;\r\n\r\n while (bufferIndex < end) {\r\n // Read a character and determine its kind\r\n const charCode: number = buffer.charCodeAt(bufferIndex);\r\n let characterKind: TokenKind | undefined = Tokenizer._charCodeMap[charCode];\r\n if (characterKind === undefined) {\r\n characterKind = TokenKind.Other;\r\n }\r\n\r\n // Can we append to an existing token? Yes if:\r\n // 1. There is an existing token, AND\r\n // 2. It is the same kind of token, AND\r\n // 3. It's not punctuation (which is always one character)\r\n if (\r\n tokenKind !== undefined &&\r\n characterKind === tokenKind &&\r\n Tokenizer._isMultiCharacterToken(tokenKind)\r\n ) {\r\n // yes, append\r\n } else {\r\n // Is there a previous completed token to push?\r\n if (tokenKind !== undefined) {\r\n tokens.push(new Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));\r\n }\r\n\r\n tokenPos = bufferIndex;\r\n tokenKind = characterKind;\r\n }\r\n\r\n ++bufferIndex;\r\n }\r\n\r\n // Is there a previous completed token to push?\r\n if (tokenKind !== undefined) {\r\n tokens.push(new Token(tokenKind, line.getNewRange(tokenPos, bufferIndex), line));\r\n }\r\n\r\n tokens.push(new Token(TokenKind.Newline, line.getNewRange(line.end, line.end), line));\r\n }\r\n\r\n /**\r\n * Returns true if the token can be comprised of multiple characters\r\n */\r\n private static _isMultiCharacterToken(kind: TokenKind): boolean {\r\n switch (kind) {\r\n case TokenKind.Spacing:\r\n case TokenKind.AsciiWord:\r\n case TokenKind.Other:\r\n return true;\r\n }\r\n return false;\r\n }\r\n\r\n private static _ensureInitialized(): void {\r\n if (Tokenizer._charCodeMap) {\r\n return;\r\n }\r\n\r\n Tokenizer._charCodeMap = {};\r\n Tokenizer._punctuationTokens = {};\r\n\r\n // All Markdown punctuation characters\r\n const punctuation: string = Tokenizer._commonMarkPunctuationCharacters;\r\n for (let i: number = 0; i < punctuation.length; ++i) {\r\n const charCode: number = punctuation.charCodeAt(i);\r\n Tokenizer._charCodeMap[charCode] = TokenKind.OtherPunctuation;\r\n }\r\n\r\n // Special symbols\r\n\r\n // !\"#$%&\\'()*+,\\-.\\/:;<=>?@[\\\\]^_`{|}~\r\n const specialMap: { [character: string]: TokenKind } = {\r\n '\\\\': TokenKind.Backslash,\r\n '<': TokenKind.LessThan,\r\n '>': TokenKind.GreaterThan,\r\n '=': TokenKind.Equals,\r\n \"'\": TokenKind.SingleQuote,\r\n '\"': TokenKind.DoubleQuote,\r\n '/': TokenKind.Slash,\r\n '-': TokenKind.Hyphen,\r\n '@': TokenKind.AtSign,\r\n '{': TokenKind.LeftCurlyBracket,\r\n '}': TokenKind.RightCurlyBracket,\r\n '`': TokenKind.Backtick,\r\n '.': TokenKind.Period,\r\n ':': TokenKind.Colon,\r\n ',': TokenKind.Comma,\r\n '[': TokenKind.LeftSquareBracket,\r\n ']': TokenKind.RightSquareBracket,\r\n '|': TokenKind.Pipe,\r\n '(': TokenKind.LeftParenthesis,\r\n ')': TokenKind.RightParenthesis,\r\n '#': TokenKind.PoundSymbol,\r\n '+': TokenKind.Plus,\r\n $: TokenKind.DollarSign\r\n };\r\n for (const key of Object.getOwnPropertyNames(specialMap)) {\r\n Tokenizer._charCodeMap[key.charCodeAt(0)] = specialMap[key];\r\n Tokenizer._punctuationTokens[specialMap[key]] = true;\r\n }\r\n\r\n Tokenizer._punctuationTokens[TokenKind.OtherPunctuation] = true;\r\n\r\n const word: string = Tokenizer._wordCharacters;\r\n for (let i: number = 0; i < word.length; ++i) {\r\n const charCode: number = word.charCodeAt(i);\r\n Tokenizer._charCodeMap[charCode] = TokenKind.AsciiWord;\r\n }\r\n Tokenizer._charCodeMap[' '.charCodeAt(0)] = TokenKind.Spacing;\r\n Tokenizer._charCodeMap['\\t'.charCodeAt(0)] = TokenKind.Spacing;\r\n }\r\n}\r\n"]}
\No newline at end of file