1 | /**
|
2 | * @license
|
3 | * Copyright 2018 Google LLC
|
4 | *
|
5 | * Licensed under the Apache License, Version 2.0 (the "License");
|
6 | * you may not use this file except in compliance with the License.
|
7 | * You may obtain a copy of the License at
|
8 | *
|
9 | * https://www.apache.org/licenses/LICENSE-2.0
|
10 | *
|
11 | * Unless required by applicable law or agreed to in writing, software
|
12 | * distributed under the License is distributed on an "AS IS" BASIS,
|
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14 | * See the License for the specific language governing permissions and
|
15 | * limitations under the License.
|
16 | */
|
17 |
|
18 | ;
|
19 |
|
20 | // A simple lexer for MySQL SQL.
|
21 | // SQL has many divergent dialects with subtly different
|
22 | // conventions for string escaping and comments.
|
23 | // This just attempts to roughly tokenize MySQL's specific variant.
|
24 | // See also
|
25 | // https://www.w3.org/2005/05/22-SPARQL-MySQL/sql_yacc
|
26 | // https://github.com/twitter/mysql/blob/master/sql/sql_lex.cc
|
27 | // https://dev.mysql.com/doc/refman/5.7/en/string-literals.html
|
28 |
|
29 | // "--" followed by whitespace starts a line comment
|
// "#" starts a line comment
|
31 | // "/*" starts an inline comment ended at first "*/"
|
32 | // \N means null
|
// Prefixed strings: x'...' is a hex string, b'...' is a binary string, ....
|
34 | // '...', "..." are strings. `...` escapes identifiers.
|
35 | // doubled delimiters and backslash both escape
|
36 | // doubled delimiters work in `...` identifiers
|
37 |
|
38 | // eslint-disable-next-line no-use-before-define
|
39 | exports.makeLexer = makeLexer;
|
40 |
|
// Regex source for a character class matching a single whitespace
// character: tab, carriage return, line feed, or space.
const WSP = '[\\t\\r\\n ]';

// Matches, anchored at the start of a chunk, the longest prefix made up of
// complete comments and characters that can neither start a comment nor
// open a quoted run.  Because the outer group is starred, the pattern
// always matches (possibly the empty string).  Matching stops just before
// a quote character or before a comment that is not properly terminated.
const PREFIX_BEFORE_DELIMITER = new RegExp(
  '^(?:' +

    // Comment
    // https://dev.mysql.com/doc/refman/5.7/en/comments.html
    // https://dev.mysql.com/doc/refman/5.7/en/ansi-diff-comments.html
    // If we do not see a newline at the end of a comment, then it is
    // a concatenation hazard; a fragment concatenated at the end would
    // start in a comment context.
    `--(?=${ WSP })[^\\r\\n]*[\\r\\n]` +
    '|#[^\\r\\n]*[\\r\\n]' +
    '|/[*][\\s\\S]*?[*]/' +
    '|' +

    // Run of non-comment non-string starts
    `(?:[^'"\`\\-/#]|-(?!-${ WSP })|/(?![*]))` +
  ')*');

// Maps each opening quote character to a pattern that matches, anchored at
// the start of a chunk, as much of the quoted body as is available.  A body
// is a run of ordinary characters, backslash escapes, and doubled
// delimiters; the pattern stops before a lone closing delimiter or before a
// backslash that is the last character of the chunk.
const DELIMITED_BODIES = {
  '\'': /^(?:[^'\\]|\\[\s\S]|'')*/,
  '"': /^(?:[^"\\]|\\[\s\S]|"")*/,
  '`': /^(?:[^`\\]|\\[\s\S]|``)*/,
};
|
64 |
|
/**
 * Template tag that assembles a message string, rendering every
 * interpolated value with JSON.stringify so that it appears quoted and
 * escaped in the result.
 * @param {!Array.<string>} strs a valid TemplateObject.
 * @param {...*} dyn the interpolated values, in order.
 * @return {string} A message suitable for the Error constructor.
 */
function msg(strs, ...dyn) {
  // Fold left to right: each value is followed by the literal chunk that
  // comes after it in the template.
  return dyn.reduce(
    (soFar, value, index) =>
      soFar + (JSON.stringify(value) + strs[index + 1]),
    String(strs[0]));
}
|
77 |
|
/**
 * Returns a stateful function that can be fed chunks of input and
 * which returns a delimiter context.
 *
 * The returned function remembers, between calls, which quote character
 * (if any) is currently open.  Once it has thrown, every later call
 * rethrows an Error with the same message.
 *
 * @return {!function (?string) : ?string}
 *    a stateful function that takes a string of SQL text and
 *    returns the context after it: the open quote character, or null
 *    when not inside a quoted run.  Subsequent calls will assume
 *    that context.  Passing null checks that no quoted run is still
 *    open; it throws if one is and otherwise returns null.
 */
function makeLexer() {
  let errorMessage = null;
  let delimiter = null;
  return (text) => {
    if (errorMessage) {
      // Replay the error message if we've already failed.
      throw new Error(errorMessage);
    }
    if (text === null) {
      if (delimiter) {
        throw new Error(
          errorMessage = `Unclosed quoted string: ${ delimiter }`);
      }
      // There is nothing to lex; do not fall through and tokenize the
      // string "null".
      return delimiter;
    }
    let rest = String(text);
    while (rest) {
      const pattern = delimiter ?
        DELIMITED_BODIES[delimiter] :
        PREFIX_BEFORE_DELIMITER;
      const match = pattern.exec(rest);
      // Match must be defined since all possible values of pattern have
      // an outer Kleene-* and no postcondition so will fallback to matching
      // the empty string.
      let nConsumed = match[0].length;
      if (rest.length > nConsumed) {
        // The pattern stopped early, so the next character decides whether
        // we change context or fail.
        const chr = rest.charAt(nConsumed);
        if (delimiter) {
          if (chr === delimiter) {
            // Closing quote.
            delimiter = null;
            ++nConsumed;
          } else {
            // The body pattern stopped on something other than the closing
            // quote.  Report the delimiter we were waiting for rather than
            // the character we found.
            throw new Error(
              errorMessage = msg`Expected ${ delimiter } at ${ rest }`);
          }
        } else if (Object.hasOwnProperty.call(DELIMITED_BODIES, chr)) {
          // Opening quote.
          delimiter = chr;
          ++nConsumed;
        } else {
          // Neither plain text, a complete comment, nor a quote, e.g. a
          // comment that is not terminated within this chunk.
          throw new Error(
            errorMessage = msg`Expected delimiter at ${ rest }`);
        }
      }
      rest = rest.substring(nConsumed);
    }
    return delimiter;
  };
}
|