1 /*
2  * Copyright 2015-2018 HuntLabs.cn
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 module hunt.sql.dialect.mysql.parser.MySqlLexer;
17 
18 // import  hunt.sql.parser.CharTypes.isFirstIdentifierChar;
19 // import  hunt.sql.parser.LayoutCharacters.LayoutCharacters.EOI;
20 
21 import  hunt.sql.parser.CharTypes;
22 import  hunt.sql.parser.LayoutCharacters;
23 
24 
25 import hunt.collection;
26 import std.conv;
27 import hunt.String;
28 import hunt.sql.parser;
29 import hunt.sql.util.FnvHash;
30 import hunt.sql.util.DBType;
31 import std.string;
32 
33 import std.concurrency : initOnce;
34 
35 import hunt.text;
36 alias  hunt_charAt = hunt.text.Common.charAt;
37 
38 public class MySqlLexer : Lexer {
39     // __gshared SymbolTable quoteTable;
40     // __gshared Keywords DEFAULT_MYSQL_KEYWORDS;
41     // __gshared bool[] identifierFlags;
42 
43     static SymbolTable quoteTable() {
44         __gshared SymbolTable inst;
45         return initOnce!inst(initSymbolTable());
46     }
47 
48     public static Keywords DEFAULT_MYSQL_KEYWORDS() {
49         __gshared Keywords inst;
50         return initOnce!inst(initKeywords());
51     }
52     
53     
54     public static bool[] identifierFlags() {
55         __gshared bool[] inst;
56         return initOnce!inst(initIdentifierFlags());
57     } 
58 
59     private static SymbolTable initSymbolTable() {
60         SymbolTable quoteTable = new SymbolTable(8192);
61         return quoteTable;
62     }
63 
64     private static Keywords initKeywords() {
65         Map!(string, Token) map = new HashMap!(string, Token)();
66 
67         map.putAll(Keywords.DEFAULT_KEYWORDS.getKeywords());
68 
69         map.put("DUAL", Token.DUAL);
70         map.put("FALSE", Token.FALSE);
71         map.put("IDENTIFIED", Token.IDENTIFIED);
72         map.put("IF", Token.IF);
73         map.put("KILL", Token.KILL);
74 
75         map.put("LIMIT", Token.LIMIT);
76         map.put("TRUE", Token.TRUE);
77         map.put("BINARY", Token.BINARY);
78         map.put("SHOW", Token.SHOW);
79         map.put("CACHE", Token.CACHE);
80         map.put("ANALYZE", Token.ANALYZE);
81         map.put("OPTIMIZE", Token.OPTIMIZE);
82         map.put("ROW", Token.ROW);
83         map.put("BEGIN", Token.BEGIN);
84         map.put("END", Token.END);
85         map.put("DIV", Token.DIV);
86         map.put("MERGE", Token.MERGE);
87         
88         // for oceanbase & mysql 5.7
89         map.put("PARTITION", Token.PARTITION);
90         
91         map.put("CONTINUE", Token.CONTINUE);
92         map.put("UNDO", Token.UNDO);
93         map.put("SQLSTATE", Token.SQLSTATE);
94         map.put("CONDITION", Token.CONDITION);
95         map.put("MOD", Token.MOD);
96         map.put("CONTAINS", Token.CONTAINS);
97         map.put("RLIKE", Token.RLIKE);
98         map.put("FULLTEXT", Token.FULLTEXT);
99 
100         return new Keywords(map);
101     }
102 
103     private static bool[] initIdentifierFlags() {
104         bool[] flags = new bool[256];
105 
106         for (dchar c = 0; c < flags.length; ++c) {
107             if (c >= 'A' && c <= 'Z') {
108                 flags[c] = true;
109             } else if (c >= 'a' && c <= 'z') {
110                 flags[c] = true;
111             } else if (c >= '0' && c <= '9') {
112                 flags[c] = true;
113             }
114         }
115         // identifierFlags['`'] = true;
116         flags['_'] = true;
117         //identifierFlags['-'] = true; // mysql
118 
119         return flags;
120     }
121 
122     public this(char[] input, int inputLength, bool skipComment){
123         dbType = DBType.MYSQL.name;
124 
125         super(input, inputLength, skipComment);
126         super.keywods = DEFAULT_MYSQL_KEYWORDS;
127     }
128 
129     public this(string input){
130         this(input, true, true);
131     }
132 
133     public this(string input, SQLParserFeature[] features...){
134         dbType = DBType.MYSQL.name;
135 
136         super(input, true);
137         this.keepComments = true;
138         super.keywods = DEFAULT_MYSQL_KEYWORDS;
139 
140         foreach(SQLParserFeature feature ; features) {
141             config(feature, true);
142         }
143     }
144 
145     public this(string input, bool skipComment, bool keepComments){
146         dbType = DBType.MYSQL.name;
147 
148         super(input, skipComment);
149         this.skipComment = skipComment;
150         this.keepComments = keepComments;
151         super.keywods = DEFAULT_MYSQL_KEYWORDS;
152     }
153 
154     override public void scanSharp() {
155         if (ch != '#') {
156             throw new ParserException("illegal stat. "  ~ info());
157         }
158 
159         if (charAt(_pos + 1) == '{') {
160             scanVariable();
161             return;
162         }
163 
164         Token lastToken = this._token;
165 
166         scanChar();
167         _mark = _pos;
168         bufPos = 0;
169         for (;;) {
170             if (ch == '\r') {
171                 if (charAt(_pos + 1) == '\n') {
172                     bufPos += 2;
173                     scanChar();
174                     break;
175                 }
176                 bufPos++;
177                 break;
178             } else if (ch == LayoutCharacters.EOI) {
179                 break;
180             }
181 
182             if (ch == '\n') {
183                 scanChar();
184                 bufPos++;
185                 break;
186             }
187 
188             scanChar();
189             bufPos++;
190         }
191 
192         _stringVal = subString(_mark - 1, bufPos + 1);
193         _token = Token.LINE_COMMENT;
194         commentCount++;
195         if (keepComments) {
196             addComment(_stringVal);
197         }
198 
199         if (commentHandler !is null && commentHandler.handle(lastToken, _stringVal)) {
200             return;
201         }
202         
203         endOfComment = isEOF();
204 
205         if (!isAllowComment() && (isEOF() || !isSafeComment(_stringVal))) {
206             throw new NotAllowCommentException();
207         }
208     }
209 
210     override public void scanVariable() {
211         if (ch != ':' && ch != '#' && ch != '$') {
212             throw new ParserException("illegal variable. "  ~ info());
213         }
214 
215         _mark = _pos;
216         bufPos = 1;
217 
218         if (charAt(_pos + 1) == '`') {
219             ++_pos;
220             ++bufPos;
221             char ch;
222             for (;;) {
223                 ch = charAt(++_pos);
224 
225                 if (ch == '`') {
226                     bufPos++;
227                     ch = charAt(++_pos);
228                     break;
229                 } else if (ch == LayoutCharacters.EOI) {
230                     throw new ParserException("illegal identifier. "  ~ info());
231                 }
232 
233                 bufPos++;
234                 continue;
235             }
236 
237             this.ch = charAt(_pos);
238 
239             _stringVal = subString(_mark, bufPos);
240             _token = Token.VARIANT;
241         } else if (charAt(_pos + 1) == '{') {
242             ++_pos;
243             ++bufPos;
244             char ch;
245             for (;;) {
246                 ch = charAt(++_pos);
247 
248                 if (ch == '}') {
249                     bufPos++;
250                     ch = charAt(++_pos);
251                     break;
252                 } else if (ch == LayoutCharacters.EOI) {
253                     throw new ParserException("illegal identifier. "  ~ info());
254                 }
255 
256                 bufPos++;
257                 continue;
258             }
259 
260             this.ch = charAt(_pos);
261 
262             _stringVal = subString(_mark, bufPos);
263             _token = Token.VARIANT;
264         } else {
265             for (;;) {
266                 ch = charAt(++_pos);
267 
268                 if (!isIdentifierChar(ch)) {
269                     break;
270                 }
271 
272                 bufPos++;
273                 continue;
274             }
275         }
276 
277         this.ch = charAt(_pos);
278 
279         _stringVal = subString(_mark, bufPos);
280         _token = Token.VARIANT;
281     }
282 
283     override protected void scanVariable_at() {
284         if (ch != '@') {
285             throw new ParserException("illegal variable. "  ~ info());
286         }
287 
288         _mark = _pos;
289         bufPos = 1;
290 
291         if (charAt(_pos + 1) == '@') {
292             ch = charAt(++_pos);
293             bufPos++;
294         }
295 
296         if (charAt(_pos + 1) == '`') {
297             ++_pos;
298             ++bufPos;
299             char ch;
300             for (;;) {
301                 ch = charAt(++_pos);
302 
303                 if (ch == '`') {
304                     bufPos++;
305                     ++_pos;
306                     break;
307                 } else if (ch == LayoutCharacters.EOI) {
308                     throw new ParserException("illegal identifier. "  ~ info());
309                 }
310 
311                 bufPos++;
312                 continue;
313             }
314 
315             this.ch = charAt(_pos);
316 
317             _stringVal = subString(_mark, bufPos);
318             _token = Token.VARIANT;
319         } else {
320             for (; ; ) {
321                 ch = charAt(++_pos);
322 
323                 if (!isIdentifierChar(ch)) {
324                     break;
325                 }
326 
327                 bufPos++;
328                 continue;
329             }
330         }
331 
332         this.ch = charAt(_pos);
333 
334         _stringVal = subString(_mark, bufPos);
335         _token = Token.VARIANT;
336     }
337 
338     override public void scanIdentifier() {
339         _hash_lower = 0;
340         hash = 0;
341 
342          char first = ch;
343 
344         if (ch == 'b'
345                 && charAt(_pos + 1) == '\'') {
346             int i = 2;
347             int _mark = _pos + 2;
348             for (;;++i) {
349                 char ch = charAt(_pos + i);
350                 if (ch == '0' || ch == '1') {
351                     continue;
352                 } else if (ch == '\'') {
353                     bufPos += i;
354                     _pos += (i + 1);
355                     _stringVal = subString(_mark, i - 2);
356                     this.ch = charAt(_pos);
357                     _token = Token.BITS;
358                     return;
359                 } else if (ch == LayoutCharacters.EOI) {
360                     throw new ParserException("illegal identifier. "  ~ info());
361                 } else {
362                     break;
363                 }
364             }
365         }
366 
367         if (ch == '`') {
368             _mark = _pos;
369             bufPos = 1;
370             char ch;
371 
372             int startPos = _pos + 1;
373             int quoteIndex = cast(int)indexOf(text, '`',startPos);
374             if (quoteIndex == -1) {
375                 throw new ParserException("illegal identifier. "  ~ info());
376             }
377 
378             _hash_lower = 0xcbf29ce484222325L;
379             hash = 0xcbf29ce484222325L;
380 
381             for (int i = startPos; i < quoteIndex; ++i) {
382                 ch = hunt_charAt(text, i);
383 
384                 _hash_lower ^= ((ch >= 'A' && ch <= 'Z') ? (ch + 32) : ch);
385                 _hash_lower *= 0x100000001b3L;
386 
387                 hash ^= ch;
388                 hash *= 0x100000001b3L;
389             }
390 
391             _stringVal = quoteTable.addSymbol(text, _pos, quoteIndex + 1 - _pos, hash);
392             //_stringVal = text.substring(_mark, _pos);
393             _pos = quoteIndex + 1;
394             this.ch = charAt(_pos);
395             _token = Token.IDENTIFIER;
396         } else {
397              bool firstFlag = CharTypes.isFirstIdentifierChar(first);
398             if (!firstFlag) {
399                 throw new ParserException("illegal identifier. "  ~ info());
400             }
401 
402             _hash_lower = 0xcbf29ce484222325L;
403             hash = 0xcbf29ce484222325L;
404 
405             _hash_lower ^= ((ch >= 'A' && ch <= 'Z') ? (ch + 32) : ch);
406             _hash_lower *= 0x100000001b3L;
407 
408             hash ^= ch;
409             hash *= 0x100000001b3L;
410 
411             _mark = _pos;
412             bufPos = 1;
413             char ch = '\0';
414             for (;;) {
415                 ch = charAt(++_pos);
416 
417                 if (!isIdentifierChar(ch)) {
418                     break;
419                 }
420 
421                 bufPos++;
422 
423                 _hash_lower ^= ((ch >= 'A' && ch <= 'Z') ? (ch + 32) : ch);
424                 _hash_lower *= 0x100000001b3L;
425 
426                 hash ^= ch;
427                 hash *= 0x100000001b3L;
428 
429                 continue;
430             }
431 
432             this.ch = charAt(_pos);
433 
434             if (bufPos == 1) {
435                 _token = Token.IDENTIFIER;
436                 _stringVal = CharTypes.valueOf(first);
437                 if (_stringVal is null) {
438                     _stringVal = to!string(first);
439                 }
440                 return;
441             }
442 
443             Token tok = keywods.getKeyword(_hash_lower);
444             if (tok !is null) {
445                 _token = tok;
446                 if (_token == Token.IDENTIFIER) {
447                     _stringVal = SymbolTable.global.addSymbol(text, _mark, bufPos, hash);
448                 } else {
449                     _stringVal = null;
450                 }
451             } else {
452                 _token = Token.IDENTIFIER;
453                 _stringVal = SymbolTable.global.addSymbol(text, _mark, bufPos, hash);
454             }
455 
456         }
457     }
458 
459 
460 
    /**
     * Overrides the base string scanner; all work is delegated to
     * scanString2().
     */
    override protected  void scanString() {
        scanString2();
    }
464 
465     public void skipFirstHintsOrMultiCommentAndNextToken() {
466         int starIndex = _pos + 2;
467 
468         for (;;) {
469             starIndex =cast(int)indexOf(text,'*', starIndex);
470             if (starIndex == -1 || starIndex == text.length - 1) {
471                 this._token = Token.ERROR;
472                 return;
473             }
474 
475             int slashIndex = starIndex + 1;
476             if (charAt(slashIndex) == '/') {
477                 _pos = slashIndex + 1;
478                 ch = hunt_charAt(text, _pos);
479                 if (_pos < text.length - 6) {
480                     int pos_6 = _pos + 6;
481                     char c0 = ch;
482                     char c1 = hunt_charAt(text, _pos + 1);
483                     char c2 = hunt_charAt(text, _pos + 2);
484                     char c3 = hunt_charAt(text, _pos + 3);
485                     char c4 = hunt_charAt(text, _pos + 4);
486                     char c5 = hunt_charAt(text, _pos + 5);
487                     char c6 = hunt_charAt(text, pos_6);
488                     if (c0 == 's' && c1 == 'e' && c2 == 'l' && c3 == 'e' && c4 == 'c' && c5 == 't' && c6 == ' ') {
489                         this.comments = null;
490                         reset(pos_6, ' ', Token.SELECT);
491                         return;
492                     }
493 
494                     if (c0 == 'i' && c1 == 'n' && c2 == 's' && c3 == 'e' && c4 == 'r' && c5 == 't' && c6 == ' ') {
495                         this.comments = null;
496                         reset(pos_6, ' ', Token.INSERT);
497                         return;
498                     }
499 
500                     if (c0 == 'u' && c1 == 'p' && c2 == 'd' && c3 == 'a' && c4 == 't' && c5 == 'e' && c6 == ' ') {
501                         this.comments = null;
502                         reset(pos_6, ' ', Token.UPDATE);
503                         return;
504                     }
505 
506 
507                     if (c0 == 'd' && c1 == 'e' && c2 == 'l' && c3 == 'e' && c4 == 't' && c5 == 'e' && c6 == ' ') {
508                         this.comments = null;
509                         reset(pos_6, ' ', Token.DELETE);
510                         return;
511                     }
512 
513                     if (c0 == 'S' && c1 == 'E' && c2 == 'L' && c3 == 'E' && c4 == 'C' && c5 == 'T' && c6 == ' ') {
514                         this.comments = null;
515                         reset(pos_6, ' ', Token.SELECT);
516                         return;
517                     }
518 
519                     if (c0 == 'I' && c1 == 'N' && c2 == 'S' && c3 == 'E' && c4 == 'R' && c5 == 'T' && c6 == ' ') {
520                         this.comments = null;
521                         reset(pos_6, ' ', Token.INSERT);
522                         return;
523                     }
524 
525                     if (c0 == 'U' && c1 == 'P' && c2 == 'D' && c3 == 'A' && c4 == 'T' && c5 == 'E' && c6 == ' ') {
526                         this.comments = null;
527                         reset(pos_6, ' ', Token.UPDATE);
528                         return;
529                     }
530 
531                     if (c0 == 'D' && c1 == 'E' && c2 == 'L' && c3 == 'E' && c4 == 'T' && c5 == 'E' && c6 == ' ') {
532                         this.comments = null;
533                         reset(pos_6, ' ', Token.DELETE);
534                         return;
535                     }
536 
537                     nextToken();
538                     return;
539                 } else {
540                     nextToken();
541                     return;
542                 }
543             }
544             starIndex++;
545         }
546     }
547 
    /**
     * Scans a comment that starts at '/' or '-'.
     *
     * Handles three shapes:
     *   - a '-' whose character two positions ahead is a digit: no comment,
     *     the token becomes Token.SUB;
     *   - a block comment or hint: the token becomes Token.HINT when the
     *     first non-space character after the opening is '!' or '+',
     *     otherwise Token.MULTI_LINE_COMMENT;
     *   - a line comment (second character '/' or '-'): the token becomes
     *     Token.LINE_COMMENT.
     *
     * An unterminated block comment sets the token to Token.ERROR.
     *
     * Throws:
     *   Exception("IllegalState") when the current character is neither '-'
     *   nor '/';
     *   NotAllowCommentException when comments are not allowed and the
     *   comment is rejected by the checks at the end of each branch.
     */
    override public void scanComment() {
        // Remembered so the comment handler can see the token before the comment.
        Token lastToken = this._token;
        
        if (ch == '-') {
            char next_2 = charAt(_pos + 2);
            if (isDigit(next_2)) {
                // Digit two characters ahead: emit a minus token instead.
                scanChar();
                _token = Token.SUB;
                return;
            }
        } else if (ch != '/') {
            throw new Exception("IllegalState");
        }

        _mark = _pos;
        bufPos = 0;
        scanChar();

        // /*+ */
        if (ch == '*') {
            scanChar();
            bufPos++;

            // Spaces between the opening and a possible hint marker are skipped.
            while (ch == ' ') {
                scanChar();
                bufPos++;
            }

            bool isHint = false;
            // Offset from _mark that is used below as the start of the hint text.
            int startHintSp = bufPos + 1;
            if (ch == '!' //
                    || ch == '+' // oceanbase hints
                    ) {
                isHint = true;
                scanChar();
                bufPos++;
            }

            int starIndex = _pos;

            // Search forward for the '*' that is immediately followed by '/'.
            for (;;) {
                starIndex = cast(int)indexOf(text,'*', starIndex);
                if (starIndex == -1 || starIndex == text.length - 1) {
                    // No closing sequence can follow: report an error token.
                    this._token = Token.ERROR;
                    return;
                }
                if (charAt(starIndex + 1) == '/') {
                    if (isHint) {
                        //_stringVal = subString(_mark + startHintSp, (bufPos - startHintSp) - 2);
                        _stringVal = this.subString(_mark + startHintSp, starIndex - startHintSp - _mark);
                        _token = Token.HINT;
                    } else {
                        // NOTE(review): when optimizedForParameterized is set, _stringVal
                        // is not refreshed here, yet it is still passed to addComment()
                        // and to the checks below - confirm this is intended.
                        if (!optimizedForParameterized) {
                            _stringVal = this.subString(_mark, starIndex + 2 - _mark);
                        }
                        _token = Token.MULTI_LINE_COMMENT;
                        commentCount++;
                        if (keepComments) {
                            addComment(_stringVal);
                        }
                    }
                    // Continue scanning right after the closing sequence.
                    _pos = starIndex + 2;
                    ch = charAt(_pos);
                    break;
                }
                starIndex++;
            }

            endOfComment = isEOF();
            
            if (commentHandler !is null
                    && commentHandler.handle(lastToken, _stringVal)) {
                return;
            }

            // Hints are exempt from the comment policy check.
            if (!isHint && !isAllowComment() && !isSafeComment(_stringVal)) {
                throw new NotAllowCommentException();
            }

            return;
        }

        if (ch == '/' || ch == '-') {
            scanChar();
            bufPos++;

            // Consume up to a line break or the end of input.
            for (;;) {
                if (ch == '\r') {
                    if (charAt(_pos + 1) == '\n') {
                        // Both characters are counted, only '\r' is stepped over.
                        bufPos += 2;
                        scanChar();
                        break;
                    }
                    bufPos++;
                    break;
                } else if (ch == LayoutCharacters.EOI) {
                    break;
                }

                if (ch == '\n') {
                    scanChar();
                    bufPos++;
                    break;
                }

                scanChar();
                bufPos++;
            }

            _stringVal = subString(_mark, bufPos);
            _token = Token.LINE_COMMENT;
            commentCount++;
            if (keepComments) {
                addComment(_stringVal);
            }

            if (commentHandler !is null && commentHandler.handle(lastToken, _stringVal)) {
                return;
            }

            endOfComment = isEOF();
            
            // A line comment that runs to the end of input is always rejected
            // when comments are not allowed.
            if (!isAllowComment() && (isEOF() || !isSafeComment(_stringVal))) {
                throw new NotAllowCommentException();
            }

            return;
        }
    }
677 
678     // static {
679         
680     // }
681 
682     public static bool isIdentifierChar(char c) {
683         if (c <= identifierFlags.length) {
684             return identifierFlags[c];
685         }
686         return c != ' ' && c != ',';
687     }
688 
689     override public void scanNumber() {
690         _mark = _pos;
691 
692         if (ch == '0' && charAt(_pos + 1) == 'b') {
693             int i = 2;
694             int _mark = _pos + 2;
695             for (;;++i) {
696                 char ch = charAt(_pos + i);
697                 if (ch == '0' || ch == '1') {
698                     continue;
699                 } else if (ch >= '2' && ch <= '9') {
700                     break;
701                 } else {
702                     bufPos += i;
703                     _pos += i;
704                     _stringVal = subString(_mark, i - 2);
705                     this.ch = charAt(_pos);
706                     _token = Token.BITS;
707                     return;
708                 }
709             }
710         }
711 
712         if (ch == '-') {
713             bufPos++;
714             ch = charAt(++_pos);
715         }
716 
717         for (;;) {
718             if (ch >= '0' && ch <= '9') {
719                 bufPos++;
720             } else {
721                 break;
722             }
723             ch = charAt(++_pos);
724         }
725 
726         bool isDouble = false;
727 
728         if (ch == '.') {
729             if (charAt(_pos + 1) == '.') {
730                 _token = Token.LITERAL_INT;
731                 return;
732             }
733             bufPos++;
734             ch = charAt(++_pos);
735             isDouble = true;
736 
737             for (;;) {
738                 if (ch >= '0' && ch <= '9') {
739                     bufPos++;
740                 } else {
741                     break;
742                 }
743                 ch = charAt(++_pos);
744             }
745         }
746 
747         if (ch == 'e' || ch == 'E') {
748             bufPos++;
749             ch = charAt(++_pos);
750 
751             if (ch == '+' || ch == '-') {
752                 bufPos++;
753                 ch = charAt(++_pos);
754             }
755 
756             for (;;) {
757                 if (ch >= '0' && ch <= '9') {
758                     bufPos++;
759                 } else {
760                     break;
761                 }
762                 ch = charAt(++_pos);
763             }
764 
765             isDouble = true;
766         }
767 
768         if (isDouble) {
769             _token = Token.LITERAL_FLOAT;
770         } else {
771             if (CharTypes.isFirstIdentifierChar(ch) && !(ch == 'b' && bufPos == 1 && charAt(_pos - 1) == '0')) {
772                 bufPos++;
773                 for (;;) {
774                     ch = charAt(++_pos);
775 
776                     if (!isIdentifierChar(ch)) {
777                         break;
778                     }
779 
780                     bufPos++;
781                     continue;
782                 }
783 
784                 _stringVal = addSymbol();
785                 _token = Token.IDENTIFIER;
786             } else {
787                 _token = Token.LITERAL_INT;
788             }
789         }
790     }
791 }