/*
 * Copyright 2015-2018 HuntLabs.cn
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
module hunt.sql.dialect.mysql.parser.MySqlLexer;

// import hunt.sql.parser.CharTypes.isFirstIdentifierChar;
// import hunt.sql.parser.LayoutCharacters.LayoutCharacters.EOI;

import hunt.sql.parser.CharTypes;
import hunt.sql.parser.LayoutCharacters;


import hunt.collection;
import std.conv;
import hunt.String;
import hunt.sql.parser;
import hunt.sql.util.FnvHash;
import hunt.sql.util.DBType;
import std.string;

import std.concurrency : initOnce;

import hunt.text;
alias hunt_charAt = hunt.text.Common.charAt;

/**
 * Lexer for the MySQL dialect.
 *
 * Extends the generic `Lexer` with the MySQL keyword set and overrides the
 * scanning of `#` comments, `:`/`#`/`$`/`@` variables, identifiers
 * (including back-quoted ones and `b'...'` bit literals), comments/hints
 * and numbers.
 */
public class MySqlLexer : Lexer {
    // __gshared SymbolTable quoteTable;
    // __gshared Keywords DEFAULT_MYSQL_KEYWORDS;
    // __gshared bool[] identifierFlags;

    /// Lazily created, process-wide symbol table used for back-quoted identifiers.
    static SymbolTable quoteTable() {
        __gshared SymbolTable inst;
        return initOnce!inst(initSymbolTable());
    }

    /// Lazily created, process-wide MySQL keyword set (see `initKeywords`).
    public static Keywords DEFAULT_MYSQL_KEYWORDS() {
        __gshared Keywords inst;
        return initOnce!inst(initKeywords());
    }

    /// Lazily created, process-wide lookup table indexed by character code;
    /// `true` marks a character that may appear inside an identifier.
    public static bool[] identifierFlags() {
        __gshared bool[] inst;
        return initOnce!inst(initIdentifierFlags());
    }

    /// Builds the symbol table returned by `quoteTable`.
    private static SymbolTable initSymbolTable() {
        return new SymbolTable(8192);
    }

    /// Builds the MySQL keyword set: the default keywords plus the
    /// MySQL-specific entries registered below.
    private static Keywords initKeywords() {
        Map!(string, Token) map = new HashMap!(string, Token)();

        map.putAll(Keywords.DEFAULT_KEYWORDS.getKeywords());

        map.put("DUAL", Token.DUAL);
        map.put("FALSE", Token.FALSE);
        map.put("IDENTIFIED", Token.IDENTIFIED);
        map.put("IF", Token.IF);
        map.put("KILL", Token.KILL);

        map.put("LIMIT", Token.LIMIT);
        map.put("TRUE", Token.TRUE);
        map.put("BINARY", Token.BINARY);
        map.put("SHOW", Token.SHOW);
        map.put("CACHE", Token.CACHE);
        map.put("ANALYZE", Token.ANALYZE);
        map.put("OPTIMIZE", Token.OPTIMIZE);
        map.put("ROW", Token.ROW);
        map.put("BEGIN", Token.BEGIN);
        map.put("END", Token.END);
        map.put("DIV", Token.DIV);
        map.put("MERGE", Token.MERGE);

        // for oceanbase & mysql 5.7
        map.put("PARTITION", Token.PARTITION);

        map.put("CONTINUE", Token.CONTINUE);
        map.put("UNDO", Token.UNDO);
        map.put("SQLSTATE", Token.SQLSTATE);
        map.put("CONDITION", Token.CONDITION);
        map.put("MOD", Token.MOD);
        map.put("CONTAINS", Token.CONTAINS);
        map.put("RLIKE", Token.RLIKE);
        map.put("FULLTEXT", Token.FULLTEXT);

        return new Keywords(map);
    }

    /// Builds the 256-entry table returned by `identifierFlags`:
    /// ASCII letters, ASCII digits and '_' are flagged.
    private static bool[] initIdentifierFlags() {
        bool[] flags = new bool[256];

        for (dchar c = 0; c < flags.length; ++c) {
            immutable bool isUpper = c >= 'A' && c <= 'Z';
            immutable bool isLower = c >= 'a' && c <= 'z';
            immutable bool isDigit = c >= '0' && c <= '9';
            if (isUpper || isLower || isDigit) {
                flags[c] = true;
            }
        }
        // identifierFlags['`'] = true;
        flags['_'] = true;
        // identifierFlags['-'] = true; // mysql

        return flags;
    }

    /**
     * Creates a lexer over the first `inputLength` characters of `input`.
     *
     * Params:
     *   input       = character buffer forwarded to the base lexer
     *   inputLength = length forwarded to the base lexer
     *   skipComment = forwarded to the base lexer
     */
    public this(char[] input, int inputLength, bool skipComment){
        dbType = DBType.MYSQL.name;

        super(input, inputLength, skipComment);
        super.keywods = DEFAULT_MYSQL_KEYWORDS;
    }

    /// Creates a lexer over `input` with `skipComment` and `keepComments` both enabled.
    public this(string input){
        this(input, true, true);
    }

    /**
     * Creates a lexer over `input` that keeps comments and enables every
     * feature listed in `features`.
     */
    public this(string input, SQLParserFeature[] features...){
        dbType = DBType.MYSQL.name;

        super(input, true);
        this.keepComments = true;
        super.keywods = DEFAULT_MYSQL_KEYWORDS;

        foreach (SQLParserFeature feature; features) {
            config(feature, true);
        }
    }

    /**
     * Creates a lexer over `input`.
     *
     * Params:
     *   input        = SQL text
     *   skipComment  = forwarded to the base lexer and stored on this instance
     *   keepComments = whether scanned comments are recorded via `addComment`
     */
    public this(string input, bool skipComment, bool keepComments){
        dbType = DBType.MYSQL.name;

        super(input, skipComment);
        this.skipComment = skipComment;
        this.keepComments = keepComments;
        super.keywods = DEFAULT_MYSQL_KEYWORDS;
    }

    /**
     * Scans a token starting at '#'.
     *
     * `#{` is delegated to `scanVariable`; anything else is consumed up to
     * the end of the line and reported as `Token.LINE_COMMENT`.
     *
     * Throws: ParserException when the current character is not '#';
     *         NotAllowCommentException when comments are not allowed and the
     *         comment ends the input or is not considered safe.
     */
    override public void scanSharp() {
        if (ch != '#') {
            throw new ParserException("illegal stat. " ~ info());
        }

        if (charAt(_pos + 1) == '{') {
            scanVariable();
            return;
        }

        Token lastToken = this._token;

        scanChar();
        _mark = _pos;
        bufPos = 0;
        for (;;) {
            if (ch == '\r') {
                if (charAt(_pos + 1) == '\n') {
                    bufPos += 2;
                    scanChar();
                    break;
                }
                bufPos++;
                break;
            } else if (ch == LayoutCharacters.EOI) {
                break;
            }

            if (ch == '\n') {
                scanChar();
                bufPos++;
                break;
            }

            scanChar();
            bufPos++;
        }

        // _mark was taken after the '#', so step back one to include it.
        _stringVal = subString(_mark - 1, bufPos + 1);
        _token = Token.LINE_COMMENT;
        commentCount++;
        if (keepComments) {
            addComment(_stringVal);
        }

        if (commentHandler !is null && commentHandler.handle(lastToken, _stringVal)) {
            return;
        }

        endOfComment = isEOF();

        if (!isAllowComment() && (isEOF() || !isSafeComment(_stringVal))) {
            throw new NotAllowCommentException();
        }
    }

    /**
     * Scans a variable introduced by ':', '#' or '$' and reports it as
     * `Token.VARIANT`.
     *
     * Three body forms are accepted after the sigil: a back-quoted name,
     * a `{...}` group, or a run of identifier characters.
     *
     * Throws: ParserException when the current character is not a valid
     *         sigil or a quoted/braced body is not terminated.
     */
    override public void scanVariable() {
        if (ch != ':' && ch != '#' && ch != '$') {
            throw new ParserException("illegal variable. " ~ info());
        }

        _mark = _pos;
        bufPos = 1;

        immutable char opener = charAt(_pos + 1);
        if (opener == '`' || opener == '{') {
            // Both delimited forms are scanned the same way; only the
            // closing character differs.
            immutable char closer = (opener == '`') ? '`' : '}';
            ++_pos;
            ++bufPos;
            for (;;) {
                immutable char c = charAt(++_pos);

                if (c == closer) {
                    bufPos++;
                    ++_pos;
                    break;
                } else if (c == LayoutCharacters.EOI) {
                    throw new ParserException("illegal identifier. " ~ info());
                }

                bufPos++;
            }
        } else {
            for (;;) {
                ch = charAt(++_pos);

                if (!isIdentifierChar(ch)) {
                    break;
                }

                bufPos++;
            }
        }

        this.ch = charAt(_pos);

        _stringVal = subString(_mark, bufPos);
        _token = Token.VARIANT;
    }

    /**
     * Scans a variable introduced by '@' (or '@@') and reports it as
     * `Token.VARIANT`. The name is either back-quoted or a run of
     * identifier characters.
     *
     * Throws: ParserException when the current character is not '@' or a
     *         back-quoted name is not terminated.
     */
    override protected void scanVariable_at() {
        if (ch != '@') {
            throw new ParserException("illegal variable. " ~ info());
        }

        _mark = _pos;
        bufPos = 1;

        if (charAt(_pos + 1) == '@') {
            ch = charAt(++_pos);
            bufPos++;
        }

        if (charAt(_pos + 1) == '`') {
            ++_pos;
            ++bufPos;
            for (;;) {
                immutable char c = charAt(++_pos);

                if (c == '`') {
                    bufPos++;
                    ++_pos;
                    break;
                } else if (c == LayoutCharacters.EOI) {
                    throw new ParserException("illegal identifier. " ~ info());
                }

                bufPos++;
            }
        } else {
            for (;;) {
                ch = charAt(++_pos);

                if (!isIdentifierChar(ch)) {
                    break;
                }

                bufPos++;
            }
        }

        this.ch = charAt(_pos);

        _stringVal = subString(_mark, bufPos);
        _token = Token.VARIANT;
    }

    /**
     * Scans an identifier, keyword, back-quoted identifier or `b'0101'`
     * bit literal starting at the current character.
     *
     * While scanning, `hash` accumulates an FNV-1a style hash of the raw
     * characters and `_hash_lower` the same hash with ASCII upper-case
     * letters folded to lower case; the latter is used for keyword lookup.
     *
     * Throws: ParserException on an unterminated bit literal or back-quoted
     *         identifier, or when the first character cannot start an
     *         identifier.
     */
    override public void scanIdentifier() {
        // Folds one character into both running hashes (xor, then multiply
        // by the 64-bit FNV prime).
        void mixIntoHashes(char c) {
            _hash_lower ^= ((c >= 'A' && c <= 'Z') ? (c + 32) : c);
            _hash_lower *= 0x100000001b3L;

            hash ^= c;
            hash *= 0x100000001b3L;
        }

        _hash_lower = 0;
        hash = 0;

        char first = ch;

        // b'0101' bit literal. If the quoted body contains anything other
        // than 0/1, fall through and scan 'b' as an ordinary identifier.
        if (ch == 'b' && charAt(_pos + 1) == '\'') {
            int bitsStart = _pos + 2;
            for (int i = 2; ; ++i) {
                immutable char c = charAt(_pos + i);
                if (c == '0' || c == '1') {
                    continue;
                } else if (c == '\'') {
                    bufPos += i;
                    _pos += (i + 1);
                    _stringVal = subString(bitsStart, i - 2);
                    this.ch = charAt(_pos);
                    _token = Token.BITS;
                    return;
                } else if (c == LayoutCharacters.EOI) {
                    throw new ParserException("illegal identifier. " ~ info());
                } else {
                    break;
                }
            }
        }

        if (ch == '`') {
            _mark = _pos;
            bufPos = 1;

            int startPos = _pos + 1;
            int quoteIndex = cast(int) indexOf(text, '`', startPos);
            if (quoteIndex == -1) {
                throw new ParserException("illegal identifier. " ~ info());
            }

            // 64-bit FNV offset basis.
            _hash_lower = 0xcbf29ce484222325L;
            hash = 0xcbf29ce484222325L;

            // Hash only the characters between the back-quotes.
            for (int i = startPos; i < quoteIndex; ++i) {
                mixIntoHashes(hunt_charAt(text, i));
            }

            // The stored symbol includes both back-quotes.
            _stringVal = quoteTable.addSymbol(text, _pos, quoteIndex + 1 - _pos, hash);
            //_stringVal = text.substring(_mark, _pos);
            _pos = quoteIndex + 1;
            this.ch = charAt(_pos);
            _token = Token.IDENTIFIER;
        } else {
            if (!CharTypes.isFirstIdentifierChar(first)) {
                throw new ParserException("illegal identifier. " ~ info());
            }

            // 64-bit FNV offset basis.
            _hash_lower = 0xcbf29ce484222325L;
            hash = 0xcbf29ce484222325L;

            mixIntoHashes(first);

            _mark = _pos;
            bufPos = 1;
            for (;;) {
                immutable char c = charAt(++_pos);

                if (!isIdentifierChar(c)) {
                    break;
                }

                bufPos++;
                mixIntoHashes(c);
            }

            this.ch = charAt(_pos);

            if (bufPos == 1) {
                // Single-character identifier: try the CharTypes lookup
                // first and only convert when it yields null.
                _token = Token.IDENTIFIER;
                _stringVal = CharTypes.valueOf(first);
                if (_stringVal is null) {
                    _stringVal = to!string(first);
                }
                return;
            }

            Token tok = keywods.getKeyword(_hash_lower);
            if (tok !is null) {
                _token = tok;
                if (_token == Token.IDENTIFIER) {
                    _stringVal = SymbolTable.global.addSymbol(text, _mark, bufPos, hash);
                } else {
                    _stringVal = null;
                }
            } else {
                _token = Token.IDENTIFIER;
                _stringVal = SymbolTable.global.addSymbol(text, _mark, bufPos, hash);
            }
        }
    }

    /// Scans a string literal by delegating to `scanString2`.
    override protected void scanString() {
        scanString2();
    }

    /**
     * Skips a leading `/* ... *` + `/` comment or hint and positions the lexer
     * on the token that follows.
     *
     * When the comment is immediately followed by `select`, `insert`,
     * `update` or `delete` (all lower-case or all upper-case) and a space,
     * the lexer is reset directly onto that keyword and collected comments
     * are cleared; otherwise `nextToken` is called. When no terminator is
     * found, or the last character of the text is the '*', the token is
     * set to `Token.ERROR`.
     */
    public void skipFirstHintsOrMultiCommentAndNextToken() {
        int starIndex = _pos + 2;

        for (;;) {
            starIndex = cast(int) indexOf(text, '*', starIndex);
            if (starIndex == -1 || starIndex == text.length - 1) {
                this._token = Token.ERROR;
                return;
            }

            int slashIndex = starIndex + 1;
            if (charAt(slashIndex) == '/') {
                _pos = slashIndex + 1;
                // Use the lexer's charAt (which this class compares against
                // EOI elsewhere) rather than indexing the text directly:
                // _pos equals text.length when the comment ends the input.
                ch = charAt(_pos);

                // Need indices _pos .. _pos + 6 to be valid. Written as an
                // addition because text.length is unsigned and
                // `text.length - 6` would wrap around for short inputs.
                if (_pos + 6 < text.length) {
                    int pos_6 = _pos + 6;
                    char c0 = ch;
                    char c1 = hunt_charAt(text, _pos + 1);
                    char c2 = hunt_charAt(text, _pos + 2);
                    char c3 = hunt_charAt(text, _pos + 3);
                    char c4 = hunt_charAt(text, _pos + 4);
                    char c5 = hunt_charAt(text, _pos + 5);
                    char c6 = hunt_charAt(text, pos_6);

                    if (c0 == 's' && c1 == 'e' && c2 == 'l' && c3 == 'e' && c4 == 'c' && c5 == 't' && c6 == ' ') {
                        this.comments = null;
                        reset(pos_6, ' ', Token.SELECT);
                        return;
                    }

                    if (c0 == 'i' && c1 == 'n' && c2 == 's' && c3 == 'e' && c4 == 'r' && c5 == 't' && c6 == ' ') {
                        this.comments = null;
                        reset(pos_6, ' ', Token.INSERT);
                        return;
                    }

                    if (c0 == 'u' && c1 == 'p' && c2 == 'd' && c3 == 'a' && c4 == 't' && c5 == 'e' && c6 == ' ') {
                        this.comments = null;
                        reset(pos_6, ' ', Token.UPDATE);
                        return;
                    }

                    if (c0 == 'd' && c1 == 'e' && c2 == 'l' && c3 == 'e' && c4 == 't' && c5 == 'e' && c6 == ' ') {
                        this.comments = null;
                        reset(pos_6, ' ', Token.DELETE);
                        return;
                    }

                    if (c0 == 'S' && c1 == 'E' && c2 == 'L' && c3 == 'E' && c4 == 'C' && c5 == 'T' && c6 == ' ') {
                        this.comments = null;
                        reset(pos_6, ' ', Token.SELECT);
                        return;
                    }

                    if (c0 == 'I' && c1 == 'N' && c2 == 'S' && c3 == 'E' && c4 == 'R' && c5 == 'T' && c6 == ' ') {
                        this.comments = null;
                        reset(pos_6, ' ', Token.INSERT);
                        return;
                    }

                    if (c0 == 'U' && c1 == 'P' && c2 == 'D' && c3 == 'A' && c4 == 'T' && c5 == 'E' && c6 == ' ') {
                        this.comments = null;
                        reset(pos_6, ' ', Token.UPDATE);
                        return;
                    }

                    if (c0 == 'D' && c1 == 'E' && c2 == 'L' && c3 == 'E' && c4 == 'T' && c5 == 'E' && c6 == ' ') {
                        this.comments = null;
                        reset(pos_6, ' ', Token.DELETE);
                        return;
                    }
                }

                nextToken();
                return;
            }
            starIndex++;
        }
    }

    /**
     * Scans a comment or hint starting at '/' or '-'.
     *
     * - '-' whose second-next character is a digit is reported as
     *   `Token.SUB` instead of a comment.
     * - A block comment whose first non-space character is '!' or '+' is
     *   reported as `Token.HINT`; any other block comment is
     *   `Token.MULTI_LINE_COMMENT`.
     * - `//` and `--` consume to the end of the line and are reported as
     *   `Token.LINE_COMMENT`.
     *
     * Throws: Exception when the current character is neither '-' nor '/';
     *         NotAllowCommentException when comments are not allowed and the
     *         comment is rejected by the checks below.
     */
    override public void scanComment() {
        Token lastToken = this._token;

        if (ch == '-') {
            char next_2 = charAt(_pos + 2);
            if (isDigit(next_2)) {
                scanChar();
                _token = Token.SUB;
                return;
            }
        } else if (ch != '/') {
            throw new Exception("IllegalState");
        }

        _mark = _pos;
        bufPos = 0;
        scanChar();

        // /*+ */
        if (ch == '*') {
            scanChar();
            bufPos++;

            while (ch == ' ') {
                scanChar();
                bufPos++;
            }

            bool isHint = false;
            // Offset (relative to _mark) of the first character after the
            // hint marker.
            int startHintSp = bufPos + 1;
            if (ch == '!' //
                    || ch == '+' // oceanbase hints
            ) {
                isHint = true;
                scanChar();
                bufPos++;
            }

            int starIndex = _pos;

            for (;;) {
                starIndex = cast(int) indexOf(text, '*', starIndex);
                if (starIndex == -1 || starIndex == text.length - 1) {
                    this._token = Token.ERROR;
                    return;
                }
                if (charAt(starIndex + 1) == '/') {
                    if (isHint) {
                        //_stringVal = subString(_mark + startHintSp, (bufPos - startHintSp) - 2);
                        _stringVal = this.subString(_mark + startHintSp, starIndex - startHintSp - _mark);
                        _token = Token.HINT;
                    } else {
                        if (!optimizedForParameterized) {
                            _stringVal = this.subString(_mark, starIndex + 2 - _mark);
                        }
                        _token = Token.MULTI_LINE_COMMENT;
                        commentCount++;
                        if (keepComments) {
                            addComment(_stringVal);
                        }
                    }
                    _pos = starIndex + 2;
                    ch = charAt(_pos);
                    break;
                }
                starIndex++;
            }

            endOfComment = isEOF();

            if (commentHandler !is null
                    && commentHandler.handle(lastToken, _stringVal)) {
                return;
            }

            if (!isHint && !isAllowComment() && !isSafeComment(_stringVal)) {
                throw new NotAllowCommentException();
            }

            return;
        }

        if (ch == '/' || ch == '-') {
            scanChar();
            bufPos++;

            for (;;) {
                if (ch == '\r') {
                    if (charAt(_pos + 1) == '\n') {
                        bufPos += 2;
                        scanChar();
                        break;
                    }
                    bufPos++;
                    break;
                } else if (ch == LayoutCharacters.EOI) {
                    break;
                }

                if (ch == '\n') {
                    scanChar();
                    bufPos++;
                    break;
                }

                scanChar();
                bufPos++;
            }

            _stringVal = subString(_mark, bufPos);
            _token = Token.LINE_COMMENT;
            commentCount++;
            if (keepComments) {
                addComment(_stringVal);
            }

            if (commentHandler !is null && commentHandler.handle(lastToken, _stringVal)) {
                return;
            }

            endOfComment = isEOF();

            if (!isAllowComment() && (isEOF() || !isSafeComment(_stringVal))) {
                throw new NotAllowCommentException();
            }

            return;
        }
    }

    /**
     * Tells whether `c` may appear inside a MySQL identifier.
     *
     * Characters covered by the `identifierFlags` table are answered from
     * it; anything beyond the table counts as an identifier character
     * unless it is a space or a comma.
     */
    public static bool isIdentifierChar(char c) {
        bool[] flags = identifierFlags();
        // Strictly less-than: flags.length itself is not a valid index.
        if (c < flags.length) {
            return flags[c];
        }
        return c != ' ' && c != ',';
    }

    /**
     * Scans a numeric literal starting at the current character.
     *
     * Produces `Token.BITS` for `0b0101`, `Token.LITERAL_FLOAT` when a
     * fraction or exponent is present, `Token.IDENTIFIER` when the digits
     * are directly followed by identifier characters, and
     * `Token.LITERAL_INT` otherwise.
     */
    override public void scanNumber() {
        _mark = _pos;

        // 0b0101 bit literal. A digit 2-9 in the body means this is not a
        // bit literal after all; fall through to ordinary number scanning.
        if (ch == '0' && charAt(_pos + 1) == 'b') {
            int bitsStart = _pos + 2;
            for (int i = 2; ; ++i) {
                immutable char c = charAt(_pos + i);
                if (c == '0' || c == '1') {
                    continue;
                } else if (c >= '2' && c <= '9') {
                    break;
                } else {
                    bufPos += i;
                    _pos += i;
                    _stringVal = subString(bitsStart, i - 2);
                    this.ch = charAt(_pos);
                    _token = Token.BITS;
                    return;
                }
            }
        }

        if (ch == '-') {
            bufPos++;
            ch = charAt(++_pos);
        }

        // Integer part.
        while (ch >= '0' && ch <= '9') {
            bufPos++;
            ch = charAt(++_pos);
        }

        bool isDouble = false;

        if (ch == '.') {
            // "1..2": the dots are not a fraction; stop at the integer.
            if (charAt(_pos + 1) == '.') {
                _token = Token.LITERAL_INT;
                return;
            }
            bufPos++;
            ch = charAt(++_pos);
            isDouble = true;

            // Fraction digits.
            while (ch >= '0' && ch <= '9') {
                bufPos++;
                ch = charAt(++_pos);
            }
        }

        if (ch == 'e' || ch == 'E') {
            bufPos++;
            ch = charAt(++_pos);

            if (ch == '+' || ch == '-') {
                bufPos++;
                ch = charAt(++_pos);
            }

            // Exponent digits.
            while (ch >= '0' && ch <= '9') {
                bufPos++;
                ch = charAt(++_pos);
            }

            isDouble = true;
        }

        if (isDouble) {
            _token = Token.LITERAL_FLOAT;
        } else {
            // Digits immediately followed by identifier characters form an
            // identifier (e.g. "1abc"), except for a lone "0b" prefix.
            if (CharTypes.isFirstIdentifierChar(ch) && !(ch == 'b' && bufPos == 1 && charAt(_pos - 1) == '0')) {
                bufPos++;
                for (;;) {
                    ch = charAt(++_pos);

                    if (!isIdentifierChar(ch)) {
                        break;
                    }

                    bufPos++;
                }

                _stringVal = addSymbol();
                _token = Token.IDENTIFIER;
            } else {
                _token = Token.LITERAL_INT;
            }
        }
    }
}