1 module hunt.sql.parser.Lexer; 2 3 /* 4 * Copyright 2015-2018 HuntLabs.cn 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 import hunt.sql.ast.expr.SQLNumberExpr; 20 import hunt.sql.dialect.mysql.parser.MySqlLexer; 21 import hunt.sql.parser.SymbolTable; 22 import hunt.sql.parser.CharTypes; 23 import hunt.sql.parser.LayoutCharacters; 24 import hunt.sql.parser.Token; 25 import hunt.sql.parser.Keywords; 26 import hunt.sql.parser.SQLParserFeature; 27 import hunt.sql.util.DBType; 28 import hunt.sql.parser.ParserException; 29 import hunt.sql.util.FnvHash; 30 import hunt.sql.util.Utils; 31 import hunt.Exceptions; 32 import hunt.sql.parser.SQLParserUtils; 33 34 import hunt.collection; 35 import std.string; 36 import std.bigint; 37 import std.uni; 38 import std.conv; 39 import std.algorithm.mutation; 40 import hunt.Number; 41 import hunt.Long; 42 import hunt.Integer; 43 import hunt.String; 44 import hunt.math; 45 import hunt.text; 46 47 import std.concurrency : initOnce; 48 49 public class Lexer { 50 // public static SymbolTable symbols_l2; 51 52 static SymbolTable symbols_l2() { 53 __gshared SymbolTable inst; 54 return initOnce!inst(new SymbolTable(512)); 55 } 56 57 static int[] digits() { 58 __gshared int[] inst; 59 return initOnce!inst(initDigits); 60 } 61 62 63 // public static int[] digits = new int[cast(int) '9' + 1]; 64 65 // static this(){ 66 // symbols_l2 = new SymbolTable(512); 67 // for (int i = '0'; i <= '9'; ++i) { 68 // digits[i] = i - '0'; 69 // } 70 // } 71 72 private static int[] initDigits() { 73 int[] r = new int[cast(int) '9' + 1]; 74 for (int i = '0'; i <= '9'; ++i) 75 { 76 r[i] = i - '0'; 77 } 78 79 return r; 80 } 81 82 protected int features = 0; //SQLParserFeature.of(SQLParserFeature.EnableSQLBinaryOpExprGroup); 83 public string text; 84 protected int _pos; 85 public int _mark; 86 87 protected char ch; 88 89 protected char[] buf; 90 protected int bufPos; 91 92 public Token _token; 93 94 protected Keywords keywods; 95 96 public string _stringVal; 97 protected long _hash_lower; // fnv1a_64 98 protected long hash; 99 100 public int commentCount = 0; 101 public List!string comments = null; 102 protected bool skipComment = true; 103 private SavePoint savePoint = null; 104 105 /* 106 * anti sql injection 107 */ 108 private bool allowComment = true; 109 private int varIndex = -1; 110 protected CommentHandler commentHandler; 111 protected bool endOfComment = false; 112 protected bool keepComments = false; 113 protected int line = 0; 114 protected int lines = 0; 115 public string dbType; 116 117 protected bool optimizedForParameterized = false; 118 119 private int startPos; 120 private int _posLine; 121 private int _posColumn; 122 123 public this(string input){ 124 this(input, null); 125 } 126 127 public this(string input, CommentHandler commentHandler){ 128 this(input, true); 129 this.commentHandler = commentHandler; 130 } 131 132 public this(string input, CommentHandler commentHandler, string dbType){ 133 this(input, true); 134 this.commentHandler = commentHandler; 135 this.dbType = dbType; 136 137 if (DBType.SQLITE == dbType) { 138 this.keywods = Keywords.SQLITE_KEYWORDS; 139 } 140 } 141 142 public bool isKeepComments() { 143 return keepComments; 144 } 145 146 public void setKeepComments(bool keepComments) { 147 this.keepComments = keepComments; 148 } 149 150 public CommentHandler getCommentHandler() { 151 return commentHandler; 152 } 153 154 public void setCommentHandler(CommentHandler commentHandler) { 155 this.commentHandler = commentHandler; 156 } 157 158 public char charAt(int index) { 159 if (index >= text.length) { 160 return LayoutCharacters.EOI; 161 } 162 163 return .charAt(text,index); 164 } 165 166 public string addSymbol() { 167 return subString(_mark, bufPos); 168 } 169 170 public string subString(int offset, int count) { 171 return text.substring(offset, offset + count); 172 } 173 174 public char[] sub_chars(int offset, int count) { 175 // char[] chars = new char[count]; 176 // text.getChars(offset, offset + count, chars, 0); 177 return cast(char[])text[offset .. offset + count]; 178 } 179 180 protected void initBuff(int size) { 181 if (buf is null) { 182 if (size < 32) { 183 buf = new char[32]; 184 } else { 185 buf = new char[size + 32]; 186 } 187 } else if (buf.length < size) { 188 //buf = Arrays.copyOf(buf, size); 189 auto tmp = new char[size]; 190 buf.copy(tmp); 191 buf = tmp.dup; 192 } 193 } 194 195 public void arraycopy(int srcPos, char[] dest, int destPos, int length) { 196 //text.getChars(srcPos, srcPos + length, dest, destPos); 197 dest[destPos .. destPos+length] = cast(char[])text[srcPos .. srcPos + length].dup; 198 } 199 200 public bool isAllowComment() { 201 return allowComment; 202 } 203 204 public void setAllowComment(bool allowComment) { 205 this.allowComment = allowComment; 206 } 207 208 public int nextVarIndex() { 209 return ++varIndex; 210 } 211 212 public static class SavePoint { 213 int bp; 214 int sp; 215 int np; 216 char ch; 217 long hash; 218 long _hash_lower; 219 public Token _token; 220 string _stringVal; 221 } 222 223 public Keywords getKeywods() { 224 return keywods; 225 } 226 227 public SavePoint mark() { 228 SavePoint savePoint = new SavePoint(); 229 savePoint.bp = _pos; 230 savePoint.sp = bufPos; 231 savePoint.np = _mark; 232 savePoint.ch = ch; 233 savePoint._token = _token; 234 savePoint._stringVal = _stringVal; 235 savePoint.hash = hash; 236 savePoint._hash_lower = _hash_lower; 237 return this.savePoint = savePoint; 238 } 239 240 public void reset(SavePoint savePoint) { 241 this._pos = savePoint.bp; 242 this.bufPos = savePoint.sp; 243 this._mark = savePoint.np; 244 this.ch = savePoint.ch; 245 this._token = savePoint._token; 246 this._stringVal = savePoint._stringVal; 247 this.hash = savePoint.hash; 248 this._hash_lower = savePoint._hash_lower; 249 } 250 251 public void reset() { 252 this.reset(this.savePoint); 253 } 254 255 public void reset(int _pos) { 256 this._pos = _pos; 257 this.ch = charAt(_pos); 258 } 259 260 public this(string input, bool skipComment){ 261 this.skipComment = skipComment; 262 this.keywods = Keywords.DEFAULT_KEYWORDS; 263 this.text = input; 264 this._pos = 0; 265 ch = charAt(_pos); 266 } 267 268 public this(char[] input, int inputLength, bool skipComment){ 269 this(cast(string)input[0..inputLength], skipComment); 270 } 271 272 protected void scanChar() { 273 ch = charAt(++_pos); 274 } 275 276 protected void unscan() { 277 ch = charAt(--_pos); 278 } 279 280 public bool isEOF() { 281 return _pos >= text.length; 282 } 283 284 /** 285 * Report an error at the given _position using the provided arguments. 286 */ 287 protected void lexError(string key, Object[] args...) { 288 _token = Token.ERROR; 289 } 290 291 /** 292 * Return the current _token, set by nextToken(). 293 */ 294 public Token token() { 295 return _token; 296 } 297 298 public string getDbType() { 299 return this.dbType; 300 } 301 302 public string info() { 303 int line = 1; 304 int column = 1; 305 for (int i = 0; i < startPos; ++i, column++) { 306 char ch = .charAt(text, i); 307 if (ch == '\n') { 308 column = 1; 309 line++; 310 } 311 } 312 313 this._posLine = line; 314 this._posColumn = _posColumn; 315 316 StringBuilder buf = new StringBuilder(); 317 buf 318 .append("_pos ") 319 .append(_pos) 320 .append(", line ") 321 .append(line) 322 .append(", column ") 323 .append(column) 324 .append(", _token ") 325 .append(_token); 326 327 if (_token == Token.IDENTIFIER || _token == Token.LITERAL_ALIAS || _token == Token.LITERAL_CHARS) { 328 buf.append(" ").append(_stringVal); 329 } 330 331 return buf.toString(); 332 } 333 334 public void nextTokenComma() { 335 if (ch == ' ') { 336 scanChar(); 337 } 338 339 if (ch == ',' || ch == ',') { 340 scanChar(); 341 _token = Token.COMMA; 342 return; 343 } 344 345 if (ch == ')' || ch == ')') { 346 scanChar(); 347 _token = Token.RPAREN; 348 return; 349 } 350 351 if (ch == '.') { 352 scanChar(); 353 _token = Token.DOT; 354 return; 355 } 356 357 if (ch == 'a' || ch == 'A') { 358 char ch_next = charAt(_pos + 1); 359 if (ch_next == 's' || ch_next == 'S') { 360 char ch_next_2 = charAt(_pos + 2); 361 if (ch_next_2 == ' ') { 362 _pos += 2; 363 ch = ' '; 364 _token = Token.AS; 365 _stringVal = "AS"; 366 return; 367 } 368 } 369 } 370 371 nextToken(); 372 } 373 374 public void nextTokenCommaValue() { 375 if (ch == ' ') { 376 scanChar(); 377 } 378 379 if (ch == ',' || ch == ',') { 380 scanChar(); 381 _token = Token.COMMA; 382 return; 383 } 384 385 if (ch == ')' || ch == ')') { 386 scanChar(); 387 _token = Token.RPAREN; 388 return; 389 } 390 391 if (ch == '.') { 392 scanChar(); 393 _token = Token.DOT; 394 return; 395 } 396 397 if (ch == 'a' || ch == 'A') { 398 char ch_next = charAt(_pos + 1); 399 if (ch_next == 's' || ch_next == 'S') { 400 char ch_next_2 = charAt(_pos + 2); 401 if (ch_next_2 == ' ') { 402 _pos += 2; 403 ch = ' '; 404 _token = Token.AS; 405 _stringVal = "AS"; 406 return; 407 } 408 } 409 } 410 411 nextTokenValue(); 412 } 413 414 public void nextTokenEq() { 415 if (ch == ' ') { 416 scanChar(); 417 } 418 419 if (ch == '=') { 420 scanChar(); 421 _token = Token.EQ; 422 return; 423 } 424 425 if (ch == '.') { 426 scanChar(); 427 _token = Token.DOT; 428 return; 429 } 430 431 if (ch == 'a' || ch == 'A') { 432 char ch_next = charAt(_pos + 1); 433 if (ch_next == 's' || ch_next == 'S') { 434 char ch_next_2 = charAt(_pos + 2); 435 if (ch_next_2 == ' ') { 436 _pos += 2; 437 ch = ' '; 438 _token = Token.AS; 439 _stringVal = "AS"; 440 return; 441 } 442 } 443 } 444 445 nextToken(); 446 } 447 448 public void nextTokenLParen() { 449 if (ch == ' ') { 450 scanChar(); 451 } 452 453 if (ch == '(' || ch == '(') { 454 scanChar(); 455 _token = Token.LPAREN; 456 return; 457 } 458 nextToken(); 459 } 460 461 public void nextTokenValue() { 462 this.startPos = _pos; 463 if (ch == ' ') { 464 scanChar(); 465 } 466 467 if (ch == '\'') { 468 bufPos = 0; 469 scanString(); 470 return; 471 } 472 473 if (ch == '"') { 474 bufPos = 0; 475 scanString2_d(); 476 return; 477 } 478 479 if (ch == '0') { 480 bufPos = 0; 481 if (charAt(_pos + 1) == 'x') { 482 scanChar(); 483 scanChar(); 484 scanHexaDecimal(); 485 } else { 486 scanNumber(); 487 } 488 return; 489 } 490 491 if (ch > '0' && ch <= '9') { 492 bufPos = 0; 493 scanNumber(); 494 return; 495 } 496 497 if (ch == '?') { 498 scanChar(); 499 _token = Token.QUES; 500 return; 501 } 502 503 if (ch == 'n' || ch == 'N') { 504 char c1 = 0, c2, c3, c4; 505 if (_pos + 4 < text.length 506 && ((c1 = .charAt(text, _pos + 1)) == 'u' || c1 == 'U') 507 && ((c2 = .charAt(text, _pos + 2)) == 'l' || c2 == 'L') 508 && ((c3 = .charAt(text, _pos + 3)) == 'l' || c3 == 'L') 509 && (CharTypes.isWhitespace(c4 = .charAt(text, _pos + 4)) || c4 == ',' || c4 == ')')) { 510 _pos += 4; 511 ch = c4; 512 _token = Token.NULL; 513 _stringVal = "NULL"; 514 return; 515 } 516 517 if (c1 == '\'') { 518 bufPos = 0; 519 ++_pos; 520 ch = '\''; 521 scanString(); 522 _token = Token.LITERAL_NCHARS; 523 return; 524 } 525 } 526 527 if (ch == ')') { 528 scanChar(); 529 _token = Token.RPAREN; 530 return; 531 } 532 533 if (CharTypes.isFirstIdentifierChar(ch)) { 534 scanIdentifier(); 535 return; 536 } 537 538 nextToken(); 539 } 540 541 public void nextTokenBy() { 542 while (ch == ' ') { 543 scanChar(); 544 } 545 546 if (ch == 'b' || ch == 'B') { 547 char ch_next = charAt(_pos + 1); 548 if (ch_next == 'y' || ch_next == 'Y') { 549 char ch_next_2 = charAt(_pos + 2); 550 if (ch_next_2 == ' ') { 551 _pos += 2; 552 ch = ' '; 553 _token = Token.BY; 554 _stringVal = "BY"; 555 return; 556 } 557 } 558 } 559 560 nextToken(); 561 } 562 563 public void nextTokenNotOrNull() { 564 while (ch == ' ') { 565 scanChar(); 566 } 567 568 569 if ((ch == 'n' || ch == 'N') && _pos + 3 < text.length) { 570 char c1 = .charAt(text, _pos + 1); 571 char c2 = .charAt(text, _pos + 2); 572 char c3 = .charAt(text, _pos + 3); 573 574 if ((c1 == 'o' || c1 == 'O') 575 && (c2 == 't' || c2 == 'T') 576 && CharTypes.isWhitespace(c3)) { 577 _pos += 3; 578 ch = c3; 579 _token = Token.NOT; 580 _stringVal = "NOT"; 581 return; 582 } 583 584 char c4; 585 if (_pos + 4 < text.length 586 && (c1 == 'u' || c1 == 'U') 587 && (c2 == 'l' || c2 == 'L') 588 && (c3 == 'l' || c3 == 'L') 589 && CharTypes.isWhitespace(c4 = .charAt(text, _pos + 4))) { 590 _pos += 4; 591 ch = c4; 592 _token = Token.NULL; 593 _stringVal = "NULL"; 594 return; 595 } 596 } 597 598 nextToken(); 599 } 600 601 public void nextTokenIdent() { 602 while (ch == ' ') { 603 scanChar(); 604 } 605 606 if (CharTypes.isFirstIdentifierChar(ch)) { 607 scanIdentifier(); 608 return; 609 } 610 611 if (ch == ')') { 612 scanChar(); 613 _token = Token.RPAREN; 614 return; 615 } 616 617 nextToken(); 618 } 619 620 public void nextToken() { 621 startPos = _pos; 622 bufPos = 0; 623 if (comments !is null && comments.size() > 0) { 624 comments = null; 625 } 626 627 this.lines = 0; 628 int startLine = line; 629 630 for (;;) { 631 if (CharTypes.isWhitespace(ch)) { 632 if (ch == '\n') { 633 line++; 634 635 lines = line - startLine; 636 } 637 638 ch = charAt(++_pos); 639 continue; 640 } 641 642 if (ch == '$' && charAt(_pos + 1) == '{') { 643 scanVariable(); 644 return; 645 } 646 647 if (CharTypes.isFirstIdentifierChar(ch)) { 648 if (ch == '(') { 649 scanChar(); 650 _token = Token.LPAREN; 651 return; 652 } else if (ch == ')') { 653 scanChar(); 654 _token = Token.RPAREN; 655 return; 656 } 657 658 if (ch == 'N' || ch == 'n') { 659 if (charAt(_pos + 1) == '\'') { 660 ++_pos; 661 ch = '\''; 662 scanString(); 663 _token = Token.LITERAL_NCHARS; 664 return; 665 } 666 } 667 668 scanIdentifier(); 669 return; 670 } 671 672 switch (ch) { 673 case '0': 674 if (charAt(_pos + 1) == 'x') { 675 scanChar(); 676 scanChar(); 677 scanHexaDecimal(); 678 } else { 679 scanNumber(); 680 } 681 return; 682 case '1': 683 case '2': 684 case '3': 685 case '4': 686 case '5': 687 case '6': 688 case '7': 689 case '8': 690 case '9': 691 scanNumber(); 692 return; 693 case ',': 694 case ',': 695 scanChar(); 696 _token = Token.COMMA; 697 return; 698 case '(': 699 case '(': 700 scanChar(); 701 _token = Token.LPAREN; 702 return; 703 case ')': 704 case ')': 705 scanChar(); 706 _token = Token.RPAREN; 707 return; 708 case '[': 709 scanLBracket(); 710 return; 711 case ']': 712 scanChar(); 713 _token = Token.RBRACKET; 714 return; 715 case '{': 716 scanChar(); 717 _token = Token.LBRACE; 718 return; 719 case '}': 720 scanChar(); 721 _token = Token.RBRACE; 722 return; 723 case ':': 724 scanChar(); 725 if (ch == '=') { 726 scanChar(); 727 _token = Token.COLONEQ; 728 } else if (ch == ':') { 729 scanChar(); 730 _token = Token.COLONCOLON; 731 } else { 732 unscan(); 733 scanVariable(); 734 } 735 return; 736 case '#': 737 scanSharp(); 738 if ((_token == Token.LINE_COMMENT || _token == Token.MULTI_LINE_COMMENT) && skipComment) { 739 bufPos = 0; 740 continue; 741 } 742 return; 743 case '.': 744 scanChar(); 745 if (isDigit(ch) && !CharTypes.isFirstIdentifierChar(charAt(pos - 2))) { 746 unscan(); 747 scanNumber(); 748 return; 749 } else if (ch == '.') { 750 scanChar(); 751 if (ch == '.') { 752 scanChar(); 753 _token = Token.DOTDOTDOT; 754 } else { 755 _token = Token.DOTDOT; 756 } 757 } else { 758 _token = Token.DOT; 759 } 760 return; 761 case '\'': 762 scanString(); 763 return; 764 case '\"': 765 scanAlias(); 766 return; 767 case '*': 768 scanChar(); 769 _token = Token.STAR; 770 return; 771 case '?': 772 scanChar(); 773 if (ch == '?' && DBType.POSTGRESQL == dbType) { 774 scanChar(); 775 if (ch == '|') { 776 scanChar(); 777 _token = Token.QUESBAR; 778 } else { 779 _token = Token.QUESQUES; 780 } 781 } else if (ch == '|' && DBType.POSTGRESQL == (dbType)) { 782 scanChar(); 783 if (ch == '|') { 784 unscan(); 785 _token = Token.QUES; 786 } else { 787 _token = Token.QUESBAR; 788 } 789 } else if (ch == '&' && DBType.POSTGRESQL == (dbType)) { 790 scanChar(); 791 _token = Token.QUESAMP; 792 } else { 793 _token = Token.QUES; 794 } 795 return; 796 case ';': 797 scanChar(); 798 _token = Token.SEMI; 799 return; 800 case '`': 801 throw new ParserException("TODO. " ~ info()); // TODO 802 case '@': 803 scanVariable_at(); 804 return; 805 case '-': 806 if (charAt(_pos +1) == '-') { 807 scanComment(); 808 if ((_token == Token.LINE_COMMENT || _token == Token.MULTI_LINE_COMMENT) && skipComment) { 809 bufPos = 0; 810 continue; 811 } 812 } else { 813 scanOperator(); 814 } 815 return; 816 case '/': 817 int nextChar = charAt(_pos + 1); 818 if (nextChar == '/' || nextChar == '*') { 819 scanComment(); 820 if ((_token == Token.LINE_COMMENT || _token == Token.MULTI_LINE_COMMENT) && skipComment) { 821 bufPos = 0; 822 continue; 823 } 824 } else { 825 _token = Token.SLASH; 826 scanChar(); 827 } 828 return; 829 default: 830 if (isAlpha(ch)) { 831 scanIdentifier(); 832 return; 833 } 834 835 if (isOperator(ch)) { 836 scanOperator(); 837 return; 838 } 839 840 if (ch == '\\' && charAt(_pos + 1) == 'N' 841 && DBType.MYSQL == (dbType)) { 842 scanChar(); 843 scanChar(); 844 _token = Token.NULL; 845 return; 846 } 847 848 // QS_TODO ? 849 if (isEOF()) { // JLS 850 _token = Token.EOF; 851 } else { 852 //lexError("illegal.char", String.valueOf((int) ch)); 853 scanChar(); 854 } 855 856 return; 857 } 858 } 859 860 } 861 862 protected void scanLBracket() { 863 scanChar(); 864 _token = Token.LBRACKET; 865 } 866 867 private void scanOperator() { 868 switch (ch) { 869 case '+': 870 scanChar(); 871 _token = Token.PLUS; 872 break; 873 case '-': 874 scanChar(); 875 if (ch == '>') { 876 scanChar(); 877 if (ch == '>') { 878 scanChar(); 879 _token = Token.SUBGTGT; 880 } else { 881 _token = Token.SUBGT; 882 } 883 } else { 884 _token = Token.SUB; 885 } 886 break; 887 case '*': 888 scanChar(); 889 _token = Token.STAR; 890 break; 891 case '/': 892 scanChar(); 893 _token = Token.SLASH; 894 break; 895 case '&': 896 scanChar(); 897 if (ch == '&') { 898 scanChar(); 899 _token = Token.AMPAMP; 900 } else { 901 _token = Token.AMP; 902 } 903 break; 904 case '|': 905 scanChar(); 906 if (ch == '|') { 907 scanChar(); 908 if (ch == '/') { 909 scanChar(); 910 _token = Token.BARBARSLASH; 911 } else { 912 _token = Token.BARBAR; 913 } 914 } else if (ch == '/') { 915 scanChar(); 916 _token = Token.BARSLASH; 917 } else { 918 _token = Token.BAR; 919 } 920 break; 921 case '^': 922 scanChar(); 923 if (ch == '=') { 924 scanChar(); 925 _token = Token.CARETEQ; 926 } else { 927 _token = Token.CARET; 928 } 929 break; 930 case '%': 931 scanChar(); 932 _token = Token.PERCENT; 933 break; 934 case '=': 935 scanChar(); 936 if (ch == '=') { 937 scanChar(); 938 _token = Token.EQEQ; 939 } else if (ch == '>') { 940 scanChar(); 941 _token = Token.EQGT; 942 } else { 943 _token = Token.EQ; 944 } 945 break; 946 case '>': 947 scanChar(); 948 if (ch == '=') { 949 scanChar(); 950 _token = Token.GTEQ; 951 } else if (ch == '>') { 952 scanChar(); 953 _token = Token.GTGT; 954 } else { 955 _token = Token.GT; 956 } 957 break; 958 case '<': 959 scanChar(); 960 if (ch == '=') { 961 scanChar(); 962 if (ch == '>') { 963 _token = Token.LTEQGT; 964 scanChar(); 965 } else { 966 _token = Token.LTEQ; 967 } 968 } else if (ch == '>') { 969 scanChar(); 970 _token = Token.LTGT; 971 } else if (ch == '<') { 972 scanChar(); 973 _token = Token.LTLT; 974 } else if (ch == '@') { 975 scanChar(); 976 _token = Token.LT_MONKEYS_AT; 977 } else if (ch == '-' && charAt(_pos + 1) == '>') { 978 scanChar(); 979 scanChar(); 980 _token = Token.LT_SUB_GT; 981 } else { 982 if (ch == ' ') { 983 char c1 = charAt(_pos + 1); 984 if (c1 == '=') { 985 scanChar(); 986 scanChar(); 987 if (ch == '>') { 988 _token = Token.LTEQGT; 989 scanChar(); 990 } else { 991 _token = Token.LTEQ; 992 } 993 } else if (c1 == '>') { 994 scanChar(); 995 scanChar(); 996 _token = Token.LTGT; 997 } else if (c1 == '<') { 998 scanChar(); 999 scanChar(); 1000 _token = Token.LTLT; 1001 } else if (c1 == '@') { 1002 scanChar(); 1003 scanChar(); 1004 _token = Token.LT_MONKEYS_AT; 1005 } else if (c1 == '-' && charAt(_pos + 2) == '>') { 1006 scanChar(); 1007 scanChar(); 1008 scanChar(); 1009 _token = Token.LT_SUB_GT; 1010 } else { 1011 _token = Token.LT; 1012 } 1013 } else { 1014 _token = Token.LT; 1015 } 1016 } 1017 break; 1018 case '!': 1019 scanChar(); 1020 while (CharTypes.isWhitespace(ch)) { 1021 scanChar(); 1022 } 1023 if (ch == '=') { 1024 scanChar(); 1025 _token = Token.BANGEQ; 1026 } else if (ch == '>') { 1027 scanChar(); 1028 _token = Token.BANGGT; 1029 } else if (ch == '<') { 1030 scanChar(); 1031 _token = Token.BANGLT; 1032 } else if (ch == '!') { 1033 scanChar(); 1034 _token = Token.BANGBANG; // _postsql 1035 } else if (ch == '~') { 1036 scanChar(); 1037 if (ch == '*') { 1038 scanChar(); 1039 _token = Token.BANG_TILDE_STAR; // _postsql 1040 } else { 1041 _token = Token.BANG_TILDE; // _postsql 1042 } 1043 } else { 1044 _token = Token.BANG; 1045 } 1046 break; 1047 case '?': 1048 scanChar(); 1049 _token = Token.QUES; 1050 break; 1051 case '~': 1052 scanChar(); 1053 if (ch == '*') { 1054 scanChar(); 1055 _token = Token.TILDE_STAR; 1056 } else if (ch == '=') { 1057 scanChar(); 1058 _token = Token.TILDE_EQ; // _postsql 1059 } else { 1060 _token = Token.TILDE; 1061 } 1062 break; 1063 default: 1064 throw new ParserException("TODO. " ~ info()); 1065 } 1066 } 1067 1068 protected void scanString() { 1069 _mark = _pos; 1070 bool hasSpecial = false; 1071 Token preToken = this._token; 1072 1073 for (;;) { 1074 if (isEOF()) { 1075 lexError("unclosed.str.lit"); 1076 return; 1077 } 1078 1079 ch = charAt(++_pos); 1080 1081 if (ch == '\'') { 1082 scanChar(); 1083 if (ch != '\'') { 1084 _token = Token.LITERAL_CHARS; 1085 break; 1086 } else { 1087 if (!hasSpecial) { 1088 initBuff(bufPos); 1089 arraycopy(_mark + 1, buf, 0, bufPos); 1090 hasSpecial = true; 1091 } 1092 putChar('\''); 1093 continue; 1094 } 1095 } 1096 1097 if (!hasSpecial) { 1098 bufPos++; 1099 continue; 1100 } 1101 1102 if (bufPos == buf.length) { 1103 putChar(ch); 1104 } else { 1105 buf[bufPos++] = ch; 1106 } 1107 } 1108 1109 if (!hasSpecial) { 1110 if (preToken == Token.AS) { 1111 _stringVal = subString(_mark, bufPos + 2); 1112 } else { 1113 _stringVal = subString(_mark + 1, bufPos); 1114 } 1115 } else { 1116 _stringVal = cast(string)buf[0..bufPos]; 1117 } 1118 } 1119 1120 protected void scanString2() { 1121 { 1122 bool hasSpecial = false; 1123 int startIndex = _pos + 1; 1124 int endIndex = -1; // text.indexOf('\'', startIndex); 1125 for (int i = startIndex; i < text.length; ++i) { 1126 char ch = .charAt(text, i); 1127 if (ch == '\\') { 1128 hasSpecial = true; 1129 continue; 1130 } 1131 if (ch == '\'') { 1132 endIndex = i; 1133 break; 1134 } 1135 } 1136 1137 if (endIndex == -1) { 1138 throw new ParserException("unclosed str. " ~ info()); 1139 } 1140 1141 string _stringVal; 1142 if (_token == Token.AS) { 1143 _stringVal = subString(_pos, endIndex + 1 - _pos); 1144 } else { 1145 _stringVal = subString(startIndex, endIndex - startIndex); 1146 } 1147 // hasSpecial = _stringVal.indexOf('\\') != -1; 1148 1149 if (!hasSpecial) { 1150 this._stringVal = _stringVal; 1151 int _pos = endIndex + 1; 1152 char ch = charAt(_pos); 1153 if (ch != '\'') { 1154 this._pos = _pos; 1155 this.ch = ch; 1156 _token = Token.LITERAL_CHARS; 1157 return; 1158 } 1159 } 1160 } 1161 1162 _mark = _pos; 1163 bool hasSpecial = false; 1164 for (;;) { 1165 if (isEOF()) { 1166 lexError("unclosed.str.lit"); 1167 return; 1168 } 1169 1170 ch = charAt(++_pos); 1171 1172 if (ch == '\\') { 1173 scanChar(); 1174 if (!hasSpecial) { 1175 initBuff(bufPos); 1176 arraycopy(_mark + 1, buf, 0, bufPos); 1177 hasSpecial = true; 1178 } 1179 1180 switch (ch) { 1181 case '0': 1182 putChar('\0'); 1183 break; 1184 case '\'': 1185 putChar('\''); 1186 break; 1187 case '"': 1188 putChar('"'); 1189 break; 1190 case 'b': 1191 putChar('\b'); 1192 break; 1193 case 'n': 1194 putChar('\n'); 1195 break; 1196 case 'r': 1197 putChar('\r'); 1198 break; 1199 case 't': 1200 putChar('\t'); 1201 break; 1202 case '\\': 1203 putChar('\\'); 1204 break; 1205 case '_': 1206 if(DBType.MYSQL == (dbType)) { 1207 putChar('\\'); 1208 } 1209 putChar('_'); 1210 break; 1211 case 'Z': 1212 putChar(cast(char) 0x1A); // ctrl + Z 1213 break; 1214 case '%': 1215 putChar('\\'); 1216 putChar(ch); 1217 break; 1218 default: 1219 putChar(ch); 1220 break; 1221 } 1222 1223 continue; 1224 } 1225 if (ch == '\'') { 1226 scanChar(); 1227 if (ch != '\'') { 1228 _token = Token.LITERAL_CHARS; 1229 break; 1230 } else { 1231 if (!hasSpecial) { 1232 initBuff(bufPos); 1233 arraycopy(_mark + 1, buf, 0, bufPos); 1234 hasSpecial = true; 1235 } 1236 putChar('\''); 1237 continue; 1238 } 1239 } 1240 1241 if (!hasSpecial) { 1242 bufPos++; 1243 continue; 1244 } 1245 1246 if (bufPos == buf.length) { 1247 putChar(ch); 1248 } else { 1249 buf[bufPos++] = ch; 1250 } 1251 } 1252 1253 if (!hasSpecial) { 1254 _stringVal = subString(_mark + 1, bufPos); 1255 } else { 1256 _stringVal = cast(string)buf[0..bufPos]; 1257 } 1258 } 1259 1260 protected void scanString2_d() { 1261 { 1262 bool hasSpecial = false; 1263 int startIndex = _pos + 1; 1264 int endIndex = -1; // text.indexOf('\'', startIndex); 1265 for (int i = startIndex; i < text.length; ++i) { 1266 char ch = .charAt(text, i); 1267 if (ch == '\\') { 1268 hasSpecial = true; 1269 continue; 1270 } 1271 if (ch == '"') { 1272 endIndex = i; 1273 break; 1274 } 1275 } 1276 1277 if (endIndex == -1) { 1278 throw new ParserException("unclosed str. " ~ info()); 1279 } 1280 1281 string _stringVal; 1282 if (_token == Token.AS) { 1283 _stringVal = subString(_pos, endIndex + 1 - _pos); 1284 } else { 1285 _stringVal = subString(startIndex, endIndex - startIndex); 1286 } 1287 // hasSpecial = _stringVal.indexOf('\\') != -1; 1288 1289 if (!hasSpecial) { 1290 this._stringVal = _stringVal; 1291 int _pos = endIndex + 1; 1292 char ch = charAt(_pos); 1293 if (ch != '\'') { 1294 this._pos = _pos; 1295 this.ch = ch; 1296 _token = Token.LITERAL_CHARS; 1297 return; 1298 } 1299 } 1300 } 1301 1302 _mark = _pos; 1303 bool hasSpecial = false; 1304 for (;;) { 1305 if (isEOF()) { 1306 lexError("unclosed.str.lit"); 1307 return; 1308 } 1309 1310 ch = charAt(++_pos); 1311 1312 if (ch == '\\') { 1313 scanChar(); 1314 if (!hasSpecial) { 1315 initBuff(bufPos); 1316 arraycopy(_mark + 1, buf, 0, bufPos); 1317 hasSpecial = true; 1318 } 1319 1320 1321 switch (ch) { 1322 case '0': 1323 putChar('\0'); 1324 break; 1325 case '\'': 1326 putChar('\''); 1327 break; 1328 case '"': 1329 putChar('"'); 1330 break; 1331 case 'b': 1332 putChar('\b'); 1333 break; 1334 case 'n': 1335 putChar('\n'); 1336 break; 1337 case 'r': 1338 putChar('\r'); 1339 break; 1340 case 't': 1341 putChar('\t'); 1342 break; 1343 case '\\': 1344 putChar('\\'); 1345 break; 1346 case 'Z': 1347 putChar(cast(char) 0x1A); // ctrl + Z 1348 break; 1349 case '%': 1350 if(DBType.MYSQL == (dbType)) { 1351 putChar('\\'); 1352 } 1353 putChar('%'); 1354 break; 1355 case '_': 1356 if(DBType.MYSQL == (dbType)) { 1357 putChar('\\'); 1358 } 1359 putChar('_'); 1360 break; 1361 default: 1362 putChar(ch); 1363 break; 1364 } 1365 1366 continue; 1367 } 1368 if (ch == '"') { 1369 scanChar(); 1370 if (ch != '"') { 1371 _token = Token.LITERAL_CHARS; 1372 break; 1373 } else { 1374 if (!hasSpecial) { 1375 initBuff(bufPos); 1376 arraycopy(_mark + 1, buf, 0, bufPos); 1377 hasSpecial = true; 1378 } 1379 putChar('"'); 1380 continue; 1381 } 1382 } 1383 1384 if (!hasSpecial) { 1385 bufPos++; 1386 continue; 1387 } 1388 1389 if (bufPos == buf.length) { 1390 putChar(ch); 1391 } else { 1392 buf[bufPos++] = ch; 1393 } 1394 } 1395 1396 if (!hasSpecial) { 1397 _stringVal = subString(_mark + 1, bufPos); 1398 } else { 1399 _stringVal = cast(string)buf[0..bufPos]; 1400 } 1401 } 1402 1403 protected void scanAlias() { 1404 { 1405 bool hasSpecial = false; 1406 int startIndex = _pos + 1; 1407 int endIndex = -1; // text.indexOf('\'', startIndex); 1408 for (int i = startIndex; i < text.length; ++i) { 1409 char ch = .charAt(text, i); 1410 if (ch == '\\') { 1411 hasSpecial = true; 1412 continue; 1413 } 1414 if (ch == '"') { 1415 if (i + 1 < text.length) { 1416 char ch_next = charAt(i + 1); 1417 if (ch_next == '"' || ch_next == '\'') { 1418 hasSpecial = true; 1419 i++; 1420 continue; 1421 } 1422 } 1423 if (i > 0) { 1424 char ch_last = charAt(i - 1); 1425 if (ch_last == '\'') { 1426 hasSpecial = true; 1427 continue; 1428 } 1429 } 1430 endIndex = i; 1431 break; 1432 } 1433 } 1434 1435 if (endIndex == -1) { 1436 throw new ParserException("unclosed str. " ~ info()); 1437 } 1438 1439 string _stringVal = subString(_pos, endIndex + 1 - _pos); 1440 // hasSpecial = _stringVal.indexOf('\\') != -1; 1441 1442 if (!hasSpecial) { 1443 this._stringVal = _stringVal; 1444 int _pos = endIndex + 1; 1445 char ch = charAt(_pos); 1446 if (ch != '\'') { 1447 this._pos = _pos; 1448 this.ch = ch; 1449 _token = Token.LITERAL_ALIAS; 1450 return; 1451 } 1452 } 1453 } 1454 1455 _mark = _pos; 1456 initBuff(bufPos); 1457 //putChar(ch); 1458 1459 for (;;) { 1460 if (isEOF()) { 1461 lexError("unclosed.str.lit"); 1462 return; 1463 } 1464 1465 ch = charAt(++_pos); 1466 1467 if (ch == '\\') { 1468 scanChar(); 1469 1470 switch (ch) { 1471 case '0': 1472 putChar('\0'); 1473 break; 1474 case '\'': 1475 putChar('\''); 1476 break; 1477 case '"': 1478 putChar('"'); 1479 break; 1480 case 'b': 1481 putChar('\b'); 1482 break; 1483 case 'n': 1484 putChar('\n'); 1485 break; 1486 case 'r': 1487 putChar('\r'); 1488 break; 1489 case 't': 1490 putChar('\t'); 1491 break; 1492 case '\\': 1493 putChar('\\'); 1494 break; 1495 case 'Z': 1496 putChar(cast(char) 0x1A); // ctrl + Z 1497 break; 1498 default: 1499 putChar(ch); 1500 break; 1501 } 1502 1503 continue; 1504 } 1505 1506 // if (ch == '\'') { 1507 // char ch_next = charAt(_pos + 1); 1508 // if (ch_next == '"') { 1509 // scanChar(); 1510 // continue; 1511 // } 1512 // } else 1513 if (ch == '\"') { 1514 char ch_next = charAt(_pos + 1); 1515 if (ch_next == '"' || ch_next == '\'') { 1516 scanChar(); 1517 continue; 1518 } 1519 1520 //putChar(ch); 1521 scanChar(); 1522 _token = Token.LITERAL_CHARS; 1523 break; 1524 } 1525 1526 if (bufPos == buf.length) { 1527 putChar(ch); 1528 } else { 1529 buf[bufPos++] = ch; 1530 } 1531 } 1532 1533 _stringVal = cast(string)buf[0..bufPos]; 1534 } 1535 1536 public void scanSharp() { 1537 scanVariable(); 1538 } 1539 1540 public void scanVariable() { 1541 if (ch != ':' && ch != '#' && ch != '$') { 1542 throw new ParserException("illegal variable. " ~ info()); 1543 } 1544 1545 _mark = _pos; 1546 bufPos = 1; 1547 char ch; 1548 1549 char c1 = charAt(_pos + 1); 1550 if (c1 == '>' && DBType.POSTGRESQL == toLower(dbType)) { 1551 _pos += 2; 1552 _token = Token.MONKEYS_AT_GT; 1553 this.ch = charAt(++_pos); 1554 return; 1555 } else if (c1 == '{') { 1556 _pos++; 1557 bufPos++; 1558 1559 for (;;) { 1560 ch = charAt(++_pos); 1561 1562 if (ch == '}') { 1563 break; 1564 } 1565 1566 bufPos++; 1567 continue; 1568 } 1569 1570 if (ch != '}') { 1571 throw new ParserException("syntax error. " ~ info()); 1572 } 1573 ++_pos; 1574 bufPos++; 1575 1576 this.ch = charAt(_pos); 1577 1578 _stringVal = addSymbol(); 1579 _token = Token.VARIANT; 1580 return; 1581 } 1582 1583 for (;;) { 1584 ch = charAt(++_pos); 1585 1586 if (!CharTypes.isIdentifierChar(ch)) { 1587 break; 1588 } 1589 1590 bufPos++; 1591 continue; 1592 } 1593 1594 this.ch = charAt(_pos); 1595 1596 _stringVal = addSymbol(); 1597 _token = Token.VARIANT; 1598 } 1599 1600 protected void scanVariable_at() { 1601 if (ch != '@') { 1602 throw new ParserException("illegal variable. " ~ info()); 1603 } 1604 1605 _mark = _pos; 1606 bufPos = 1; 1607 char ch; 1608 1609 char c1 = charAt(_pos + 1); 1610 if (c1 == '@') { 1611 ++_pos; 1612 bufPos++; 1613 } 1614 1615 for (;;) { 1616 ch = charAt(++_pos); 1617 1618 if (!CharTypes.isIdentifierChar(ch)) { 1619 break; 1620 } 1621 1622 bufPos++; 1623 continue; 1624 } 1625 1626 this.ch = charAt(_pos); 1627 1628 _stringVal = addSymbol(); 1629 _token = Token.VARIANT; 1630 } 1631 1632 public void scanComment() { 1633 if (!allowComment) { 1634 throw new Exception("not allow comment"); 1635 } 1636 1637 if ((ch == '/' && charAt(_pos + 1) == '/') 1638 || (ch == '-' && charAt(_pos + 1) == '-')) { 1639 scanSingleLineComment(); 1640 } else if (ch == '/' && charAt(_pos + 1) == '*') { 1641 scanMultiLineComment(); 1642 } else { 1643 throw new Exception("Exception"); 1644 } 1645 } 1646 1647 private void scanMultiLineComment() { 1648 Token lastToken = this._token; 1649 1650 scanChar(); 1651 scanChar(); 1652 _mark = _pos; 1653 bufPos = 0; 1654 1655 for (;;) { 1656 if (ch == '*' && charAt(_pos + 1) == '/') { 1657 scanChar(); 1658 scanChar(); 1659 break; 1660 } 1661 1662 // multiline comment结束符错误 1663 if (ch == LayoutCharacters.EOI) { 1664 throw new ParserException("unterminated /* comment. " ~ info()); 1665 } 1666 scanChar(); 1667 bufPos++; 1668 } 1669 1670 _stringVal = subString(_mark, bufPos); 1671 _token = Token.MULTI_LINE_COMMENT; 1672 commentCount++; 1673 if (keepComments) { 1674 addComment(_stringVal); 1675 } 1676 1677 if (commentHandler !is null && commentHandler.handle(lastToken, _stringVal)) { 1678 return; 1679 } 1680 1681 if (!isAllowComment() && !isSafeComment(_stringVal)) { 1682 throw new Exception("NotAllowComment"); 1683 } 1684 } 1685 1686 private void scanSingleLineComment() { 1687 Token lastToken = this._token; 1688 1689 scanChar(); 1690 scanChar(); 1691 _mark = _pos; 1692 bufPos = 0; 1693 1694 for (;;) { 1695 if (ch == '\r') { 1696 if (charAt(_pos + 1) == '\n') { 1697 line++; 1698 scanChar(); 1699 break; 1700 } 1701 bufPos++; 1702 break; 1703 } 1704 1705 if (ch == '\n') { 1706 line++; 1707 scanChar(); 1708 break; 1709 } 1710 1711 // single line comment结束符错误 1712 if (ch == LayoutCharacters.EOI) { 1713 throw new ParserException("syntax error at end of input. " ~ info()); 1714 } 1715 1716 scanChar(); 1717 bufPos++; 1718 } 1719 1720 _stringVal = subString(_mark, bufPos); 1721 _token = Token.LINE_COMMENT; 1722 commentCount++; 1723 if (keepComments) { 1724 addComment(_stringVal); 1725 } 1726 1727 if (commentHandler !is null && commentHandler.handle(lastToken, _stringVal)) { 1728 return; 1729 } 1730 1731 if (!isAllowComment() && !isSafeComment(_stringVal)) { 1732 throw new Exception("NotAllowComment"); 1733 } 1734 } 1735 1736 public void scanIdentifier() { 1737 this._hash_lower = 0; 1738 this.hash = 0; 1739 1740 char first = ch; 1741 1742 if (ch == '`') { 1743 _mark = _pos; 1744 bufPos = 1; 1745 char ch; 1746 1747 int startPos = _pos + 1; 1748 int quoteIndex = cast(int)text.indexOf('`', cast(ulong)startPos); 1749 if (quoteIndex == -1) { 1750 throw new ParserException("illegal identifier. " ~ info()); 1751 } 1752 1753 _hash_lower = 0xcbf29ce484222325L; 1754 hash = 0xcbf29ce484222325L; 1755 1756 for (int i = startPos; i < quoteIndex; ++i) { 1757 ch = .charAt(text, i); 1758 1759 _hash_lower ^= ((ch >= 'A' && ch <= 'Z') ? (ch + 32) : ch); 1760 _hash_lower *= 0x100000001b3L; 1761 1762 hash ^= ch; 1763 hash *= 0x100000001b3L; 1764 } 1765 1766 _stringVal = MySqlLexer.quoteTable.addSymbol(text, _pos, quoteIndex + 1 - _pos, hash); //@gxc 1767 //_stringVal = text.substring(_mark, _pos); 1768 _pos = quoteIndex + 1; 1769 this.ch = charAt(_pos); 1770 _token = Token.IDENTIFIER; 1771 return; 1772 } 1773 1774 bool firstFlag = CharTypes.isFirstIdentifierChar(first); 1775 if (!firstFlag) { 1776 throw new ParserException("illegal identifier. " ~ info()); 1777 } 1778 1779 _hash_lower = 0xcbf29ce484222325L; 1780 hash = 0xcbf29ce484222325L; 1781 1782 _hash_lower ^= ((ch >= 'A' && ch <= 'Z') ? (ch + 32) : ch); 1783 _hash_lower *= 0x100000001b3L; 1784 1785 hash ^= ch; 1786 hash *= 0x100000001b3L; 1787 1788 _mark = _pos; 1789 bufPos = 1; 1790 char ch; 1791 for (;;) { 1792 ch = charAt(++_pos); 1793 1794 if (!CharTypes.isIdentifierChar(ch)) { 1795 break; 1796 } 1797 1798 _hash_lower ^= ((ch >= 'A' && ch <= 'Z') ? (ch + 32) : ch); 1799 _hash_lower *= 0x100000001b3L; 1800 1801 hash ^= ch; 1802 hash *= 0x100000001b3L; 1803 1804 bufPos++; 1805 continue; 1806 } 1807 1808 this.ch = charAt(_pos); 1809 1810 if (bufPos == 1) { 1811 _token = Token.IDENTIFIER; 1812 _stringVal = CharTypes.valueOf(first); 1813 if (_stringVal is null) { 1814 _stringVal = to!string(first); 1815 } 1816 return; 1817 } 1818 1819 Token tok = keywods.getKeyword(_hash_lower); 1820 if (tok !is null) { 1821 _token = tok; 1822 if (_token == Token.IDENTIFIER) { 1823 _stringVal = SymbolTable.global.addSymbol(text, _mark, bufPos, hash); 1824 } else { 1825 _stringVal = null; 1826 } 1827 } else { 1828 _token = Token.IDENTIFIER; 1829 _stringVal = SymbolTable.global.addSymbol(text, _mark, bufPos, hash); 1830 } 1831 } 1832 1833 public void scanNumber() { 1834 _mark = _pos; 1835 1836 if (ch == '-') { 1837 bufPos++; 1838 ch = charAt(++_pos); 1839 } 1840 1841 for (;;) { 1842 if (ch >= '0' && ch <= '9') { 1843 bufPos++; 1844 } else { 1845 break; 1846 } 1847 ch = charAt(++_pos); 1848 } 1849 1850 bool isDouble = false; 1851 1852 if (ch == '.') { 1853 if (charAt(_pos + 1) == '.') { 1854 _token = Token.LITERAL_INT; 1855 return; 1856 } 1857 bufPos++; 1858 ch = charAt(++_pos); 1859 isDouble = true; 1860 1861 for (;;) { 1862 if (ch >= '0' && ch <= '9') { 1863 bufPos++; 1864 } else { 1865 break; 1866 } 1867 ch = charAt(++_pos); 1868 } 1869 } 1870 1871 if (ch == 'e' || ch == 'E') { 1872 bufPos++; 1873 ch = charAt(++_pos); 1874 1875 if (ch == '+' || ch == '-') { 1876 bufPos++; 1877 ch = charAt(++_pos); 1878 } 1879 1880 for (;;) { 1881 if (ch >= '0' && ch <= '9') { 1882 bufPos++; 1883 } else { 1884 break; 1885 } 1886 ch = charAt(++_pos); 1887 } 1888 1889 isDouble = true; 1890 } 1891 1892 if (isDouble) { 1893 _token = Token.LITERAL_FLOAT; 1894 } else { 1895 _token = Token.LITERAL_INT; 1896 } 1897 } 1898 1899 public void scanHexaDecimal() { 1900 _mark = _pos; 1901 1902 if (ch == '-') { 1903 bufPos++; 1904 ch = charAt(++_pos); 1905 } 1906 1907 for (;;) { 1908 if (CharTypes.isHex(ch)) { 1909 bufPos++; 1910 } else { 1911 break; 1912 } 1913 ch = charAt(++_pos); 1914 } 1915 1916 _token = Token.LITERAL_HEX; 1917 } 1918 1919 public string hexString() { 1920 return subString(_mark, bufPos); 1921 } 1922 1923 public bool isDigit(char ch) { 1924 return ch >= '0' && ch <= '9'; 1925 } 1926 1927 /** 1928 * Append a character to sbuf. 1929 */ 1930 protected void putChar(char ch) { 1931 if (bufPos == buf.length) { 1932 char[] newsbuf = new char[buf.length * 2]; 1933 buf.copy(newsbuf); 1934 // System.arraycopy(buf, 0, newsbuf, 0, buf.length); 1935 buf = newsbuf; 1936 } 1937 buf[bufPos++] = ch; 1938 } 1939 1940 /** 1941 * Return the current _token's _position: a 0-based offset from beginning of the raw input stream (before unicode 1942 * translation) 1943 */ 1944 public int pos() { 1945 return _pos; 1946 } 1947 1948 /** 1949 * The value of a literal _token, recorded as a string. For integers, leading 0x and 'l' suffixes are suppressed. 1950 */ 1951 public string stringVal() { 1952 if (_stringVal is null) { 1953 _stringVal = subString(_mark, bufPos); 1954 } 1955 return _stringVal; 1956 } 1957 1958 private void stringVal(StringBuilder sb) { 1959 if (_stringVal !is null) { 1960 sb.append(_stringVal); 1961 return; 1962 } 1963 1964 sb.append(text, _mark, _mark + bufPos); 1965 } 1966 1967 public bool identifierEquals(string text) { 1968 if (_token != Token.IDENTIFIER) { 1969 return false; 1970 } 1971 1972 if (_stringVal is null) { 1973 _stringVal = subString(_mark, bufPos); 1974 } 1975 return toLower(text) == toLower(_stringVal); 1976 } 1977 1978 public bool identifierEquals(long _hash_lower) { 1979 if (_token != Token.IDENTIFIER) { 1980 return false; 1981 } 1982 1983 if (this._hash_lower == 0) { 1984 if (_stringVal is null) { 1985 _stringVal = subString(_mark, bufPos); 1986 } 1987 this._hash_lower = FnvHash.fnv1a_64_lower(_stringVal); 1988 } 1989 return this._hash_lower == _hash_lower; 1990 } 1991 1992 public long hash_lower() { 1993 if (this._hash_lower == 0) { 1994 if (_stringVal is null) { 1995 _stringVal = subString(_mark, bufPos); 1996 } 1997 this._hash_lower = FnvHash.fnv1a_64_lower(_stringVal); 1998 } 1999 return _hash_lower; 2000 } 2001 2002 public List!string readAndResetComments() { 2003 List!string comments = this.comments; 2004 2005 this.comments = null; 2006 2007 return comments; 2008 } 2009 2010 private bool isOperator(char ch) { 2011 switch (ch) { 2012 case '!': 2013 case '%': 2014 case '&': 2015 case '*': 2016 case '+': 2017 case '-': 2018 case '<': 2019 case '=': 2020 case '>': 2021 case '^': 2022 case '|': 2023 case '~': 2024 case ';': 2025 return true; 2026 default: 2027 return false; 2028 } 2029 } 2030 2031 private enum long MULTMIN_RADIX_TEN = long.min / 10; 2032 private enum long N_MULTMAX_RADIX_TEN = -long.max / 10; 2033 2034 // QS_TODO negative number is invisible for lexer 2035 public Number integerValue() { 2036 long result = 0; 2037 bool negative = false; 2038 int i = _mark, max = _mark + bufPos; 2039 long limit; 2040 long multmin; 2041 int digit; 2042 2043 if (charAt(_mark) == '-') { 2044 negative = true; 2045 limit = Long.MIN_VALUE; 2046 i++; 2047 } else { 2048 limit = -Long.MAX_VALUE; 2049 } 2050 multmin = negative ? MULTMIN_RADIX_TEN : N_MULTMAX_RADIX_TEN; 2051 if (i < max) { 2052 digit = charAt(i++) - '0'; 2053 result = -digit; 2054 } 2055 while (i < max) { 2056 // Accumulating negatively avoids surprises near MAX_VALUE 2057 digit = charAt(i++) - '0'; 2058 if (result < multmin) { 2059 return new BigInteger(numberString()); 2060 } 2061 result *= 10; 2062 if (result < limit + digit) { 2063 return new BigInteger(numberString()); 2064 } 2065 result -= digit; 2066 } 2067 2068 if (negative) { 2069 if (i > _mark + 1) { 2070 if (result >= Integer.MIN_VALUE) { 2071 return new Integer(cast(int)result); 2072 } 2073 return new Long(result); 2074 } else { /* Only got "-" */ 2075 throw new Exception(numberString()); 2076 } 2077 } else { 2078 result = -result; 2079 if (result <= Integer.MAX_VALUE) { 2080 return new Integer(cast(int)result); 2081 } 2082 return new Long(result); 2083 } 2084 } 2085 2086 public int bp() { 2087 return this._pos; 2088 } 2089 2090 public char current() { 2091 return this.ch; 2092 } 2093 2094 public void reset(int _mark, char _markChar, Token _token) { 2095 this._pos = _mark; 2096 this.ch = _markChar; 2097 this._token = _token; 2098 } 2099 2100 public string numberString() { 2101 return subString(_mark, bufPos); 2102 } 2103 2104 public BigDecimal decimalValue() { 2105 // char[] value = sub_chars(_mark, bufPos); 2106 // if (!isNumeric(value)){ 2107 // throw new ParserException(value+" is not a number! " ~ info()); 2108 // } 2109 implementationMissing(); 2110 return BigDecimal.init; 2111 } 2112 2113 public SQLNumberExpr numberExpr() { 2114 char[] value = sub_chars(_mark, bufPos); 2115 if (!isNumeric(value)){ 2116 throw new ParserException( cast(string)value ~ " is not a number! " ~ info()); 2117 } 2118 2119 return new SQLNumberExpr(value); 2120 } 2121 2122 public SQLNumberExpr numberExpr(bool negate) { 2123 char[] value = sub_chars(_mark, bufPos); 2124 if (!isNumeric(value)){ 2125 throw new ParserException(cast(string)value ~ " is not a number! " ~ info()); 2126 } 2127 2128 if (negate) { 2129 char[] chars = new char[value.length + 1]; 2130 chars[0] = '-'; 2131 // System.arraycopy(value, 0, chars, 1, value.length); 2132 value.copy(chars); 2133 return new SQLNumberExpr(chars); 2134 } else { 2135 return new SQLNumberExpr(value); 2136 } 2137 } 2138 2139 public static interface CommentHandler { 2140 bool handle(Token lastToken, string comment); 2141 } 2142 2143 public bool hasComment() { 2144 return comments !is null; 2145 } 2146 2147 public int getCommentCount() { 2148 return commentCount; 2149 } 2150 2151 public void skipToEOF() { 2152 _pos = cast(int)text.length; 2153 this._token = Token.EOF; 2154 } 2155 2156 public bool isEndOfComment() { 2157 return endOfComment; 2158 } 2159 2160 protected bool isSafeComment(string comment) { 2161 if (comment is null) { 2162 return true; 2163 } 2164 comment = toLower(comment); 2165 if (comment.indexOf("select") != -1 // 2166 || comment.indexOf("delete") != -1 // 2167 || comment.indexOf("insert") != -1 // 2168 || comment.indexOf("update") != -1 // 2169 || comment.indexOf("into") != -1 // 2170 || comment.indexOf("where") != -1 // 2171 || comment.indexOf("or") != -1 // 2172 || comment.indexOf("and") != -1 // 2173 || comment.indexOf("union") != -1 // 2174 || comment.indexOf('\'') != -1 // 2175 || comment.indexOf('=') != -1 // 2176 || comment.indexOf('>') != -1 // 2177 || comment.indexOf('<') != -1 // 2178 || comment.indexOf('&') != -1 // 2179 || comment.indexOf('|') != -1 // 2180 || comment.indexOf('^') != -1 // 2181 ) { 2182 return false; 2183 } 2184 return true; 2185 } 2186 2187 protected void addComment(string comment) { 2188 if (comments is null) { 2189 comments = new ArrayList!string(2); 2190 } 2191 comments.add(_stringVal); 2192 } 2193 2194 public int getLine() { 2195 return line; 2196 } 2197 2198 public void computeRowAndColumn() { 2199 int line = 1; 2200 int column = 1; 2201 for (int i = 0; i < _pos; ++i) { 2202 char ch = .charAt(text, i); 2203 if (ch == '\n') { 2204 column = 1; 2205 line++; 2206 } 2207 } 2208 2209 this._posLine = line; 2210 this._posColumn = _posColumn; 2211 } 2212 2213 public int getPosLine() { 2214 return _posLine; 2215 } 2216 2217 public int getPosColumn() { 2218 return _posColumn; 2219 } 2220 2221 public void config(SQLParserFeature feature, bool state) { 2222 features = SQLParserFeature.config(features, feature, state); 2223 2224 if (feature == SQLParserFeature.OptimizedForParameterized) { 2225 optimizedForParameterized = state; 2226 } else if (feature == SQLParserFeature.KeepComments) { 2227 this.keepComments = state; 2228 } else if (feature == SQLParserFeature.SkipComments) { 2229 this.skipComment = state; 2230 } 2231 } 2232 2233 public bool isEnabled(SQLParserFeature feature) { 2234 return SQLParserFeature.isEnabled(this.features, feature); 2235 } 2236 2237 public static string parameterize(string sql, string dbType) { 2238 Lexer lexer = SQLParserUtils.createLexer(sql, dbType); //@gxc tmp 2239 // Lexer lexer; 2240 lexer.optimizedForParameterized = true; // optimized 2241 2242 lexer.nextToken(); 2243 2244 StringBuilder buf = new StringBuilder(); 2245 2246 for_: 2247 for (;;) { 2248 Token _token = lexer._token; 2249 switch (_token) { 2250 case Token.LITERAL_ALIAS: 2251 case Token.LITERAL_FLOAT: 2252 case Token.LITERAL_CHARS: 2253 case Token.LITERAL_INT: 2254 case Token.LITERAL_NCHARS: 2255 case Token.LITERAL_HEX: 2256 case Token.VARIANT: 2257 if (buf.length != 0) { 2258 buf.append(' '); 2259 } 2260 buf.append('?'); 2261 break; 2262 case Token.COMMA: 2263 buf.append(','); 2264 break; 2265 case Token.EQ: 2266 buf.append('='); 2267 break; 2268 case Token.EOF: 2269 break for_; 2270 case Token.ERROR: 2271 return sql; 2272 case Token.SELECT: 2273 buf.append("SELECT"); 2274 break; 2275 case Token.UPDATE: 2276 buf.append("UPDATE"); 2277 break; 2278 default: 2279 if (buf.length != 0) { 2280 buf.append(' '); 2281 } 2282 lexer.stringVal(buf); 2283 break; 2284 } 2285 2286 lexer.nextToken(); 2287 } 2288 2289 return buf.toString(); 2290 } 2291 2292 public string getSource() { 2293 return text; 2294 } 2295 }