sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be folded into LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be folded into Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
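
# Example (a sketch, not part of the original module): the builders above turn
# pre-parsed argument lists into AST nodes. For instance, `build_mod` wraps
# binary operands in parentheses so the result round-trips unambiguously:
#
#     from sqlglot import parse_one
#
#     args = [parse_one("a + 1"), parse_one("7")]  # as if parsed from MOD(a + 1, 7)
#     assert build_mod(args).sql() == "(a + 1) % 7"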
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}
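
    # Example (a sketch, not part of the original module): dialects customize parsing
    # by extending these class-level tables rather than overriding parse logic. A
    # hypothetical dialect parser could register an extra function builder like so:
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "ADD_ONE": lambda args: exp.Add(
    #                 this=seq_get(args, 0), expression=exp.Literal.number(1)
    #             ),
    #         }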
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
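
    # Example (a sketch, not part of the original module): the operator tables above
    # drive precedence climbing. FACTOR binds tighter than TERM, so `1 + 2 * 3`
    # parses as exp.Add(this=1, expression=exp.Mul(2, 3)):
    #
    #     from sqlglot import parse_one
    #
    #     tree = parse_one("1 + 2 * 3")
    #     assert isinstance(tree, exp.Add)
    #     assert isinstance(tree.expression, exp.Mul)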
    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
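
    # Example (a sketch, not part of the original module): COLUMN_OPERATORS maps
    # postfix tokens to builders, e.g. `::` becomes a cast and `->` a JSON extract:
    #
    #     from sqlglot import parse_one
    #
    #     assert isinstance(parse_one("x::int"), exp.Cast)
    #     assert isinstance(parse_one("x -> '$.y'"), exp.JSONExtract)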
883 "AUTO": lambda self: self._parse_auto_property(), 884 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 885 "BACKUP": lambda self: self.expression( 886 exp.BackupProperty, this=self._parse_var(any_token=True) 887 ), 888 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 889 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 890 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 891 "CHECKSUM": lambda self: self._parse_checksum(), 892 "CLUSTER BY": lambda self: self._parse_cluster(), 893 "CLUSTERED": lambda self: self._parse_clustered_by(), 894 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 895 exp.CollateProperty, **kwargs 896 ), 897 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 898 "CONTAINS": lambda self: self._parse_contains_property(), 899 "COPY": lambda self: self._parse_copy_property(), 900 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 901 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 902 "DEFINER": lambda self: self._parse_definer(), 903 "DETERMINISTIC": lambda self: self.expression( 904 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 905 ), 906 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 907 "DUPLICATE": lambda self: self._parse_duplicate(), 908 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 909 "DISTKEY": lambda self: self._parse_distkey(), 910 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 911 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 912 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 913 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 914 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 915 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 916 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 917 "FREESPACE": lambda self: self._parse_freespace(), 918 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 919 "HEAP": lambda self: self.expression(exp.HeapProperty), 920 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 921 "IMMUTABLE": lambda self: self.expression( 922 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 923 ), 924 "INHERITS": lambda self: self.expression( 925 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 926 ), 927 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 928 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 929 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 930 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 931 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 932 "LIKE": lambda self: self._parse_create_like(), 933 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 934 "LOCK": lambda self: self._parse_locking(), 935 "LOCKING": lambda self: self._parse_locking(), 936 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 937 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 938 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 939 "MODIFIES": lambda self: self._parse_modifies_property(), 940 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 941 "NO": 
lambda self: self._parse_no_property(), 942 "ON": lambda self: self._parse_on_property(), 943 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 944 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 945 "PARTITION": lambda self: self._parse_partitioned_of(), 946 "PARTITION BY": lambda self: self._parse_partitioned_by(), 947 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 948 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 949 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 950 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 951 "READS": lambda self: self._parse_reads_property(), 952 "REMOTE": lambda self: self._parse_remote_with_connection(), 953 "RETURNS": lambda self: self._parse_returns(), 954 "STRICT": lambda self: self.expression(exp.StrictProperty), 955 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 956 "ROW": lambda self: self._parse_row(), 957 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 958 "SAMPLE": lambda self: self.expression( 959 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 960 ), 961 "SECURE": lambda self: self.expression(exp.SecureProperty), 962 "SECURITY": lambda self: self._parse_security(), 963 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 964 "SETTINGS": lambda self: self._parse_settings_property(), 965 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 966 "SORTKEY": lambda self: self._parse_sortkey(), 967 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 968 "STABLE": lambda self: self.expression( 969 exp.StabilityProperty, this=exp.Literal.string("STABLE") 970 ), 971 "STORED": lambda self: self._parse_stored(), 972 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 973 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 974 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 975 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 976 "TO": lambda self: self._parse_to_table(), 977 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 978 "TRANSFORM": lambda self: self.expression( 979 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 980 ), 981 "TTL": lambda self: self._parse_ttl(), 982 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 983 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 984 "VOLATILE": lambda self: self._parse_volatile_property(), 985 "WITH": lambda self: self._parse_with_property(), 986 } 987 988 CONSTRAINT_PARSERS = { 989 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 990 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 991 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 992 "CHARACTER SET": lambda self: self.expression( 993 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 994 ), 995 "CHECK": lambda self: self.expression( 996 exp.CheckColumnConstraint, 997 this=self._parse_wrapped(self._parse_assignment), 998 enforced=self._match_text_seq("ENFORCED"), 999 ), 1000 "COLLATE": lambda self: self.expression( 1001 exp.CollateColumnConstraint, 1002 this=self._parse_identifier() or self._parse_column(), 1003 ), 1004 "COMMENT": lambda self: self.expression( 1005 exp.CommentColumnConstraint, this=self._parse_string() 1006 ), 1007 "COMPRESS": lambda self: 
self._parse_compress(), 1008 "CLUSTERED": lambda self: self.expression( 1009 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1010 ), 1011 "NONCLUSTERED": lambda self: self.expression( 1012 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1013 ), 1014 "DEFAULT": lambda self: self.expression( 1015 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1016 ), 1017 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1018 "EPHEMERAL": lambda self: self.expression( 1019 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1020 ), 1021 "EXCLUDE": lambda self: self.expression( 1022 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1023 ), 1024 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1025 "FORMAT": lambda self: self.expression( 1026 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1027 ), 1028 "GENERATED": lambda self: self._parse_generated_as_identity(), 1029 "IDENTITY": lambda self: self._parse_auto_increment(), 1030 "INLINE": lambda self: self._parse_inline(), 1031 "LIKE": lambda self: self._parse_create_like(), 1032 "NOT": lambda self: self._parse_not_constraint(), 1033 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1034 "ON": lambda self: ( 1035 self._match(TokenType.UPDATE) 1036 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1037 ) 1038 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1039 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1040 "PERIOD": lambda self: self._parse_period_for_system_time(), 1041 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1042 "REFERENCES": lambda self: self._parse_references(match=False), 1043 "TITLE": lambda self: self.expression( 1044 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1045 ), 1046 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1047 "UNIQUE": lambda self: self._parse_unique(), 1048 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1049 "WITH": lambda self: self.expression( 1050 exp.Properties, expressions=self._parse_wrapped_properties() 1051 ), 1052 } 1053 1054 ALTER_PARSERS = { 1055 "ADD": lambda self: self._parse_alter_table_add(), 1056 "ALTER": lambda self: self._parse_alter_table_alter(), 1057 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1058 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1059 "DROP": lambda self: self._parse_alter_table_drop(), 1060 "RENAME": lambda self: self._parse_alter_table_rename(), 1061 "SET": lambda self: self._parse_alter_table_set(), 1062 "AS": lambda self: self._parse_select(), 1063 } 1064 1065 ALTER_ALTER_PARSERS = { 1066 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1067 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1068 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1069 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1070 } 1071 1072 SCHEMA_UNNAMED_CONSTRAINTS = { 1073 "CHECK", 1074 "EXCLUDE", 1075 "FOREIGN KEY", 1076 "LIKE", 1077 "PERIOD", 1078 "PRIMARY KEY", 1079 "UNIQUE", 1080 } 1081 1082 NO_PAREN_FUNCTION_PARSERS = { 1083 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1084 "CASE": lambda self: self._parse_case(), 1085 "CONNECT_BY_ROOT": lambda self: self.expression( 1086 exp.ConnectByRoot, this=self._parse_column() 1087 
), 1088 "IF": lambda self: self._parse_if(), 1089 "NEXT": lambda self: self._parse_next_value_for(), 1090 } 1091 1092 INVALID_FUNC_NAME_TOKENS = { 1093 TokenType.IDENTIFIER, 1094 TokenType.STRING, 1095 } 1096 1097 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1098 1099 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1100 1101 FUNCTION_PARSERS = { 1102 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1103 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1104 "DECODE": lambda self: self._parse_decode(), 1105 "EXTRACT": lambda self: self._parse_extract(), 1106 "GAP_FILL": lambda self: self._parse_gap_fill(), 1107 "JSON_OBJECT": lambda self: self._parse_json_object(), 1108 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1109 "JSON_TABLE": lambda self: self._parse_json_table(), 1110 "MATCH": lambda self: self._parse_match_against(), 1111 "NORMALIZE": lambda self: self._parse_normalize(), 1112 "OPENJSON": lambda self: self._parse_open_json(), 1113 "OVERLAY": lambda self: self._parse_overlay(), 1114 "POSITION": lambda self: self._parse_position(), 1115 "PREDICT": lambda self: self._parse_predict(), 1116 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1117 "STRING_AGG": lambda self: self._parse_string_agg(), 1118 "SUBSTRING": lambda self: self._parse_substring(), 1119 "TRIM": lambda self: self._parse_trim(), 1120 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1121 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1122 } 1123 1124 QUERY_MODIFIER_PARSERS = { 1125 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1126 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1127 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1128 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1129 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1130 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1131 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1132 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1133 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1134 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1135 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1136 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1137 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1138 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1139 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1140 TokenType.CLUSTER_BY: lambda self: ( 1141 "cluster", 1142 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1143 ), 1144 TokenType.DISTRIBUTE_BY: lambda self: ( 1145 "distribute", 1146 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1147 ), 1148 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1149 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1150 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1151 } 1152 1153 SET_PARSERS = { 1154 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1155 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1156 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1157 "TRANSACTION": lambda self: self._parse_set_transaction(), 1158 } 1159 1160 
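
    # Example (a sketch, not part of the original module): FUNCTION_PARSERS handles
    # functions with non-standard argument syntax, such as CAST(x AS INT), which
    # cannot be parsed as a plain comma-separated argument list:
    #
    #     from sqlglot import parse_one
    #
    #     cast = parse_one("CAST(x AS INT)")
    #     assert isinstance(cast, exp.Cast)
    #     assert cast.to.sql() == "INT"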
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
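
    # Example (a sketch, not part of the original module): the typical flow is to
    # tokenize first and then hand the tokens to `parse`:
    #
    #     sql = "SELECT 1"
    #     tokens = Tokenizer().tokenize(sql)
    #     expressions = Parser().parse(tokens, sql)
    #     assert isinstance(expressions[0], exp.Select)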
1420 """ 1421 errors = [] 1422 for expression_type in ensure_list(expression_types): 1423 parser = self.EXPRESSION_PARSERS.get(expression_type) 1424 if not parser: 1425 raise TypeError(f"No parser registered for {expression_type}") 1426 1427 try: 1428 return self._parse(parser, raw_tokens, sql) 1429 except ParseError as e: 1430 e.errors[0]["into_expression"] = expression_type 1431 errors.append(e) 1432 1433 raise ParseError( 1434 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1435 errors=merge_errors(errors), 1436 ) from errors[-1] 1437 1438 def _parse( 1439 self, 1440 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1441 raw_tokens: t.List[Token], 1442 sql: t.Optional[str] = None, 1443 ) -> t.List[t.Optional[exp.Expression]]: 1444 self.reset() 1445 self.sql = sql or "" 1446 1447 total = len(raw_tokens) 1448 chunks: t.List[t.List[Token]] = [[]] 1449 1450 for i, token in enumerate(raw_tokens): 1451 if token.token_type == TokenType.SEMICOLON: 1452 if token.comments: 1453 chunks.append([token]) 1454 1455 if i < total - 1: 1456 chunks.append([]) 1457 else: 1458 chunks[-1].append(token) 1459 1460 expressions = [] 1461 1462 for tokens in chunks: 1463 self._index = -1 1464 self._tokens = tokens 1465 self._advance() 1466 1467 expressions.append(parse_method(self)) 1468 1469 if self._index < len(self._tokens): 1470 self.raise_error("Invalid expression / Unexpected token") 1471 1472 self.check_errors() 1473 1474 return expressions 1475 1476 def check_errors(self) -> None: 1477 """Logs or raises any found errors, depending on the chosen error level setting.""" 1478 if self.error_level == ErrorLevel.WARN: 1479 for error in self.errors: 1480 logger.error(str(error)) 1481 elif self.error_level == ErrorLevel.RAISE and self.errors: 1482 raise ParseError( 1483 concat_messages(self.errors, self.max_errors), 1484 errors=merge_errors(self.errors), 1485 ) 1486 1487 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1488 """ 1489 Appends an error in the list of recorded errors or raises it, depending on the chosen 1490 error level setting. 1491 """ 1492 token = token or self._curr or self._prev or Token.string("") 1493 start = token.start 1494 end = token.end + 1 1495 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1496 highlight = self.sql[start:end] 1497 end_context = self.sql[end : end + self.error_message_context] 1498 1499 error = ParseError.new( 1500 f"{message}. Line {token.line}, Col: {token.col}.\n" 1501 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1502 description=message, 1503 line=token.line, 1504 col=token.col, 1505 start_context=start_context, 1506 highlight=highlight, 1507 end_context=end_context, 1508 ) 1509 1510 if self.error_level == ErrorLevel.IMMEDIATE: 1511 raise error 1512 1513 self.errors.append(error) 1514 1515 def expression( 1516 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1517 ) -> E: 1518 """ 1519 Creates a new, validated Expression. 1520 1521 Args: 1522 exp_class: The expression class to instantiate. 1523 comments: An optional list of comments to attach to the expression. 1524 kwargs: The arguments to set for the expression along with their respective values. 1525 1526 Returns: 1527 The target expression. 
1528 """ 1529 instance = exp_class(**kwargs) 1530 instance.add_comments(comments) if comments else self._add_comments(instance) 1531 return self.validate_expression(instance) 1532 1533 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1534 if expression and self._prev_comments: 1535 expression.add_comments(self._prev_comments) 1536 self._prev_comments = None 1537 1538 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1539 """ 1540 Validates an Expression, making sure that all its mandatory arguments are set. 1541 1542 Args: 1543 expression: The expression to validate. 1544 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1545 1546 Returns: 1547 The validated expression. 1548 """ 1549 if self.error_level != ErrorLevel.IGNORE: 1550 for error_message in expression.error_messages(args): 1551 self.raise_error(error_message) 1552 1553 return expression 1554 1555 def _find_sql(self, start: Token, end: Token) -> str: 1556 return self.sql[start.start : end.end + 1] 1557 1558 def _is_connected(self) -> bool: 1559 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1560 1561 def _advance(self, times: int = 1) -> None: 1562 self._index += times 1563 self._curr = seq_get(self._tokens, self._index) 1564 self._next = seq_get(self._tokens, self._index + 1) 1565 1566 if self._index > 0: 1567 self._prev = self._tokens[self._index - 1] 1568 self._prev_comments = self._prev.comments 1569 else: 1570 self._prev = None 1571 self._prev_comments = None 1572 1573 def _retreat(self, index: int) -> None: 1574 if index != self._index: 1575 self._advance(index - self._index) 1576 1577 def _warn_unsupported(self) -> None: 1578 if len(self._tokens) <= 1: 1579 return 1580 1581 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1582 # interested in emitting a warning for the one being currently processed. 1583 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1584 1585 logger.warning( 1586 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1587 ) 1588 1589 def _parse_command(self) -> exp.Command: 1590 self._warn_unsupported() 1591 return self.expression( 1592 exp.Command, 1593 comments=self._prev_comments, 1594 this=self._prev.text.upper(), 1595 expression=self._parse_string(), 1596 ) 1597 1598 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1599 """ 1600 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
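
    # Example (a sketch, not part of the original module): `_parse_statement` is the
    # entry point used by `parse`. It dispatches on the first token via
    # STATEMENT_PARSERS, so e.g. an UPDATE statement is routed to `_parse_update`:
    #
    #     from sqlglot import parse_one
    #
    #     assert isinstance(parse_one("UPDATE t SET x = 1"), exp.Update)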
self._prev 1710 temporary = self._match(TokenType.TEMPORARY) 1711 materialized = self._match_text_seq("MATERIALIZED") 1712 1713 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1714 if not kind: 1715 return self._parse_as_command(start) 1716 1717 concurrently = self._match_text_seq("CONCURRENTLY") 1718 if_exists = exists or self._parse_exists() 1719 table = self._parse_table_parts( 1720 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1721 ) 1722 1723 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1724 1725 if self._match(TokenType.L_PAREN, advance=False): 1726 expressions = self._parse_wrapped_csv(self._parse_types) 1727 else: 1728 expressions = None 1729 1730 return self.expression( 1731 exp.Drop, 1732 comments=start.comments, 1733 exists=if_exists, 1734 this=table, 1735 expressions=expressions, 1736 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1737 temporary=temporary, 1738 materialized=materialized, 1739 cascade=self._match_text_seq("CASCADE"), 1740 constraints=self._match_text_seq("CONSTRAINTS"), 1741 purge=self._match_text_seq("PURGE"), 1742 cluster=cluster, 1743 concurrently=concurrently, 1744 ) 1745 1746 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1747 return ( 1748 self._match_text_seq("IF") 1749 and (not not_ or self._match(TokenType.NOT)) 1750 and self._match(TokenType.EXISTS) 1751 ) 1752 1753 def _parse_create(self) -> exp.Create | exp.Command: 1754 # Note: this can't be None because we've matched a statement parser 1755 start = self._prev 1756 comments = self._prev_comments 1757 1758 replace = ( 1759 start.token_type == TokenType.REPLACE 1760 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1761 or self._match_pair(TokenType.OR, TokenType.ALTER) 1762 ) 1763 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1764 1765 unique = self._match(TokenType.UNIQUE) 1766 1767 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1768 clustered = True 1769 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1770 "COLUMNSTORE" 1771 ): 1772 clustered = False 1773 else: 1774 clustered = None 1775 1776 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1777 self._advance() 1778 1779 properties = None 1780 create_token = self._match_set(self.CREATABLES) and self._prev 1781 1782 if not create_token: 1783 # exp.Properties.Location.POST_CREATE 1784 properties = self._parse_properties() 1785 create_token = self._match_set(self.CREATABLES) and self._prev 1786 1787 if not properties or not create_token: 1788 return self._parse_as_command(start) 1789 1790 concurrently = self._match_text_seq("CONCURRENTLY") 1791 exists = self._parse_exists(not_=True) 1792 this = None 1793 expression: t.Optional[exp.Expression] = None 1794 indexes = None 1795 no_schema_binding = None 1796 begin = None 1797 end = None 1798 clone = None 1799 1800 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1801 nonlocal properties 1802 if properties and temp_props: 1803 properties.expressions.extend(temp_props.expressions) 1804 elif temp_props: 1805 properties = temp_props 1806 1807 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1808 this = self._parse_user_defined_function(kind=create_token.token_type) 1809 1810 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1811 extend_props(self._parse_properties()) 1812 1813 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1814 
extend_props(self._parse_properties()) 1815 1816 if not expression: 1817 if self._match(TokenType.COMMAND): 1818 expression = self._parse_as_command(self._prev) 1819 else: 1820 begin = self._match(TokenType.BEGIN) 1821 return_ = self._match_text_seq("RETURN") 1822 1823 if self._match(TokenType.STRING, advance=False): 1824 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1825 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1826 expression = self._parse_string() 1827 extend_props(self._parse_properties()) 1828 else: 1829 expression = self._parse_statement() 1830 1831 end = self._match_text_seq("END") 1832 1833 if return_: 1834 expression = self.expression(exp.Return, this=expression) 1835 elif create_token.token_type == TokenType.INDEX: 1836 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1837 if not self._match(TokenType.ON): 1838 index = self._parse_id_var() 1839 anonymous = False 1840 else: 1841 index = None 1842 anonymous = True 1843 1844 this = self._parse_index(index=index, anonymous=anonymous) 1845 elif create_token.token_type in self.DB_CREATABLES: 1846 table_parts = self._parse_table_parts( 1847 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1848 ) 1849 1850 # exp.Properties.Location.POST_NAME 1851 self._match(TokenType.COMMA) 1852 extend_props(self._parse_properties(before=True)) 1853 1854 this = self._parse_schema(this=table_parts) 1855 1856 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1857 extend_props(self._parse_properties()) 1858 1859 self._match(TokenType.ALIAS) 1860 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1861 # exp.Properties.Location.POST_ALIAS 1862 extend_props(self._parse_properties()) 1863 1864 if create_token.token_type == TokenType.SEQUENCE: 1865 expression = self._parse_types() 1866 extend_props(self._parse_properties()) 1867 else: 1868 expression = self._parse_ddl_select() 1869 1870 if create_token.token_type == TokenType.TABLE: 1871 # exp.Properties.Location.POST_EXPRESSION 1872 extend_props(self._parse_properties()) 1873 1874 indexes = [] 1875 while True: 1876 index = self._parse_index() 1877 1878 # exp.Properties.Location.POST_INDEX 1879 extend_props(self._parse_properties()) 1880 if not index: 1881 break 1882 else: 1883 self._match(TokenType.COMMA) 1884 indexes.append(index) 1885 elif create_token.token_type == TokenType.VIEW: 1886 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1887 no_schema_binding = True 1888 1889 shallow = self._match_text_seq("SHALLOW") 1890 1891 if self._match_texts(self.CLONE_KEYWORDS): 1892 copy = self._prev.text.lower() == "copy" 1893 clone = self.expression( 1894 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1895 ) 1896 1897 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1898 return self._parse_as_command(start) 1899 1900 create_kind_text = create_token.text.upper() 1901 return self.expression( 1902 exp.Create, 1903 comments=comments, 1904 this=this, 1905 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1906 replace=replace, 1907 refresh=refresh, 1908 unique=unique, 1909 expression=expression, 1910 exists=exists, 1911 properties=properties, 1912 indexes=indexes, 1913 no_schema_binding=no_schema_binding, 1914 begin=begin, 1915 end=end, 1916 clone=clone, 1917 concurrently=concurrently, 1918 clustered=clustered, 1919 ) 1920
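# Illustrative sketch (not part of the source; assumes sqlglot's public
# parse_one entry point, which may vary by version). _parse_create returns
# an exp.Create whose "kind" reflects the matched CREATABLE token:
#
#     import sqlglot
#
#     ast = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
#     assert ast.args.get("replace") and ast.args.get("kind") == "VIEW"

1921 def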
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1922 seq = exp.SequenceProperties() 1923 1924 options = [] 1925 index = self._index 1926 1927 while self._curr: 1928 self._match(TokenType.COMMA) 1929 if self._match_text_seq("INCREMENT"): 1930 self._match_text_seq("BY") 1931 self._match_text_seq("=") 1932 seq.set("increment", self._parse_term()) 1933 elif self._match_text_seq("MINVALUE"): 1934 seq.set("minvalue", self._parse_term()) 1935 elif self._match_text_seq("MAXVALUE"): 1936 seq.set("maxvalue", self._parse_term()) 1937 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1938 self._match_text_seq("=") 1939 seq.set("start", self._parse_term()) 1940 elif self._match_text_seq("CACHE"): 1941 # T-SQL allows empty CACHE which is initialized dynamically 1942 seq.set("cache", self._parse_number() or True) 1943 elif self._match_text_seq("OWNED", "BY"): 1944 # "OWNED BY NONE" is the default 1945 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1946 else: 1947 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1948 if opt: 1949 options.append(opt) 1950 else: 1951 break 1952 1953 seq.set("options", options if options else None) 1954 return None if self._index == index else seq 1955 1956 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1957 # only used for teradata currently 1958 self._match(TokenType.COMMA) 1959 1960 kwargs = { 1961 "no": self._match_text_seq("NO"), 1962 "dual": self._match_text_seq("DUAL"), 1963 "before": self._match_text_seq("BEFORE"), 1964 "default": self._match_text_seq("DEFAULT"), 1965 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1966 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1967 "after": self._match_text_seq("AFTER"), 1968 "minimum": self._match_texts(("MIN", "MINIMUM")), 1969 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1970 } 1971 1972 if self._match_texts(self.PROPERTY_PARSERS): 1973 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1974 try: 1975 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1976 except TypeError: 1977 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1978 1979 return None 1980 1981 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1982 return self._parse_wrapped_csv(self._parse_property) 1983 1984 def _parse_property(self) -> t.Optional[exp.Expression]: 1985 if self._match_texts(self.PROPERTY_PARSERS): 1986 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1987 1988 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1989 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1990 1991 if self._match_text_seq("COMPOUND", "SORTKEY"): 1992 return self._parse_sortkey(compound=True) 1993 1994 if self._match_text_seq("SQL", "SECURITY"): 1995 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1996 1997 index = self._index 1998 key = self._parse_column() 1999 2000 if not self._match(TokenType.EQ): 2001 self._retreat(index) 2002 return self._parse_sequence_properties() 2003 2004 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2005 if isinstance(key, exp.Column): 2006 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2007 2008 value = self._parse_bitwise() or self._parse_var(any_token=True) 2009 2010 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2011 if 
isinstance(value, exp.Column): 2012 value = exp.var(value.name) 2013 2014 return self.expression(exp.Property, this=key, value=value) 2015 2016 def _parse_stored(self) -> exp.FileFormatProperty: 2017 self._match(TokenType.ALIAS) 2018 2019 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2020 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2021 2022 return self.expression( 2023 exp.FileFormatProperty, 2024 this=( 2025 self.expression( 2026 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2027 ) 2028 if input_format or output_format 2029 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2030 ), 2031 ) 2032 2033 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2034 field = self._parse_field() 2035 if isinstance(field, exp.Identifier) and not field.quoted: 2036 field = exp.var(field) 2037 2038 return field 2039 2040 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2041 self._match(TokenType.EQ) 2042 self._match(TokenType.ALIAS) 2043 2044 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2045 2046 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2047 properties = [] 2048 while True: 2049 if before: 2050 prop = self._parse_property_before() 2051 else: 2052 prop = self._parse_property() 2053 if not prop: 2054 break 2055 for p in ensure_list(prop): 2056 properties.append(p) 2057 2058 if properties: 2059 return self.expression(exp.Properties, expressions=properties) 2060 2061 return None 2062 2063 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2064 return self.expression( 2065 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2066 ) 2067 2068 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2069 if self._match_texts(("DEFINER", "INVOKER")): 2070 security_specifier = self._prev.text.upper() 2071 return self.expression(exp.SecurityProperty, this=security_specifier) 2072 return None 2073 2074 def _parse_settings_property(self) -> exp.SettingsProperty: 2075 return self.expression( 2076 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2077 ) 2078 2079 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2080 if self._index >= 2: 2081 pre_volatile_token = self._tokens[self._index - 2] 2082 else: 2083 pre_volatile_token = None 2084 2085 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2086 return exp.VolatileProperty() 2087 2088 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2089 2090 def _parse_retention_period(self) -> exp.Var: 2091 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2092 number = self._parse_number() 2093 number_str = f"{number} " if number else "" 2094 unit = self._parse_var(any_token=True) 2095 return exp.var(f"{number_str}{unit}") 2096 2097 def _parse_system_versioning_property( 2098 self, with_: bool = False 2099 ) -> exp.WithSystemVersioningProperty: 2100 self._match(TokenType.EQ) 2101 prop = self.expression( 2102 exp.WithSystemVersioningProperty, 2103 **{ # type: ignore 2104 "on": True, 2105 "with": with_, 2106 }, 2107 ) 2108 2109 if self._match_text_seq("OFF"): 2110 prop.set("on", False) 2111 return prop 2112 2113 self._match(TokenType.ON) 2114 if self._match(TokenType.L_PAREN): 2115 while self._curr and not 
self._match(TokenType.R_PAREN): 2116 if self._match_text_seq("HISTORY_TABLE", "="): 2117 prop.set("this", self._parse_table_parts()) 2118 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2119 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2120 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2121 prop.set("retention_period", self._parse_retention_period()) 2122 2123 self._match(TokenType.COMMA) 2124 2125 return prop 2126 2127 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2128 self._match(TokenType.EQ) 2129 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2130 prop = self.expression(exp.DataDeletionProperty, on=on) 2131 2132 if self._match(TokenType.L_PAREN): 2133 while self._curr and not self._match(TokenType.R_PAREN): 2134 if self._match_text_seq("FILTER_COLUMN", "="): 2135 prop.set("filter_column", self._parse_column()) 2136 elif self._match_text_seq("RETENTION_PERIOD", "="): 2137 prop.set("retention_period", self._parse_retention_period()) 2138 2139 self._match(TokenType.COMMA) 2140 2141 return prop 2142 2143 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2144 kind = "HASH" 2145 expressions: t.Optional[t.List[exp.Expression]] = None 2146 if self._match_text_seq("BY", "HASH"): 2147 expressions = self._parse_wrapped_csv(self._parse_id_var) 2148 elif self._match_text_seq("BY", "RANDOM"): 2149 kind = "RANDOM" 2150 2151 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2152 buckets: t.Optional[exp.Expression] = None 2153 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2154 buckets = self._parse_number() 2155 2156 return self.expression( 2157 exp.DistributedByProperty, 2158 expressions=expressions, 2159 kind=kind, 2160 buckets=buckets, 2161 order=self._parse_order(), 2162 ) 2163 2164 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2165 self._match_text_seq("KEY") 2166 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2167 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2168 2169 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2170 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2171 prop = self._parse_system_versioning_property(with_=True) 2172 self._match_r_paren() 2173 return prop 2174 2175 if self._match(TokenType.L_PAREN, advance=False): 2176 return self._parse_wrapped_properties() 2177 2178 if self._match_text_seq("JOURNAL"): 2179 return self._parse_withjournaltable() 2180 2181 if self._match_texts(self.VIEW_ATTRIBUTES): 2182 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2183 2184 if self._match_text_seq("DATA"): 2185 return self._parse_withdata(no=False) 2186 elif self._match_text_seq("NO", "DATA"): 2187 return self._parse_withdata(no=True) 2188 2189 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2190 return self._parse_serde_properties(with_=True) 2191 2192 if self._match(TokenType.SCHEMA): 2193 return self.expression( 2194 exp.WithSchemaBindingProperty, 2195 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2196 ) 2197 2198 if not self._next: 2199 return None 2200 2201 return self._parse_withisolatedloading() 2202 2203 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2204 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2205 self._match(TokenType.EQ) 2206 2207 user = self._parse_id_var() 2208 self._match(TokenType.PARAMETER) 2209 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2210 2211 if not user or not host: 2212 return None 2213 2214 return exp.DefinerProperty(this=f"{user}@{host}") 2215 2216 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2217 self._match(TokenType.TABLE) 2218 self._match(TokenType.EQ) 2219 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2220 2221 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2222 return self.expression(exp.LogProperty, no=no) 2223 2224 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2225 return self.expression(exp.JournalProperty, **kwargs) 2226 2227 def _parse_checksum(self) -> exp.ChecksumProperty: 2228 self._match(TokenType.EQ) 2229 2230 on = None 2231 if self._match(TokenType.ON): 2232 on = True 2233 elif self._match_text_seq("OFF"): 2234 on = False 2235 2236 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2237 2238 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2239 return self.expression( 2240 exp.Cluster, 2241 expressions=( 2242 self._parse_wrapped_csv(self._parse_ordered) 2243 if wrapped 2244 else self._parse_csv(self._parse_ordered) 2245 ), 2246 ) 2247 2248 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2249 self._match_text_seq("BY") 2250 2251 self._match_l_paren() 2252 expressions = self._parse_csv(self._parse_column) 2253 self._match_r_paren() 2254 2255 if self._match_text_seq("SORTED", "BY"): 2256 self._match_l_paren() 2257 sorted_by = self._parse_csv(self._parse_ordered) 2258 self._match_r_paren() 2259 else: 2260 sorted_by = None 2261 2262 self._match(TokenType.INTO) 2263 buckets = self._parse_number() 2264 self._match_text_seq("BUCKETS") 2265 2266 return self.expression( 2267 exp.ClusteredByProperty, 2268 expressions=expressions, 2269 sorted_by=sorted_by, 2270 buckets=buckets, 2271 ) 2272 2273 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2274 if not self._match_text_seq("GRANTS"): 2275 self._retreat(self._index - 1) 2276 return None 2277 2278 return self.expression(exp.CopyGrantsProperty) 2279 2280 def _parse_freespace(self) -> exp.FreespaceProperty: 2281 self._match(TokenType.EQ) 2282 return self.expression( 2283 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2284 ) 2285 2286 def _parse_mergeblockratio( 2287 self, no: bool = False, default: bool = False 2288 ) -> exp.MergeBlockRatioProperty: 2289 if self._match(TokenType.EQ): 2290 return self.expression( 2291 exp.MergeBlockRatioProperty, 2292 this=self._parse_number(), 2293 percent=self._match(TokenType.PERCENT), 2294 ) 2295 2296 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2297 2298 def _parse_datablocksize( 2299 self, 2300 default: t.Optional[bool] = None, 2301 minimum: t.Optional[bool] = None, 2302 maximum: t.Optional[bool] = None, 2303 ) -> exp.DataBlocksizeProperty: 2304 self._match(TokenType.EQ) 2305 size = self._parse_number() 2306 2307 units = None 2308 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2309 units = self._prev.text 2310 2311 return self.expression( 2312 exp.DataBlocksizeProperty, 2313 size=size, 2314 units=units, 2315 default=default, 2316 minimum=minimum, 2317 maximum=maximum, 2318 ) 2319 2320 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2321 self._match(TokenType.EQ) 2322 always = self._match_text_seq("ALWAYS") 2323 manual = self._match_text_seq("MANUAL") 2324 never = 
self._match_text_seq("NEVER") 2325 default = self._match_text_seq("DEFAULT") 2326 2327 autotemp = None 2328 if self._match_text_seq("AUTOTEMP"): 2329 autotemp = self._parse_schema() 2330 2331 return self.expression( 2332 exp.BlockCompressionProperty, 2333 always=always, 2334 manual=manual, 2335 never=never, 2336 default=default, 2337 autotemp=autotemp, 2338 ) 2339 2340 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2341 index = self._index 2342 no = self._match_text_seq("NO") 2343 concurrent = self._match_text_seq("CONCURRENT") 2344 2345 if not self._match_text_seq("ISOLATED", "LOADING"): 2346 self._retreat(index) 2347 return None 2348 2349 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2350 return self.expression( 2351 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2352 ) 2353 2354 def _parse_locking(self) -> exp.LockingProperty: 2355 if self._match(TokenType.TABLE): 2356 kind = "TABLE" 2357 elif self._match(TokenType.VIEW): 2358 kind = "VIEW" 2359 elif self._match(TokenType.ROW): 2360 kind = "ROW" 2361 elif self._match_text_seq("DATABASE"): 2362 kind = "DATABASE" 2363 else: 2364 kind = None 2365 2366 if kind in ("DATABASE", "TABLE", "VIEW"): 2367 this = self._parse_table_parts() 2368 else: 2369 this = None 2370 2371 if self._match(TokenType.FOR): 2372 for_or_in = "FOR" 2373 elif self._match(TokenType.IN): 2374 for_or_in = "IN" 2375 else: 2376 for_or_in = None 2377 2378 if self._match_text_seq("ACCESS"): 2379 lock_type = "ACCESS" 2380 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2381 lock_type = "EXCLUSIVE" 2382 elif self._match_text_seq("SHARE"): 2383 lock_type = "SHARE" 2384 elif self._match_text_seq("READ"): 2385 lock_type = "READ" 2386 elif self._match_text_seq("WRITE"): 2387 lock_type = "WRITE" 2388 elif self._match_text_seq("CHECKSUM"): 2389 lock_type = "CHECKSUM" 2390 else: 2391 lock_type = None 2392 2393 override = self._match_text_seq("OVERRIDE") 2394 2395 return self.expression( 2396 exp.LockingProperty, 2397 this=this, 2398 kind=kind, 2399 for_or_in=for_or_in, 2400 lock_type=lock_type, 2401 override=override, 2402 ) 2403 2404 def _parse_partition_by(self) -> t.List[exp.Expression]: 2405 if self._match(TokenType.PARTITION_BY): 2406 return self._parse_csv(self._parse_assignment) 2407 return [] 2408 2409 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2410 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2411 if self._match_text_seq("MINVALUE"): 2412 return exp.var("MINVALUE") 2413 if self._match_text_seq("MAXVALUE"): 2414 return exp.var("MAXVALUE") 2415 return self._parse_bitwise() 2416 2417 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2418 expression = None 2419 from_expressions = None 2420 to_expressions = None 2421 2422 if self._match(TokenType.IN): 2423 this = self._parse_wrapped_csv(self._parse_bitwise) 2424 elif self._match(TokenType.FROM): 2425 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2426 self._match_text_seq("TO") 2427 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2428 elif self._match_text_seq("WITH", "(", "MODULUS"): 2429 this = self._parse_number() 2430 self._match_text_seq(",", "REMAINDER") 2431 expression = self._parse_number() 2432 self._match_r_paren() 2433 else: 2434 self.raise_error("Failed to parse partition bound spec.") 2435 2436 return self.expression( 2437 exp.PartitionBoundSpec, 2438 this=this, 2439 expression=expression, 2440 
from_expressions=from_expressions, 2441 to_expressions=to_expressions, 2442 ) 2443 2444 # https://www.postgresql.org/docs/current/sql-createtable.html 2445 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2446 if not self._match_text_seq("OF"): 2447 self._retreat(self._index - 1) 2448 return None 2449 2450 this = self._parse_table(schema=True) 2451 2452 if self._match(TokenType.DEFAULT): 2453 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2454 elif self._match_text_seq("FOR", "VALUES"): 2455 expression = self._parse_partition_bound_spec() 2456 else: 2457 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2458 2459 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2460 2461 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2462 self._match(TokenType.EQ) 2463 return self.expression( 2464 exp.PartitionedByProperty, 2465 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2466 ) 2467 2468 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2469 if self._match_text_seq("AND", "STATISTICS"): 2470 statistics = True 2471 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2472 statistics = False 2473 else: 2474 statistics = None 2475 2476 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2477 2478 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2479 if self._match_text_seq("SQL"): 2480 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2481 return None 2482 2483 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2484 if self._match_text_seq("SQL", "DATA"): 2485 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2486 return None 2487 2488 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2489 if self._match_text_seq("PRIMARY", "INDEX"): 2490 return exp.NoPrimaryIndexProperty() 2491 if self._match_text_seq("SQL"): 2492 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2493 return None 2494 2495 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2496 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2497 return exp.OnCommitProperty() 2498 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2499 return exp.OnCommitProperty(delete=True) 2500 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2501 2502 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2503 if self._match_text_seq("SQL", "DATA"): 2504 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2505 return None 2506 2507 def _parse_distkey(self) -> exp.DistKeyProperty: 2508 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2509 2510 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2511 table = self._parse_table(schema=True) 2512 2513 options = [] 2514 while self._match_texts(("INCLUDING", "EXCLUDING")): 2515 this = self._prev.text.upper() 2516 2517 id_var = self._parse_id_var() 2518 if not id_var: 2519 return None 2520 2521 options.append( 2522 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2523 ) 2524 2525 return self.expression(exp.LikeProperty, this=table, expressions=options) 2526 2527 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2528 return self.expression( 2529 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2530 ) 2531
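# Illustrative sketch (not part of the source; assumes sqlglot's public API
# and its Redshift dialect support, both of which may vary by version).
# _parse_sortkey above contributes an exp.SortKeyProperty to a CREATE
# statement's property list:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("CREATE TABLE t (a INT) SORTKEY (a)", read="redshift")
#     props = ast.args["properties"]
#     assert any(isinstance(p, exp.SortKeyProperty) for p in props.expressions)

2532 def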
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2533 self._match(TokenType.EQ) 2534 return self.expression( 2535 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2536 ) 2537 2538 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2539 self._match_text_seq("WITH", "CONNECTION") 2540 return self.expression( 2541 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2542 ) 2543 2544 def _parse_returns(self) -> exp.ReturnsProperty: 2545 value: t.Optional[exp.Expression] 2546 null = None 2547 is_table = self._match(TokenType.TABLE) 2548 2549 if is_table: 2550 if self._match(TokenType.LT): 2551 value = self.expression( 2552 exp.Schema, 2553 this="TABLE", 2554 expressions=self._parse_csv(self._parse_struct_types), 2555 ) 2556 if not self._match(TokenType.GT): 2557 self.raise_error("Expecting >") 2558 else: 2559 value = self._parse_schema(exp.var("TABLE")) 2560 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2561 null = True 2562 value = None 2563 else: 2564 value = self._parse_types() 2565 2566 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2567 2568 def _parse_describe(self) -> exp.Describe: 2569 kind = self._match_set(self.CREATABLES) and self._prev.text 2570 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2571 if self._match(TokenType.DOT): 2572 style = None 2573 self._retreat(self._index - 2) 2574 this = self._parse_table(schema=True) 2575 properties = self._parse_properties() 2576 expressions = properties.expressions if properties else None 2577 partition = self._parse_partition() 2578 return self.expression( 2579 exp.Describe, 2580 this=this, 2581 style=style, 2582 kind=kind, 2583 expressions=expressions, 2584 partition=partition, 2585 ) 2586 2587 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2588 kind = self._prev.text.upper() 2589 expressions = [] 2590 2591 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2592 if self._match(TokenType.WHEN): 2593 expression = self._parse_disjunction() 2594 self._match(TokenType.THEN) 2595 else: 2596 expression = None 2597 2598 else_ = self._match(TokenType.ELSE) 2599 2600 if not self._match(TokenType.INTO): 2601 return None 2602 2603 return self.expression( 2604 exp.ConditionalInsert, 2605 this=self.expression( 2606 exp.Insert, 2607 this=self._parse_table(schema=True), 2608 expression=self._parse_derived_table_values(), 2609 ), 2610 expression=expression, 2611 else_=else_, 2612 ) 2613 2614 expression = parse_conditional_insert() 2615 while expression is not None: 2616 expressions.append(expression) 2617 expression = parse_conditional_insert() 2618 2619 return self.expression( 2620 exp.MultitableInserts, 2621 kind=kind, 2622 comments=comments, 2623 expressions=expressions, 2624 source=self._parse_table(), 2625 ) 2626 2627 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2628 comments = ensure_list(self._prev_comments) 2629 hint = self._parse_hint() 2630 overwrite = self._match(TokenType.OVERWRITE) 2631 ignore = self._match(TokenType.IGNORE) 2632 local = self._match_text_seq("LOCAL") 2633 alternative = None 2634 is_function = None 2635 2636 if self._match_text_seq("DIRECTORY"): 2637 this: t.Optional[exp.Expression] = self.expression( 2638 exp.Directory, 2639 this=self._parse_var_or_string(), 2640 local=local, 2641 row_format=self._parse_row_format(match_row=True), 2642 ) 
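# Illustrative sketch (not part of the source; assumes sqlglot's public
# parse_one entry point and Hive dialect, which may vary by version). The
# branch above handles Hive-style INSERT ... DIRECTORY targets, while the
# else-branch below parses ordinary INSERT INTO statements:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one(
#         "INSERT OVERWRITE DIRECTORY '/x' SELECT * FROM t", read="hive"
#     )
#     assert isinstance(ast.this, exp.Directory)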
2643 else: 2644 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2645 comments += ensure_list(self._prev_comments) 2646 return self._parse_multitable_inserts(comments) 2647 2648 if self._match(TokenType.OR): 2649 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2650 2651 self._match(TokenType.INTO) 2652 comments += ensure_list(self._prev_comments) 2653 self._match(TokenType.TABLE) 2654 is_function = self._match(TokenType.FUNCTION) 2655 2656 this = ( 2657 self._parse_table(schema=True, parse_partition=True) 2658 if not is_function 2659 else self._parse_function() 2660 ) 2661 2662 returning = self._parse_returning() 2663 2664 return self.expression( 2665 exp.Insert, 2666 comments=comments, 2667 hint=hint, 2668 is_function=is_function, 2669 this=this, 2670 stored=self._match_text_seq("STORED") and self._parse_stored(), 2671 by_name=self._match_text_seq("BY", "NAME"), 2672 exists=self._parse_exists(), 2673 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2674 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2675 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2676 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2677 conflict=self._parse_on_conflict(), 2678 returning=returning or self._parse_returning(), 2679 overwrite=overwrite, 2680 alternative=alternative, 2681 ignore=ignore, 2682 source=self._match(TokenType.TABLE) and self._parse_table(), 2683 ) 2684 2685 def _parse_kill(self) -> exp.Kill: 2686 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2687 2688 return self.expression( 2689 exp.Kill, 2690 this=self._parse_primary(), 2691 kind=kind, 2692 ) 2693 2694 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2695 conflict = self._match_text_seq("ON", "CONFLICT") 2696 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2697 2698 if not conflict and not duplicate: 2699 return None 2700 2701 conflict_keys = None 2702 constraint = None 2703 2704 if conflict: 2705 if self._match_text_seq("ON", "CONSTRAINT"): 2706 constraint = self._parse_id_var() 2707 elif self._match(TokenType.L_PAREN): 2708 conflict_keys = self._parse_csv(self._parse_id_var) 2709 self._match_r_paren() 2710 2711 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2712 if self._prev.token_type == TokenType.UPDATE: 2713 self._match(TokenType.SET) 2714 expressions = self._parse_csv(self._parse_equality) 2715 else: 2716 expressions = None 2717 2718 return self.expression( 2719 exp.OnConflict, 2720 duplicate=duplicate, 2721 expressions=expressions, 2722 action=action, 2723 conflict_keys=conflict_keys, 2724 constraint=constraint, 2725 ) 2726 2727 def _parse_returning(self) -> t.Optional[exp.Returning]: 2728 if not self._match(TokenType.RETURNING): 2729 return None 2730 return self.expression( 2731 exp.Returning, 2732 expressions=self._parse_csv(self._parse_expression), 2733 into=self._match(TokenType.INTO) and self._parse_table_part(), 2734 ) 2735 2736 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2737 if not self._match(TokenType.FORMAT): 2738 return None 2739 return self._parse_row_format() 2740 2741 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2742 index = self._index 2743 with_ = with_ or self._match_text_seq("WITH") 2744 2745 if not self._match(TokenType.SERDE_PROPERTIES): 2746 self._retreat(index) 2747 return 
None 2748 return self.expression( 2749 exp.SerdeProperties, 2750 **{ # type: ignore 2751 "expressions": self._parse_wrapped_properties(), 2752 "with": with_, 2753 }, 2754 ) 2755 2756 def _parse_row_format( 2757 self, match_row: bool = False 2758 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2759 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2760 return None 2761 2762 if self._match_text_seq("SERDE"): 2763 this = self._parse_string() 2764 2765 serde_properties = self._parse_serde_properties() 2766 2767 return self.expression( 2768 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2769 ) 2770 2771 self._match_text_seq("DELIMITED") 2772 2773 kwargs = {} 2774 2775 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2776 kwargs["fields"] = self._parse_string() 2777 if self._match_text_seq("ESCAPED", "BY"): 2778 kwargs["escaped"] = self._parse_string() 2779 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2780 kwargs["collection_items"] = self._parse_string() 2781 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2782 kwargs["map_keys"] = self._parse_string() 2783 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2784 kwargs["lines"] = self._parse_string() 2785 if self._match_text_seq("NULL", "DEFINED", "AS"): 2786 kwargs["null"] = self._parse_string() 2787 2788 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2789 2790 def _parse_load(self) -> exp.LoadData | exp.Command: 2791 if self._match_text_seq("DATA"): 2792 local = self._match_text_seq("LOCAL") 2793 self._match_text_seq("INPATH") 2794 inpath = self._parse_string() 2795 overwrite = self._match(TokenType.OVERWRITE) 2796 self._match_pair(TokenType.INTO, TokenType.TABLE) 2797 2798 return self.expression( 2799 exp.LoadData, 2800 this=self._parse_table(schema=True), 2801 local=local, 2802 overwrite=overwrite, 2803 inpath=inpath, 2804 partition=self._parse_partition(), 2805 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2806 serde=self._match_text_seq("SERDE") and self._parse_string(), 2807 ) 2808 return self._parse_as_command(self._prev) 2809 2810 def _parse_delete(self) -> exp.Delete: 2811 # This handles MySQL's "Multiple-Table Syntax" 2812 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2813 tables = None 2814 comments = self._prev_comments 2815 if not self._match(TokenType.FROM, advance=False): 2816 tables = self._parse_csv(self._parse_table) or None 2817 2818 returning = self._parse_returning() 2819 2820 return self.expression( 2821 exp.Delete, 2822 comments=comments, 2823 tables=tables, 2824 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2825 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2826 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2827 where=self._parse_where(), 2828 returning=returning or self._parse_returning(), 2829 limit=self._parse_limit(), 2830 ) 2831 2832 def _parse_update(self) -> exp.Update: 2833 comments = self._prev_comments 2834 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2835 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2836 returning = self._parse_returning() 2837 return self.expression( 2838 exp.Update, 2839 comments=comments, 2840 **{ # type: ignore 2841 "this": this, 2842 "expressions": expressions, 2843 "from": self._parse_from(joins=True), 2844 "where": self._parse_where(), 2845 "returning": returning or 
self._parse_returning(), 2846 "order": self._parse_order(), 2847 "limit": self._parse_limit(), 2848 }, 2849 ) 2850 2851 def _parse_uncache(self) -> exp.Uncache: 2852 if not self._match(TokenType.TABLE): 2853 self.raise_error("Expecting TABLE after UNCACHE") 2854 2855 return self.expression( 2856 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2857 ) 2858 2859 def _parse_cache(self) -> exp.Cache: 2860 lazy = self._match_text_seq("LAZY") 2861 self._match(TokenType.TABLE) 2862 table = self._parse_table(schema=True) 2863 2864 options = [] 2865 if self._match_text_seq("OPTIONS"): 2866 self._match_l_paren() 2867 k = self._parse_string() 2868 self._match(TokenType.EQ) 2869 v = self._parse_string() 2870 options = [k, v] 2871 self._match_r_paren() 2872 2873 self._match(TokenType.ALIAS) 2874 return self.expression( 2875 exp.Cache, 2876 this=table, 2877 lazy=lazy, 2878 options=options, 2879 expression=self._parse_select(nested=True), 2880 ) 2881 2882 def _parse_partition(self) -> t.Optional[exp.Partition]: 2883 if not self._match(TokenType.PARTITION): 2884 return None 2885 2886 return self.expression( 2887 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2888 ) 2889 2890 def _parse_value(self) -> t.Optional[exp.Tuple]: 2891 if self._match(TokenType.L_PAREN): 2892 expressions = self._parse_csv(self._parse_expression) 2893 self._match_r_paren() 2894 return self.expression(exp.Tuple, expressions=expressions) 2895 2896 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2897 expression = self._parse_expression() 2898 if expression: 2899 return self.expression(exp.Tuple, expressions=[expression]) 2900 return None 2901 2902 def _parse_projections(self) -> t.List[exp.Expression]: 2903 return self._parse_expressions() 2904 2905 def _parse_select( 2906 self, 2907 nested: bool = False, 2908 table: bool = False, 2909 parse_subquery_alias: bool = True, 2910 parse_set_operation: bool = True, 2911 ) -> t.Optional[exp.Expression]: 2912 cte = self._parse_with() 2913 2914 if cte: 2915 this = self._parse_statement() 2916 2917 if not this: 2918 self.raise_error("Failed to parse any statement following CTE") 2919 return cte 2920 2921 if "with" in this.arg_types: 2922 this.set("with", cte) 2923 else: 2924 self.raise_error(f"{this.key} does not support CTE") 2925 this = cte 2926 2927 return this 2928 2929 # duckdb supports leading with FROM x 2930 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2931 2932 if self._match(TokenType.SELECT): 2933 comments = self._prev_comments 2934 2935 hint = self._parse_hint() 2936 2937 if self._next and not self._next.token_type == TokenType.DOT: 2938 all_ = self._match(TokenType.ALL) 2939 distinct = self._match_set(self.DISTINCT_TOKENS) 2940 else: 2941 all_, distinct = None, None 2942 2943 kind = ( 2944 self._match(TokenType.ALIAS) 2945 and self._match_texts(("STRUCT", "VALUE")) 2946 and self._prev.text.upper() 2947 ) 2948 2949 if distinct: 2950 distinct = self.expression( 2951 exp.Distinct, 2952 on=self._parse_value() if self._match(TokenType.ON) else None, 2953 ) 2954 2955 if all_ and distinct: 2956 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2957 2958 limit = self._parse_limit(top=True) 2959 projections = self._parse_projections() 2960 2961 this = self.expression( 2962 exp.Select, 2963 kind=kind, 2964 hint=hint, 2965 distinct=distinct, 2966 expressions=projections, 2967 limit=limit, 2968 ) 2969 this.comments = comments 2970 2971 into = 
self._parse_into() 2972 if into: 2973 this.set("into", into) 2974 2975 if not from_: 2976 from_ = self._parse_from() 2977 2978 if from_: 2979 this.set("from", from_) 2980 2981 this = self._parse_query_modifiers(this) 2982 elif (table or nested) and self._match(TokenType.L_PAREN): 2983 if self._match(TokenType.PIVOT): 2984 this = self._parse_simplified_pivot() 2985 elif self._match(TokenType.FROM): 2986 this = exp.select("*").from_( 2987 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2988 ) 2989 else: 2990 this = ( 2991 self._parse_table() 2992 if table 2993 else self._parse_select(nested=True, parse_set_operation=False) 2994 ) 2995 2996 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 2997 # in case a modifier (e.g. join) is following 2998 if table and isinstance(this, exp.Values) and this.alias: 2999 alias = this.args["alias"].pop() 3000 this = exp.Table(this=this, alias=alias) 3001 3002 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3003 3004 self._match_r_paren() 3005 3006 # We return early here so that the UNION isn't attached to the subquery by the 3007 # following call to _parse_set_operations, but instead becomes the parent node 3008 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3009 elif self._match(TokenType.VALUES, advance=False): 3010 this = self._parse_derived_table_values() 3011 elif from_: 3012 this = exp.select("*").from_(from_.this, copy=False) 3013 elif self._match(TokenType.SUMMARIZE): 3014 table = self._match(TokenType.TABLE) 3015 this = self._parse_select() or self._parse_string() or self._parse_table() 3016 return self.expression(exp.Summarize, this=this, table=table) 3017 elif self._match(TokenType.DESCRIBE): 3018 this = self._parse_describe() 3019 elif self._match_text_seq("STREAM"): 3020 this = self.expression(exp.Stream, this=self._parse_function()) 3021 else: 3022 this = None 3023 3024 return self._parse_set_operations(this) if parse_set_operation else this 3025 3026 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3027 if not skip_with_token and not self._match(TokenType.WITH): 3028 return None 3029 3030 comments = self._prev_comments 3031 recursive = self._match(TokenType.RECURSIVE) 3032 3033 expressions = [] 3034 while True: 3035 expressions.append(self._parse_cte()) 3036 3037 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3038 break 3039 else: 3040 self._match(TokenType.WITH) 3041 3042 return self.expression( 3043 exp.With, comments=comments, expressions=expressions, recursive=recursive 3044 ) 3045 3046 def _parse_cte(self) -> exp.CTE: 3047 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3048 if not alias or not alias.this: 3049 self.raise_error("Expected CTE to have alias") 3050 3051 self._match(TokenType.ALIAS) 3052 comments = self._prev_comments 3053 3054 if self._match_text_seq("NOT", "MATERIALIZED"): 3055 materialized = False 3056 elif self._match_text_seq("MATERIALIZED"): 3057 materialized = True 3058 else: 3059 materialized = None 3060 3061 return self.expression( 3062 exp.CTE, 3063 this=self._parse_wrapped(self._parse_statement), 3064 alias=alias, 3065 materialized=materialized, 3066 comments=comments, 3067 ) 3068 3069 def _parse_table_alias( 3070 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3071 ) -> t.Optional[exp.TableAlias]: 3072 any_token = self._match(TokenType.ALIAS) 3073 alias = ( 3074 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3075 or 
self._parse_string_as_identifier() 3076 ) 3077 3078 index = self._index 3079 if self._match(TokenType.L_PAREN): 3080 columns = self._parse_csv(self._parse_function_parameter) 3081 self._match_r_paren() if columns else self._retreat(index) 3082 else: 3083 columns = None 3084 3085 if not alias and not columns: 3086 return None 3087 3088 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3089 3090 # We bubble up comments from the Identifier to the TableAlias 3091 if isinstance(alias, exp.Identifier): 3092 table_alias.add_comments(alias.pop_comments()) 3093 3094 return table_alias 3095 3096 def _parse_subquery( 3097 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3098 ) -> t.Optional[exp.Subquery]: 3099 if not this: 3100 return None 3101 3102 return self.expression( 3103 exp.Subquery, 3104 this=this, 3105 pivots=self._parse_pivots(), 3106 alias=self._parse_table_alias() if parse_alias else None, 3107 sample=self._parse_table_sample(), 3108 ) 3109 3110 def _implicit_unnests_to_explicit(self, this: E) -> E: 3111 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3112 3113 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3114 for i, join in enumerate(this.args.get("joins") or []): 3115 table = join.this 3116 normalized_table = table.copy() 3117 normalized_table.meta["maybe_column"] = True 3118 normalized_table = _norm(normalized_table, dialect=self.dialect) 3119 3120 if isinstance(table, exp.Table) and not join.args.get("on"): 3121 if normalized_table.parts[0].name in refs: 3122 table_as_column = table.to_column() 3123 unnest = exp.Unnest(expressions=[table_as_column]) 3124 3125 # Table.to_column creates a parent Alias node that we want to convert to 3126 # a TableAlias and attach to the Unnest, so it matches the parser's output 3127 if isinstance(table.args.get("alias"), exp.TableAlias): 3128 table_as_column.replace(table_as_column.this) 3129 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3130 3131 table.replace(unnest) 3132 3133 refs.add(normalized_table.alias_or_name) 3134 3135 return this 3136 3137 def _parse_query_modifiers( 3138 self, this: t.Optional[exp.Expression] 3139 ) -> t.Optional[exp.Expression]: 3140 if isinstance(this, (exp.Query, exp.Table)): 3141 for join in self._parse_joins(): 3142 this.append("joins", join) 3143 for lateral in iter(self._parse_lateral, None): 3144 this.append("laterals", lateral) 3145 3146 while True: 3147 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3148 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3149 key, expression = parser(self) 3150 3151 if expression: 3152 this.set(key, expression) 3153 if key == "limit": 3154 offset = expression.args.pop("offset", None) 3155 3156 if offset: 3157 offset = exp.Offset(expression=offset) 3158 this.set("offset", offset) 3159 3160 limit_by_expressions = expression.expressions 3161 expression.set("expressions", None) 3162 offset.set("expressions", limit_by_expressions) 3163 continue 3164 break 3165 3166 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3167 this = self._implicit_unnests_to_explicit(this) 3168 3169 return this 3170 3171 def _parse_hint(self) -> t.Optional[exp.Hint]: 3172 if self._match(TokenType.HINT): 3173 hints = [] 3174 for hint in iter( 3175 lambda: self._parse_csv( 3176 lambda: self._parse_function() or self._parse_var(upper=True) 3177 ), 3178 [], 3179 ): 3180 hints.extend(hint) 3181 3182 if not 
self._match_pair(TokenType.STAR, TokenType.SLASH): 3183 self.raise_error("Expected */ after HINT") 3184 3185 return self.expression(exp.Hint, expressions=hints) 3186 3187 return None 3188 3189 def _parse_into(self) -> t.Optional[exp.Into]: 3190 if not self._match(TokenType.INTO): 3191 return None 3192 3193 temp = self._match(TokenType.TEMPORARY) 3194 unlogged = self._match_text_seq("UNLOGGED") 3195 self._match(TokenType.TABLE) 3196 3197 return self.expression( 3198 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3199 ) 3200 3201 def _parse_from( 3202 self, joins: bool = False, skip_from_token: bool = False 3203 ) -> t.Optional[exp.From]: 3204 if not skip_from_token and not self._match(TokenType.FROM): 3205 return None 3206 3207 return self.expression( 3208 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3209 ) 3210 3211 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3212 return self.expression( 3213 exp.MatchRecognizeMeasure, 3214 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3215 this=self._parse_expression(), 3216 ) 3217 3218 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3219 if not self._match(TokenType.MATCH_RECOGNIZE): 3220 return None 3221 3222 self._match_l_paren() 3223 3224 partition = self._parse_partition_by() 3225 order = self._parse_order() 3226 3227 measures = ( 3228 self._parse_csv(self._parse_match_recognize_measure) 3229 if self._match_text_seq("MEASURES") 3230 else None 3231 ) 3232 3233 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3234 rows = exp.var("ONE ROW PER MATCH") 3235 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3236 text = "ALL ROWS PER MATCH" 3237 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3238 text += " SHOW EMPTY MATCHES" 3239 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3240 text += " OMIT EMPTY MATCHES" 3241 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3242 text += " WITH UNMATCHED ROWS" 3243 rows = exp.var(text) 3244 else: 3245 rows = None 3246 3247 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3248 text = "AFTER MATCH SKIP" 3249 if self._match_text_seq("PAST", "LAST", "ROW"): 3250 text += " PAST LAST ROW" 3251 elif self._match_text_seq("TO", "NEXT", "ROW"): 3252 text += " TO NEXT ROW" 3253 elif self._match_text_seq("TO", "FIRST"): 3254 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3255 elif self._match_text_seq("TO", "LAST"): 3256 text += f" TO LAST {self._advance_any().text}" # type: ignore 3257 after = exp.var(text) 3258 else: 3259 after = None 3260 3261 if self._match_text_seq("PATTERN"): 3262 self._match_l_paren() 3263 3264 if not self._curr: 3265 self.raise_error("Expecting )", self._curr) 3266 3267 paren = 1 3268 start = self._curr 3269 3270 while self._curr and paren > 0: 3271 if self._curr.token_type == TokenType.L_PAREN: 3272 paren += 1 3273 if self._curr.token_type == TokenType.R_PAREN: 3274 paren -= 1 3275 3276 end = self._prev 3277 self._advance() 3278 3279 if paren > 0: 3280 self.raise_error("Expecting )", self._curr) 3281 3282 pattern = exp.var(self._find_sql(start, end)) 3283 else: 3284 pattern = None 3285 3286 define = ( 3287 self._parse_csv(self._parse_name_as_expression) 3288 if self._match_text_seq("DEFINE") 3289 else None 3290 ) 3291 3292 self._match_r_paren() 3293 3294 return self.expression( 3295 exp.MatchRecognize, 3296 partition_by=partition, 3297 order=order, 3298 measures=measures, 3299 rows=rows, 3300 after=after, 
3301 pattern=pattern, 3302 define=define, 3303 alias=self._parse_table_alias(), 3304 ) 3305 3306 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3307 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3308 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3309 cross_apply = False 3310 3311 if cross_apply is not None: 3312 this = self._parse_select(table=True) 3313 view = None 3314 outer = None 3315 elif self._match(TokenType.LATERAL): 3316 this = self._parse_select(table=True) 3317 view = self._match(TokenType.VIEW) 3318 outer = self._match(TokenType.OUTER) 3319 else: 3320 return None 3321 3322 if not this: 3323 this = ( 3324 self._parse_unnest() 3325 or self._parse_function() 3326 or self._parse_id_var(any_token=False) 3327 ) 3328 3329 while self._match(TokenType.DOT): 3330 this = exp.Dot( 3331 this=this, 3332 expression=self._parse_function() or self._parse_id_var(any_token=False), 3333 ) 3334 3335 if view: 3336 table = self._parse_id_var(any_token=False) 3337 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3338 table_alias: t.Optional[exp.TableAlias] = self.expression( 3339 exp.TableAlias, this=table, columns=columns 3340 ) 3341 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3342 # We move the alias from the lateral's child node to the lateral itself 3343 table_alias = this.args["alias"].pop() 3344 else: 3345 table_alias = self._parse_table_alias() 3346 3347 return self.expression( 3348 exp.Lateral, 3349 this=this, 3350 view=view, 3351 outer=outer, 3352 alias=table_alias, 3353 cross_apply=cross_apply, 3354 ) 3355 3356 def _parse_join_parts( 3357 self, 3358 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3359 return ( 3360 self._match_set(self.JOIN_METHODS) and self._prev, 3361 self._match_set(self.JOIN_SIDES) and self._prev, 3362 self._match_set(self.JOIN_KINDS) and self._prev, 3363 ) 3364 3365 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3366 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3367 this = self._parse_column() 3368 if isinstance(this, exp.Column): 3369 return this.this 3370 return this 3371 3372 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3373 3374 def _parse_join( 3375 self, skip_join_token: bool = False, parse_bracket: bool = False 3376 ) -> t.Optional[exp.Join]: 3377 if self._match(TokenType.COMMA): 3378 return self.expression(exp.Join, this=self._parse_table()) 3379 3380 index = self._index 3381 method, side, kind = self._parse_join_parts() 3382 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3383 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3384 3385 if not skip_join_token and not join: 3386 self._retreat(index) 3387 kind = None 3388 method = None 3389 side = None 3390 3391 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3392 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3393 3394 if not skip_join_token and not join and not outer_apply and not cross_apply: 3395 return None 3396 3397 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3398 3399 if method: 3400 kwargs["method"] = method.text 3401 if side: 3402 kwargs["side"] = side.text 3403 if kind: 3404 kwargs["kind"] = kind.text 3405 if hint: 3406 kwargs["hint"] = hint 3407 3408 if self._match(TokenType.MATCH_CONDITION): 3409 kwargs["match_condition"] = 
self._parse_wrapped(self._parse_comparison) 3410 3411 if self._match(TokenType.ON): 3412 kwargs["on"] = self._parse_assignment() 3413 elif self._match(TokenType.USING): 3414 kwargs["using"] = self._parse_using_identifiers() 3415 elif ( 3416 not (outer_apply or cross_apply) 3417 and not isinstance(kwargs["this"], exp.Unnest) 3418 and not (kind and kind.token_type == TokenType.CROSS) 3419 ): 3420 index = self._index 3421 joins: t.Optional[list] = list(self._parse_joins()) 3422 3423 if joins and self._match(TokenType.ON): 3424 kwargs["on"] = self._parse_assignment() 3425 elif joins and self._match(TokenType.USING): 3426 kwargs["using"] = self._parse_using_identifiers() 3427 else: 3428 joins = None 3429 self._retreat(index) 3430 3431 kwargs["this"].set("joins", joins if joins else None) 3432 3433 comments = [c for token in (method, side, kind) if token for c in token.comments] 3434 return self.expression(exp.Join, comments=comments, **kwargs) 3435 3436 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3437 this = self._parse_assignment() 3438 3439 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3440 return this 3441 3442 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3443 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3444 3445 return this 3446 3447 def _parse_index_params(self) -> exp.IndexParameters: 3448 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3449 3450 if self._match(TokenType.L_PAREN, advance=False): 3451 columns = self._parse_wrapped_csv(self._parse_with_operator) 3452 else: 3453 columns = None 3454 3455 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3456 partition_by = self._parse_partition_by() 3457 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3458 tablespace = ( 3459 self._parse_var(any_token=True) 3460 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3461 else None 3462 ) 3463 where = self._parse_where() 3464 3465 on = self._parse_field() if self._match(TokenType.ON) else None 3466 3467 return self.expression( 3468 exp.IndexParameters, 3469 using=using, 3470 columns=columns, 3471 include=include, 3472 partition_by=partition_by, 3473 where=where, 3474 with_storage=with_storage, 3475 tablespace=tablespace, 3476 on=on, 3477 ) 3478 3479 def _parse_index( 3480 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3481 ) -> t.Optional[exp.Index]: 3482 if index or anonymous: 3483 unique = None 3484 primary = None 3485 amp = None 3486 3487 self._match(TokenType.ON) 3488 self._match(TokenType.TABLE) # hive 3489 table = self._parse_table_parts(schema=True) 3490 else: 3491 unique = self._match(TokenType.UNIQUE) 3492 primary = self._match_text_seq("PRIMARY") 3493 amp = self._match_text_seq("AMP") 3494 3495 if not self._match(TokenType.INDEX): 3496 return None 3497 3498 index = self._parse_id_var() 3499 table = None 3500 3501 params = self._parse_index_params() 3502 3503 return self.expression( 3504 exp.Index, 3505 this=index, 3506 table=table, 3507 unique=unique, 3508 primary=primary, 3509 amp=amp, 3510 params=params, 3511 ) 3512 3513 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3514 hints: t.List[exp.Expression] = [] 3515 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3516 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3517 hints.append( 3518 self.expression( 3519 exp.WithTableHint, 3520 
expressions=self._parse_csv( 3521 lambda: self._parse_function() or self._parse_var(any_token=True) 3522 ), 3523 ) 3524 ) 3525 self._match_r_paren() 3526 else: 3527 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3528 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3529 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3530 3531 self._match_set((TokenType.INDEX, TokenType.KEY)) 3532 if self._match(TokenType.FOR): 3533 hint.set("target", self._advance_any() and self._prev.text.upper()) 3534 3535 hint.set("expressions", self._parse_wrapped_id_vars()) 3536 hints.append(hint) 3537 3538 return hints or None 3539 3540 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3541 return ( 3542 (not schema and self._parse_function(optional_parens=False)) 3543 or self._parse_id_var(any_token=False) 3544 or self._parse_string_as_identifier() 3545 or self._parse_placeholder() 3546 ) 3547 3548 def _parse_table_parts( 3549 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3550 ) -> exp.Table: 3551 catalog = None 3552 db = None 3553 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3554 3555 while self._match(TokenType.DOT): 3556 if catalog: 3557 # This allows nesting the table in arbitrarily many dot expressions if needed 3558 table = self.expression( 3559 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3560 ) 3561 else: 3562 catalog = db 3563 db = table 3564 # "" used for tsql FROM a..b case 3565 table = self._parse_table_part(schema=schema) or "" 3566 3567 if ( 3568 wildcard 3569 and self._is_connected() 3570 and (isinstance(table, exp.Identifier) or not table) 3571 and self._match(TokenType.STAR) 3572 ): 3573 if isinstance(table, exp.Identifier): 3574 table.args["this"] += "*" 3575 else: 3576 table = exp.Identifier(this="*") 3577 3578 # We bubble up comments from the Identifier to the Table 3579 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3580 3581 if is_db_reference: 3582 catalog = db 3583 db = table 3584 table = None 3585 3586 if not table and not is_db_reference: 3587 self.raise_error(f"Expected table name but got {self._curr}") 3588 if not db and is_db_reference: 3589 self.raise_error(f"Expected database name but got {self._curr}") 3590 3591 table = self.expression( 3592 exp.Table, 3593 comments=comments, 3594 this=table, 3595 db=db, 3596 catalog=catalog, 3597 ) 3598 3599 changes = self._parse_changes() 3600 if changes: 3601 table.set("changes", changes) 3602 3603 at_before = self._parse_historical_data() 3604 if at_before: 3605 table.set("when", at_before) 3606 3607 pivots = self._parse_pivots() 3608 if pivots: 3609 table.set("pivots", pivots) 3610 3611 return table 3612 3613 def _parse_table( 3614 self, 3615 schema: bool = False, 3616 joins: bool = False, 3617 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3618 parse_bracket: bool = False, 3619 is_db_reference: bool = False, 3620 parse_partition: bool = False, 3621 ) -> t.Optional[exp.Expression]: 3622 lateral = self._parse_lateral() 3623 if lateral: 3624 return lateral 3625 3626 unnest = self._parse_unnest() 3627 if unnest: 3628 return unnest 3629 3630 values = self._parse_derived_table_values() 3631 if values: 3632 return values 3633 3634 subquery = self._parse_select(table=True) 3635 if subquery: 3636 if not subquery.args.get("pivots"): 3637 subquery.set("pivots", self._parse_pivots()) 3638 return subquery 3639 3640 bracket = parse_bracket and 
self._parse_bracket(None) 3641 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3642 3643 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3644 self._parse_table 3645 ) 3646 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3647 3648 only = self._match(TokenType.ONLY) 3649 3650 this = t.cast( 3651 exp.Expression, 3652 bracket 3653 or rows_from 3654 or self._parse_bracket( 3655 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3656 ), 3657 ) 3658 3659 if only: 3660 this.set("only", only) 3661 3662 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3663 self._match_text_seq("*") 3664 3665 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3666 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3667 this.set("partition", self._parse_partition()) 3668 3669 if schema: 3670 return self._parse_schema(this=this) 3671 3672 version = self._parse_version() 3673 3674 if version: 3675 this.set("version", version) 3676 3677 if self.dialect.ALIAS_POST_TABLESAMPLE: 3678 this.set("sample", self._parse_table_sample()) 3679 3680 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3681 if alias: 3682 this.set("alias", alias) 3683 3684 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3685 return self.expression( 3686 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3687 ) 3688 3689 this.set("hints", self._parse_table_hints()) 3690 3691 if not this.args.get("pivots"): 3692 this.set("pivots", self._parse_pivots()) 3693 3694 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3695 this.set("sample", self._parse_table_sample()) 3696 3697 if joins: 3698 for join in self._parse_joins(): 3699 this.append("joins", join) 3700 3701 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3702 this.set("ordinality", True) 3703 this.set("alias", self._parse_table_alias()) 3704 3705 return this 3706 3707 def _parse_version(self) -> t.Optional[exp.Version]: 3708 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3709 this = "TIMESTAMP" 3710 elif self._match(TokenType.VERSION_SNAPSHOT): 3711 this = "VERSION" 3712 else: 3713 return None 3714 3715 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3716 kind = self._prev.text.upper() 3717 start = self._parse_bitwise() 3718 self._match_texts(("TO", "AND")) 3719 end = self._parse_bitwise() 3720 expression: t.Optional[exp.Expression] = self.expression( 3721 exp.Tuple, expressions=[start, end] 3722 ) 3723 elif self._match_text_seq("CONTAINED", "IN"): 3724 kind = "CONTAINED IN" 3725 expression = self.expression( 3726 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3727 ) 3728 elif self._match(TokenType.ALL): 3729 kind = "ALL" 3730 expression = None 3731 else: 3732 self._match_text_seq("AS", "OF") 3733 kind = "AS OF" 3734 expression = self._parse_type() 3735 3736 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3737 3738 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3739 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3740 index = self._index 3741 historical_data = None 3742 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3743 this = self._prev.text.upper() 3744 kind = ( 3745 self._match(TokenType.L_PAREN) 3746 and self._match_texts(self.HISTORICAL_DATA_KIND) 3747 and self._prev.text.upper() 3748 ) 3749 expression = 
self._match(TokenType.FARROW) and self._parse_bitwise() 3750 3751 if expression: 3752 self._match_r_paren() 3753 historical_data = self.expression( 3754 exp.HistoricalData, this=this, kind=kind, expression=expression 3755 ) 3756 else: 3757 self._retreat(index) 3758 3759 return historical_data 3760 3761 def _parse_changes(self) -> t.Optional[exp.Changes]: 3762 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3763 return None 3764 3765 information = self._parse_var(any_token=True) 3766 self._match_r_paren() 3767 3768 return self.expression( 3769 exp.Changes, 3770 information=information, 3771 at_before=self._parse_historical_data(), 3772 end=self._parse_historical_data(), 3773 ) 3774 3775 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3776 if not self._match(TokenType.UNNEST): 3777 return None 3778 3779 expressions = self._parse_wrapped_csv(self._parse_equality) 3780 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3781 3782 alias = self._parse_table_alias() if with_alias else None 3783 3784 if alias: 3785 if self.dialect.UNNEST_COLUMN_ONLY: 3786 if alias.args.get("columns"): 3787 self.raise_error("Unexpected extra column alias in unnest.") 3788 3789 alias.set("columns", [alias.this]) 3790 alias.set("this", None) 3791 3792 columns = alias.args.get("columns") or [] 3793 if offset and len(expressions) < len(columns): 3794 offset = columns.pop() 3795 3796 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3797 self._match(TokenType.ALIAS) 3798 offset = self._parse_id_var( 3799 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3800 ) or exp.to_identifier("offset") 3801 3802 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3803 3804 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3805 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3806 if not is_derived and not ( 3807 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3808 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3809 ): 3810 return None 3811 3812 expressions = self._parse_csv(self._parse_value) 3813 alias = self._parse_table_alias() 3814 3815 if is_derived: 3816 self._match_r_paren() 3817 3818 return self.expression( 3819 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3820 ) 3821 3822 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3823 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3824 as_modifier and self._match_text_seq("USING", "SAMPLE") 3825 ): 3826 return None 3827 3828 bucket_numerator = None 3829 bucket_denominator = None 3830 bucket_field = None 3831 percent = None 3832 size = None 3833 seed = None 3834 3835 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3836 matched_l_paren = self._match(TokenType.L_PAREN) 3837 3838 if self.TABLESAMPLE_CSV: 3839 num = None 3840 expressions = self._parse_csv(self._parse_primary) 3841 else: 3842 expressions = None 3843 num = ( 3844 self._parse_factor() 3845 if self._match(TokenType.NUMBER, advance=False) 3846 else self._parse_primary() or self._parse_placeholder() 3847 ) 3848 3849 if self._match_text_seq("BUCKET"): 3850 bucket_numerator = self._parse_number() 3851 self._match_text_seq("OUT", "OF") 3852 bucket_denominator = self._parse_number() 3853 self._match(TokenType.ON) 3854 bucket_field = self._parse_field() 3855 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3856 percent = num
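# Illustrative examples for these branches (not exhaustive): "t TABLESAMPLE (10 PERCENT)"
# sets percent=10 via the branch above, "t TABLESAMPLE (10 ROWS)" sets size=10 via the
# next branch, and a bare "t TABLESAMPLE (10)" resolves to size or percent depending on
# the dialect's TABLESAMPLE_SIZE_IS_PERCENT flag.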
3857 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3858 size = num 3859 else: 3860 percent = num 3861 3862 if matched_l_paren: 3863 self._match_r_paren() 3864 3865 if self._match(TokenType.L_PAREN): 3866 method = self._parse_var(upper=True) 3867 seed = self._match(TokenType.COMMA) and self._parse_number() 3868 self._match_r_paren() 3869 elif self._match_texts(("SEED", "REPEATABLE")): 3870 seed = self._parse_wrapped(self._parse_number) 3871 3872 if not method and self.DEFAULT_SAMPLING_METHOD: 3873 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3874 3875 return self.expression( 3876 exp.TableSample, 3877 expressions=expressions, 3878 method=method, 3879 bucket_numerator=bucket_numerator, 3880 bucket_denominator=bucket_denominator, 3881 bucket_field=bucket_field, 3882 percent=percent, 3883 size=size, 3884 seed=seed, 3885 ) 3886 3887 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3888 return list(iter(self._parse_pivot, None)) or None 3889 3890 def _parse_joins(self) -> t.Iterator[exp.Join]: 3891 return iter(self._parse_join, None) 3892 3893 # https://duckdb.org/docs/sql/statements/pivot 3894 def _parse_simplified_pivot(self) -> exp.Pivot: 3895 def _parse_on() -> t.Optional[exp.Expression]: 3896 this = self._parse_bitwise() 3897 return self._parse_in(this) if self._match(TokenType.IN) else this 3898 3899 this = self._parse_table() 3900 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3901 using = self._match(TokenType.USING) and self._parse_csv( 3902 lambda: self._parse_alias(self._parse_function()) 3903 ) 3904 group = self._parse_group() 3905 return self.expression( 3906 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3907 ) 3908 3909 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3910 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3911 this = self._parse_select_or_expression() 3912 3913 self._match(TokenType.ALIAS) 3914 alias = self._parse_bitwise() 3915 if alias: 3916 if isinstance(alias, exp.Column) and not alias.db: 3917 alias = alias.this 3918 return self.expression(exp.PivotAlias, this=this, alias=alias) 3919 3920 return this 3921 3922 value = self._parse_column() 3923 3924 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3925 self.raise_error("Expecting IN (") 3926 3927 if self._match(TokenType.ANY): 3928 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3929 else: 3930 exprs = self._parse_csv(_parse_aliased_expression) 3931 3932 self._match_r_paren() 3933 return self.expression(exp.In, this=value, expressions=exprs) 3934 3935 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3936 index = self._index 3937 include_nulls = None 3938 3939 if self._match(TokenType.PIVOT): 3940 unpivot = False 3941 elif self._match(TokenType.UNPIVOT): 3942 unpivot = True 3943 3944 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3945 if self._match_text_seq("INCLUDE", "NULLS"): 3946 include_nulls = True 3947 elif self._match_text_seq("EXCLUDE", "NULLS"): 3948 include_nulls = False 3949 else: 3950 return None 3951 3952 expressions = [] 3953 3954 if not self._match(TokenType.L_PAREN): 3955 self._retreat(index) 3956 return None 3957 3958 if unpivot: 3959 expressions = self._parse_csv(self._parse_column) 3960 else: 3961 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3962 3963 if not expressions: 3964 self.raise_error("Failed to parse PIVOT's aggregation list") 3965 3966 if 
not self._match(TokenType.FOR): 3967 self.raise_error("Expecting FOR") 3968 3969 field = self._parse_pivot_in() 3970 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3971 self._parse_bitwise 3972 ) 3973 3974 self._match_r_paren() 3975 3976 pivot = self.expression( 3977 exp.Pivot, 3978 expressions=expressions, 3979 field=field, 3980 unpivot=unpivot, 3981 include_nulls=include_nulls, 3982 default_on_null=default_on_null, 3983 ) 3984 3985 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3986 pivot.set("alias", self._parse_table_alias()) 3987 3988 if not unpivot: 3989 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3990 3991 columns: t.List[exp.Expression] = [] 3992 for fld in pivot.args["field"].expressions: 3993 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3994 for name in names: 3995 if self.PREFIXED_PIVOT_COLUMNS: 3996 name = f"{name}_{field_name}" if name else field_name 3997 else: 3998 name = f"{field_name}_{name}" if name else field_name 3999 4000 columns.append(exp.to_identifier(name)) 4001 4002 pivot.set("columns", columns) 4003 4004 return pivot 4005 4006 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4007 return [agg.alias for agg in aggregations] 4008 4009 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4010 if not skip_where_token and not self._match(TokenType.PREWHERE): 4011 return None 4012 4013 return self.expression( 4014 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4015 ) 4016 4017 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4018 if not skip_where_token and not self._match(TokenType.WHERE): 4019 return None 4020 4021 return self.expression( 4022 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4023 ) 4024 4025 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4026 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4027 return None 4028 4029 elements: t.Dict[str, t.Any] = defaultdict(list) 4030 4031 if self._match(TokenType.ALL): 4032 elements["all"] = True 4033 elif self._match(TokenType.DISTINCT): 4034 elements["all"] = False 4035 4036 while True: 4037 index = self._index 4038 4039 elements["expressions"].extend( 4040 self._parse_csv( 4041 lambda: None 4042 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4043 else self._parse_assignment() 4044 ) 4045 ) 4046 4047 before_with_index = self._index 4048 with_prefix = self._match(TokenType.WITH) 4049 4050 if self._match(TokenType.ROLLUP): 4051 elements["rollup"].append( 4052 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4053 ) 4054 elif self._match(TokenType.CUBE): 4055 elements["cube"].append( 4056 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4057 ) 4058 elif self._match(TokenType.GROUPING_SETS): 4059 elements["grouping_sets"].append( 4060 self.expression( 4061 exp.GroupingSets, 4062 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4063 ) 4064 ) 4065 elif self._match_text_seq("TOTALS"): 4066 elements["totals"] = True # type: ignore 4067 4068 if before_with_index <= self._index <= before_with_index + 1: 4069 self._retreat(before_with_index) 4070 break 4071 4072 if index == self._index: 4073 break 4074 4075 return self.expression(exp.Group, **elements) # type: ignore 4076 4077 def _parse_cube_or_rollup(self, kind: 
t.Type[E], with_prefix: bool = False) -> E: 4078 return self.expression( 4079 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4080 ) 4081 4082 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4083 if self._match(TokenType.L_PAREN): 4084 grouping_set = self._parse_csv(self._parse_column) 4085 self._match_r_paren() 4086 return self.expression(exp.Tuple, expressions=grouping_set) 4087 4088 return self._parse_column() 4089 4090 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4091 if not skip_having_token and not self._match(TokenType.HAVING): 4092 return None 4093 return self.expression(exp.Having, this=self._parse_assignment()) 4094 4095 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4096 if not self._match(TokenType.QUALIFY): 4097 return None 4098 return self.expression(exp.Qualify, this=self._parse_assignment()) 4099 4100 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4101 if skip_start_token: 4102 start = None 4103 elif self._match(TokenType.START_WITH): 4104 start = self._parse_assignment() 4105 else: 4106 return None 4107 4108 self._match(TokenType.CONNECT_BY) 4109 nocycle = self._match_text_seq("NOCYCLE") 4110 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4111 exp.Prior, this=self._parse_bitwise() 4112 ) 4113 connect = self._parse_assignment() 4114 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4115 4116 if not start and self._match(TokenType.START_WITH): 4117 start = self._parse_assignment() 4118 4119 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4120 4121 def _parse_name_as_expression(self) -> exp.Alias: 4122 return self.expression( 4123 exp.Alias, 4124 alias=self._parse_id_var(any_token=True), 4125 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4126 ) 4127 4128 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4129 if self._match_text_seq("INTERPOLATE"): 4130 return self._parse_wrapped_csv(self._parse_name_as_expression) 4131 return None 4132 4133 def _parse_order( 4134 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4135 ) -> t.Optional[exp.Expression]: 4136 siblings = None 4137 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4138 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4139 return this 4140 4141 siblings = True 4142 4143 return self.expression( 4144 exp.Order, 4145 this=this, 4146 expressions=self._parse_csv(self._parse_ordered), 4147 siblings=siblings, 4148 ) 4149 4150 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4151 if not self._match(token): 4152 return None 4153 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4154 4155 def _parse_ordered( 4156 self, parse_method: t.Optional[t.Callable] = None 4157 ) -> t.Optional[exp.Ordered]: 4158 this = parse_method() if parse_method else self._parse_assignment() 4159 if not this: 4160 return None 4161 4162 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4163 this = exp.var("ALL") 4164 4165 asc = self._match(TokenType.ASC) 4166 desc = self._match(TokenType.DESC) or (asc and False) 4167 4168 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4169 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4170 4171 nulls_first = is_nulls_first or False 4172 explicitly_null_ordered = is_nulls_first or is_nulls_last 4173 4174 if ( 4175 not explicitly_null_ordered 4176 and ( 4177 
(not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4178 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4179 ) 4180 and self.dialect.NULL_ORDERING != "nulls_are_last" 4181 ): 4182 nulls_first = True 4183 4184 if self._match_text_seq("WITH", "FILL"): 4185 with_fill = self.expression( 4186 exp.WithFill, 4187 **{ # type: ignore 4188 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4189 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4190 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4191 "interpolate": self._parse_interpolate(), 4192 }, 4193 ) 4194 else: 4195 with_fill = None 4196 4197 return self.expression( 4198 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4199 ) 4200 4201 def _parse_limit( 4202 self, 4203 this: t.Optional[exp.Expression] = None, 4204 top: bool = False, 4205 skip_limit_token: bool = False, 4206 ) -> t.Optional[exp.Expression]: 4207 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4208 comments = self._prev_comments 4209 if top: 4210 limit_paren = self._match(TokenType.L_PAREN) 4211 expression = self._parse_term() if limit_paren else self._parse_number() 4212 4213 if limit_paren: 4214 self._match_r_paren() 4215 else: 4216 expression = self._parse_term() 4217 4218 if self._match(TokenType.COMMA): 4219 offset = expression 4220 expression = self._parse_term() 4221 else: 4222 offset = None 4223 4224 limit_exp = self.expression( 4225 exp.Limit, 4226 this=this, 4227 expression=expression, 4228 offset=offset, 4229 comments=comments, 4230 expressions=self._parse_limit_by(), 4231 ) 4232 4233 return limit_exp 4234 4235 if self._match(TokenType.FETCH): 4236 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4237 direction = self._prev.text.upper() if direction else "FIRST" 4238 4239 count = self._parse_field(tokens=self.FETCH_TOKENS) 4240 percent = self._match(TokenType.PERCENT) 4241 4242 self._match_set((TokenType.ROW, TokenType.ROWS)) 4243 4244 only = self._match_text_seq("ONLY") 4245 with_ties = self._match_text_seq("WITH", "TIES") 4246 4247 if only and with_ties: 4248 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4249 4250 return self.expression( 4251 exp.Fetch, 4252 direction=direction, 4253 count=count, 4254 percent=percent, 4255 with_ties=with_ties, 4256 ) 4257 4258 return this 4259 4260 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4261 if not self._match(TokenType.OFFSET): 4262 return this 4263 4264 count = self._parse_term() 4265 self._match_set((TokenType.ROW, TokenType.ROWS)) 4266 4267 return self.expression( 4268 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4269 ) 4270 4271 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4272 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4273 4274 def _parse_locks(self) -> t.List[exp.Lock]: 4275 locks = [] 4276 while True: 4277 if self._match_text_seq("FOR", "UPDATE"): 4278 update = True 4279 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4280 "LOCK", "IN", "SHARE", "MODE" 4281 ): 4282 update = False 4283 else: 4284 break 4285 4286 expressions = None 4287 if self._match_text_seq("OF"): 4288 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4289 4290 wait: t.Optional[bool | exp.Expression] = None 4291 if self._match_text_seq("NOWAIT"): 4292 wait = True 4293 elif self._match_text_seq("WAIT"): 4294 wait = 
self._parse_primary() 4295 elif self._match_text_seq("SKIP", "LOCKED"): 4296 wait = False 4297 4298 locks.append( 4299 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4300 ) 4301 4302 return locks 4303 4304 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4305 while this and self._match_set(self.SET_OPERATIONS): 4306 token_type = self._prev.token_type 4307 4308 if token_type == TokenType.UNION: 4309 operation: t.Type[exp.SetOperation] = exp.Union 4310 elif token_type == TokenType.EXCEPT: 4311 operation = exp.Except 4312 else: 4313 operation = exp.Intersect 4314 4315 comments = self._prev.comments 4316 4317 if self._match(TokenType.DISTINCT): 4318 distinct: t.Optional[bool] = True 4319 elif self._match(TokenType.ALL): 4320 distinct = False 4321 else: 4322 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4323 if distinct is None: 4324 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4325 4326 by_name = self._match_text_seq("BY", "NAME") 4327 expression = self._parse_select(nested=True, parse_set_operation=False) 4328 4329 this = self.expression( 4330 operation, 4331 comments=comments, 4332 this=this, 4333 distinct=distinct, 4334 by_name=by_name, 4335 expression=expression, 4336 ) 4337 4338 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4339 expression = this.expression 4340 4341 if expression: 4342 for arg in self.SET_OP_MODIFIERS: 4343 expr = expression.args.get(arg) 4344 if expr: 4345 this.set(arg, expr.pop()) 4346 4347 return this 4348 4349 def _parse_expression(self) -> t.Optional[exp.Expression]: 4350 return self._parse_alias(self._parse_assignment()) 4351 4352 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4353 this = self._parse_disjunction() 4354 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4355 # This allows us to parse <non-identifier token> := <expr> 4356 this = exp.column( 4357 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4358 ) 4359 4360 while self._match_set(self.ASSIGNMENT): 4361 if isinstance(this, exp.Column) and len(this.parts) == 1: 4362 this = this.this 4363 4364 this = self.expression( 4365 self.ASSIGNMENT[self._prev.token_type], 4366 this=this, 4367 comments=self._prev_comments, 4368 expression=self._parse_assignment(), 4369 ) 4370 4371 return this 4372 4373 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4374 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4375 4376 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4377 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4378 4379 def _parse_equality(self) -> t.Optional[exp.Expression]: 4380 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4381 4382 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4383 return self._parse_tokens(self._parse_range, self.COMPARISON) 4384 4385 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4386 this = this or self._parse_bitwise() 4387 negate = self._match(TokenType.NOT) 4388 4389 if self._match_set(self.RANGE_PARSERS): 4390 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4391 if not expression: 4392 return this 4393 4394 this = expression 4395 elif self._match(TokenType.ISNULL): 4396 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4397 4398 # Postgres supports ISNULL and NOTNULL for conditions. 
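# (Illustratively, "x ISNULL" parses to x IS NULL above, and "x NOTNULL" below
# parses to NOT x IS NULL.)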
4399 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4400 if self._match(TokenType.NOTNULL): 4401 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4402 this = self.expression(exp.Not, this=this) 4403 4404 if negate: 4405 this = self._negate_range(this) 4406 4407 if self._match(TokenType.IS): 4408 this = self._parse_is(this) 4409 4410 return this 4411 4412 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4413 if not this: 4414 return this 4415 4416 return self.expression(exp.Not, this=this) 4417 4418 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4419 index = self._index - 1 4420 negate = self._match(TokenType.NOT) 4421 4422 if self._match_text_seq("DISTINCT", "FROM"): 4423 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4424 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4425 4426 if self._match(TokenType.JSON): 4427 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4428 4429 if self._match_text_seq("WITH"): 4430 _with = True 4431 elif self._match_text_seq("WITHOUT"): 4432 _with = False 4433 else: 4434 _with = None 4435 4436 unique = self._match(TokenType.UNIQUE) 4437 self._match_text_seq("KEYS") 4438 expression: t.Optional[exp.Expression] = self.expression( 4439 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4440 ) 4441 else: 4442 expression = self._parse_primary() or self._parse_null() 4443 if not expression: 4444 self._retreat(index) 4445 return None 4446 4447 this = self.expression(exp.Is, this=this, expression=expression) 4448 return self.expression(exp.Not, this=this) if negate else this 4449 4450 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4451 unnest = self._parse_unnest(with_alias=False) 4452 if unnest: 4453 this = self.expression(exp.In, this=this, unnest=unnest) 4454 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4455 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4456 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4457 4458 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4459 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4460 else: 4461 this = self.expression(exp.In, this=this, expressions=expressions) 4462 4463 if matched_l_paren: 4464 self._match_r_paren(this) 4465 elif not self._match(TokenType.R_BRACKET, expression=this): 4466 self.raise_error("Expecting ]") 4467 else: 4468 this = self.expression(exp.In, this=this, field=self._parse_field()) 4469 4470 return this 4471 4472 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4473 low = self._parse_bitwise() 4474 self._match(TokenType.AND) 4475 high = self._parse_bitwise() 4476 return self.expression(exp.Between, this=this, low=low, high=high) 4477 4478 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4479 if not self._match(TokenType.ESCAPE): 4480 return this 4481 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4482 4483 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4484 index = self._index 4485 4486 if not self._match(TokenType.INTERVAL) and match_interval: 4487 return None 4488 4489 if self._match(TokenType.STRING, advance=False): 4490 this = self._parse_primary() 4491 else: 4492 this = self._parse_term() 4493 4494 if not this 
or ( 4495 isinstance(this, exp.Column) 4496 and not this.table 4497 and not this.this.quoted 4498 and this.name.upper() == "IS" 4499 ): 4500 self._retreat(index) 4501 return None 4502 4503 unit = self._parse_function() or ( 4504 not self._match(TokenType.ALIAS, advance=False) 4505 and self._parse_var(any_token=True, upper=True) 4506 ) 4507 4508 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4509 # each INTERVAL expression into this canonical form so it's easy to transpile 4510 if this and this.is_number: 4511 this = exp.Literal.string(this.to_py()) 4512 elif this and this.is_string: 4513 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4514 if len(parts) == 1: 4515 if unit: 4516 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4517 self._retreat(self._index - 1) 4518 4519 this = exp.Literal.string(parts[0][0]) 4520 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4521 4522 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4523 unit = self.expression( 4524 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4525 ) 4526 4527 interval = self.expression(exp.Interval, this=this, unit=unit) 4528 4529 index = self._index 4530 self._match(TokenType.PLUS) 4531 4532 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4533 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4534 return self.expression( 4535 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4536 ) 4537 4538 self._retreat(index) 4539 return interval 4540 4541 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4542 this = self._parse_term() 4543 4544 while True: 4545 if self._match_set(self.BITWISE): 4546 this = self.expression( 4547 self.BITWISE[self._prev.token_type], 4548 this=this, 4549 expression=self._parse_term(), 4550 ) 4551 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4552 this = self.expression( 4553 exp.DPipe, 4554 this=this, 4555 expression=self._parse_term(), 4556 safe=not self.dialect.STRICT_STRING_CONCAT, 4557 ) 4558 elif self._match(TokenType.DQMARK): 4559 this = self.expression( 4560 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4561 ) 4562 elif self._match_pair(TokenType.LT, TokenType.LT): 4563 this = self.expression( 4564 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4565 ) 4566 elif self._match_pair(TokenType.GT, TokenType.GT): 4567 this = self.expression( 4568 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4569 ) 4570 else: 4571 break 4572 4573 return this 4574 4575 def _parse_term(self) -> t.Optional[exp.Expression]: 4576 this = self._parse_factor() 4577 4578 while self._match_set(self.TERM): 4579 klass = self.TERM[self._prev.token_type] 4580 comments = self._prev_comments 4581 expression = self._parse_factor() 4582 4583 this = self.expression(klass, this=this, comments=comments, expression=expression) 4584 4585 if isinstance(this, exp.Collate): 4586 expr = this.expression 4587 4588 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4589 # fallback to Identifier / Var 4590 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4591 ident = expr.this 4592 if isinstance(ident, exp.Identifier): 4593 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4594 4595 return this 4596 4597 def _parse_factor(self) -> t.Optional[exp.Expression]: 4598 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary 4599 this = parse_method() 4600 4601 while self._match_set(self.FACTOR): 4602 klass = self.FACTOR[self._prev.token_type] 4603 comments = self._prev_comments 4604 expression = parse_method() 4605 4606 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4607 self._retreat(self._index - 1) 4608 return this 4609 4610 this = self.expression(klass, this=this, comments=comments, expression=expression) 4611 4612 if isinstance(this, exp.Div): 4613 this.args["typed"] = self.dialect.TYPED_DIVISION 4614 this.args["safe"] = self.dialect.SAFE_DIVISION 4615 4616 return this 4617 4618 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4619 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4620 4621 def _parse_unary(self) -> t.Optional[exp.Expression]: 4622 if self._match_set(self.UNARY_PARSERS): 4623 return self.UNARY_PARSERS[self._prev.token_type](self) 4624 return self._parse_at_time_zone(self._parse_type()) 4625 4626 def _parse_type( 4627 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4628 ) -> t.Optional[exp.Expression]: 4629 interval = parse_interval and self._parse_interval() 4630 if interval: 4631 return interval 4632 4633 index = self._index 4634 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4635 4636 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4637 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4638 if isinstance(data_type, exp.Cast): 4639 # This constructor can contain ops directly after it, for instance struct unnesting: 4640 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4641 return self._parse_column_ops(data_type) 4642 4643 if data_type: 4644 index2 = self._index 4645 this = self._parse_primary() 4646 4647 if isinstance(this, exp.Literal): 4648 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4649 if parser: 4650 return parser(self, this, data_type) 4651 4652 return self.expression(exp.Cast, this=this, to=data_type) 4653 4654 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4655 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4656 # 4657 # If the index difference here is greater than 1, that means the parser itself must have 4658 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4659 # 4660 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4661 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4662 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4663 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4664 # 4665 # In these cases, we don't really want to return the converted type, but instead retreat 4666 # and try to parse a Column or Identifier in the section below.
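# A concrete illustration of the above: an explicit DECIMAL(38, 0) consumes the six
# tokens DECIMAL ( 38 , 0 ), so index2 - index > 1 and the parameterized type is kept,
# whereas a bare DECIMAL expanded to DECIMAL(38, 0) by a TYPE_CONVERTERS callable
# consumes only the type keyword (index2 - index == 1) and falls through to the
# retreat below.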
4667 if data_type.expressions and index2 - index > 1: 4668 self._retreat(index2) 4669 return self._parse_column_ops(data_type) 4670 4671 self._retreat(index) 4672 4673 if fallback_to_identifier: 4674 return self._parse_id_var() 4675 4676 this = self._parse_column() 4677 return this and self._parse_column_ops(this) 4678 4679 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4680 this = self._parse_type() 4681 if not this: 4682 return None 4683 4684 if isinstance(this, exp.Column) and not this.table: 4685 this = exp.var(this.name.upper()) 4686 4687 return self.expression( 4688 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4689 ) 4690 4691 def _parse_types( 4692 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4693 ) -> t.Optional[exp.Expression]: 4694 index = self._index 4695 4696 this: t.Optional[exp.Expression] = None 4697 prefix = self._match_text_seq("SYSUDTLIB", ".") 4698 4699 if not self._match_set(self.TYPE_TOKENS): 4700 identifier = allow_identifiers and self._parse_id_var( 4701 any_token=False, tokens=(TokenType.VAR,) 4702 ) 4703 if isinstance(identifier, exp.Identifier): 4704 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4705 4706 if len(tokens) != 1: 4707 self.raise_error("Unexpected identifier", self._prev) 4708 4709 if tokens[0].token_type in self.TYPE_TOKENS: 4710 self._prev = tokens[0] 4711 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4712 type_name = identifier.name 4713 4714 while self._match(TokenType.DOT): 4715 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4716 4717 this = exp.DataType.build(type_name, udt=True) 4718 else: 4719 self._retreat(self._index - 1) 4720 return None 4721 else: 4722 return None 4723 4724 type_token = self._prev.token_type 4725 4726 if type_token == TokenType.PSEUDO_TYPE: 4727 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4728 4729 if type_token == TokenType.OBJECT_IDENTIFIER: 4730 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4731 4732 # https://materialize.com/docs/sql/types/map/ 4733 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4734 key_type = self._parse_types( 4735 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4736 ) 4737 if not self._match(TokenType.FARROW): 4738 self._retreat(index) 4739 return None 4740 4741 value_type = self._parse_types( 4742 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4743 ) 4744 if not self._match(TokenType.R_BRACKET): 4745 self._retreat(index) 4746 return None 4747 4748 return exp.DataType( 4749 this=exp.DataType.Type.MAP, 4750 expressions=[key_type, value_type], 4751 nested=True, 4752 prefix=prefix, 4753 ) 4754 4755 nested = type_token in self.NESTED_TYPE_TOKENS 4756 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4757 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4758 expressions = None 4759 maybe_func = False 4760 4761 if self._match(TokenType.L_PAREN): 4762 if is_struct: 4763 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4764 elif nested: 4765 expressions = self._parse_csv( 4766 lambda: self._parse_types( 4767 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4768 ) 4769 ) 4770 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4771 this = expressions[0] 4772 this.set("nullable", True) 4773 self._match_r_paren() 4774 return this 4775 elif type_token in self.ENUM_TYPE_TOKENS: 4776 
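# Illustrative note: enum type parameters may carry explicit value assignments,
# e.g. ClickHouse's Enum8('a' = 1, 'b' = 2), which is why each parameter is
# parsed with _parse_equality below.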
expressions = self._parse_csv(self._parse_equality) 4777 elif is_aggregate: 4778 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4779 any_token=False, tokens=(TokenType.VAR,) 4780 ) 4781 if not func_or_ident or not self._match(TokenType.COMMA): 4782 return None 4783 expressions = self._parse_csv( 4784 lambda: self._parse_types( 4785 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4786 ) 4787 ) 4788 expressions.insert(0, func_or_ident) 4789 else: 4790 expressions = self._parse_csv(self._parse_type_size) 4791 4792 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4793 if type_token == TokenType.VECTOR and len(expressions) == 2: 4794 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4795 4796 if not expressions or not self._match(TokenType.R_PAREN): 4797 self._retreat(index) 4798 return None 4799 4800 maybe_func = True 4801 4802 values: t.Optional[t.List[exp.Expression]] = None 4803 4804 if nested and self._match(TokenType.LT): 4805 if is_struct: 4806 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4807 else: 4808 expressions = self._parse_csv( 4809 lambda: self._parse_types( 4810 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4811 ) 4812 ) 4813 4814 if not self._match(TokenType.GT): 4815 self.raise_error("Expecting >") 4816 4817 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4818 values = self._parse_csv(self._parse_assignment) 4819 if not values and is_struct: 4820 values = None 4821 self._retreat(self._index - 1) 4822 else: 4823 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4824 4825 if type_token in self.TIMESTAMPS: 4826 if self._match_text_seq("WITH", "TIME", "ZONE"): 4827 maybe_func = False 4828 tz_type = ( 4829 exp.DataType.Type.TIMETZ 4830 if type_token in self.TIMES 4831 else exp.DataType.Type.TIMESTAMPTZ 4832 ) 4833 this = exp.DataType(this=tz_type, expressions=expressions) 4834 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4835 maybe_func = False 4836 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4837 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4838 maybe_func = False 4839 elif type_token == TokenType.INTERVAL: 4840 unit = self._parse_var(upper=True) 4841 if unit: 4842 if self._match_text_seq("TO"): 4843 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4844 4845 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4846 else: 4847 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4848 4849 if maybe_func and check_func: 4850 index2 = self._index 4851 peek = self._parse_string() 4852 4853 if not peek: 4854 self._retreat(index) 4855 return None 4856 4857 self._retreat(index2) 4858 4859 if not this: 4860 if self._match_text_seq("UNSIGNED"): 4861 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4862 if not unsigned_type_token: 4863 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4864 4865 type_token = unsigned_type_token or type_token 4866 4867 this = exp.DataType( 4868 this=exp.DataType.Type[type_token.value], 4869 expressions=expressions, 4870 nested=nested, 4871 prefix=prefix, 4872 ) 4873 4874 # Empty arrays/structs are allowed 4875 if values is not None: 4876 cls = exp.Struct if is_struct else exp.Array 4877 this = exp.cast(cls(expressions=values), this, copy=False) 4878 4879 elif expressions: 4880 this.set("expressions", 
expressions) 4881 4882 # https://materialize.com/docs/sql/types/list/#type-name 4883 while self._match(TokenType.LIST): 4884 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4885 4886 index = self._index 4887 4888 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4889 matched_array = self._match(TokenType.ARRAY) 4890 4891 while self._curr: 4892 datatype_token = self._prev.token_type 4893 matched_l_bracket = self._match(TokenType.L_BRACKET) 4894 if not matched_l_bracket and not matched_array: 4895 break 4896 4897 matched_array = False 4898 values = self._parse_csv(self._parse_assignment) or None 4899 if ( 4900 values 4901 and not schema 4902 and ( 4903 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4904 ) 4905 ): 4906 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4907 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4908 self._retreat(index) 4909 break 4910 4911 this = exp.DataType( 4912 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4913 ) 4914 self._match(TokenType.R_BRACKET) 4915 4916 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4917 converter = self.TYPE_CONVERTERS.get(this.this) 4918 if converter: 4919 this = converter(t.cast(exp.DataType, this)) 4920 4921 return this 4922 4923 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4924 index = self._index 4925 4926 if ( 4927 self._curr 4928 and self._next 4929 and self._curr.token_type in self.TYPE_TOKENS 4930 and self._next.token_type in self.TYPE_TOKENS 4931 ): 4932 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4933 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4934 this = self._parse_id_var() 4935 else: 4936 this = ( 4937 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4938 or self._parse_id_var() 4939 ) 4940 4941 self._match(TokenType.COLON) 4942 4943 if ( 4944 type_required 4945 and not isinstance(this, exp.DataType) 4946 and not self._match_set(self.TYPE_TOKENS, advance=False) 4947 ): 4948 self._retreat(index) 4949 return self._parse_types() 4950 4951 return self._parse_column_def(this) 4952 4953 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4954 if not self._match_text_seq("AT", "TIME", "ZONE"): 4955 return this 4956 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4957 4958 def _parse_column(self) -> t.Optional[exp.Expression]: 4959 this = self._parse_column_reference() 4960 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4961 4962 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4963 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4964 4965 return column 4966 4967 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4968 this = self._parse_field() 4969 if ( 4970 not this 4971 and self._match(TokenType.VALUES, advance=False) 4972 and self.VALUES_FOLLOWED_BY_PAREN 4973 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4974 ): 4975 this = self._parse_id_var() 4976 4977 if isinstance(this, exp.Identifier): 4978 # We bubble up comments from the Identifier to the Column 4979 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4980 4981 return this 4982 4983 def _parse_colon_as_variant_extract( 4984 self, this: t.Optional[exp.Expression] 4985 ) -> t.Optional[exp.Expression]: 4986 casts = [] 4987 json_path = [] 4988 escape = None 4989 4990 while self._match(TokenType.COLON): 4991 start_index = self._index 4992 4993 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4994 path = self._parse_column_ops( 4995 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4996 ) 4997 4998 # The cast :: operator has a lower precedence than the extraction operator :, so 4999 # we rearrange the AST appropriately to avoid casting the JSON path 5000 while isinstance(path, exp.Cast): 5001 casts.append(path.to) 5002 path = path.this 5003 5004 if casts: 5005 dcolon_offset = next( 5006 i 5007 for i, t in enumerate(self._tokens[start_index:]) 5008 if t.token_type == TokenType.DCOLON 5009 ) 5010 end_token = self._tokens[start_index + dcolon_offset - 1] 5011 else: 5012 end_token = self._prev 5013 5014 if path: 5015 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5016 # it'll roundtrip to a string literal in GET_PATH 5017 if isinstance(path, exp.Identifier) and path.quoted: 5018 escape = True 5019 5020 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5021 5022 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5023 # Databricks transforms it back to the colon/dot notation 5024 if json_path: 5025 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5026 5027 if json_path_expr: 5028 json_path_expr.set("escape", escape) 5029 5030 this = self.expression( 5031 exp.JSONExtract, 5032 this=this, 5033 expression=json_path_expr, 5034 variant_extract=True, 5035 ) 5036 5037 while casts: 5038 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5039 5040 return this 5041 5042 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5043 return self._parse_types() 5044 5045 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5046 this = self._parse_bracket(this) 5047 5048 while self._match_set(self.COLUMN_OPERATORS): 5049 op_token = self._prev.token_type 5050 op = self.COLUMN_OPERATORS.get(op_token) 5051 5052 if op_token == TokenType.DCOLON: 5053 field = self._parse_dcolon() 5054 if not field: 5055 self.raise_error("Expected type") 5056 elif op and self._curr: 5057 field = self._parse_column_reference() or self._parse_bracket() 5058 else: 5059 field = self._parse_field(any_token=True, anonymous_func=True) 5060 5061 if isinstance(field, exp.Func) and this: 5062 # bigquery allows function calls like x.y.count(...) 5063 # SAFE.SUBSTR(...) 5064 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5065 this = exp.replace_tree( 5066 this, 5067 lambda n: ( 5068 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5069 if n.table 5070 else n.this 5071 ) 5072 if isinstance(n, exp.Column) 5073 else n, 5074 ) 5075 5076 if op: 5077 this = op(self, this, field) 5078 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5079 this = self.expression( 5080 exp.Column, 5081 comments=this.comments, 5082 this=field, 5083 table=this.this, 5084 db=this.args.get("table"), 5085 catalog=this.args.get("db"), 5086 ) 5087 else: 5088 this = self.expression(exp.Dot, this=this, expression=field) 5089 5090 this = self._parse_bracket(this) 5091 5092 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5093 5094 def _parse_primary(self) -> t.Optional[exp.Expression]: 5095 if self._match_set(self.PRIMARY_PARSERS): 5096 token_type = self._prev.token_type 5097 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5098 5099 if token_type == TokenType.STRING: 5100 expressions = [primary] 5101 while self._match(TokenType.STRING): 5102 expressions.append(exp.Literal.string(self._prev.text)) 5103 5104 if len(expressions) > 1: 5105 return self.expression(exp.Concat, expressions=expressions) 5106 5107 return primary 5108 5109 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5110 return exp.Literal.number(f"0.{self._prev.text}") 5111 5112 if self._match(TokenType.L_PAREN): 5113 comments = self._prev_comments 5114 query = self._parse_select() 5115 5116 if query: 5117 expressions = [query] 5118 else: 5119 expressions = self._parse_expressions() 5120 5121 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5122 5123 if not this and self._match(TokenType.R_PAREN, advance=False): 5124 this 
= self.expression(exp.Tuple) 5125 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5126 this = self._parse_subquery(this=this, parse_alias=False) 5127 elif isinstance(this, exp.Subquery): 5128 this = self._parse_subquery( 5129 this=self._parse_set_operations(this), parse_alias=False 5130 ) 5131 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5132 this = self.expression(exp.Tuple, expressions=expressions) 5133 else: 5134 this = self.expression(exp.Paren, this=this) 5135 5136 if this: 5137 this.add_comments(comments) 5138 5139 self._match_r_paren(expression=this) 5140 return this 5141 5142 return None 5143 5144 def _parse_field( 5145 self, 5146 any_token: bool = False, 5147 tokens: t.Optional[t.Collection[TokenType]] = None, 5148 anonymous_func: bool = False, 5149 ) -> t.Optional[exp.Expression]: 5150 if anonymous_func: 5151 field = ( 5152 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5153 or self._parse_primary() 5154 ) 5155 else: 5156 field = self._parse_primary() or self._parse_function( 5157 anonymous=anonymous_func, any_token=any_token 5158 ) 5159 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5160 5161 def _parse_function( 5162 self, 5163 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5164 anonymous: bool = False, 5165 optional_parens: bool = True, 5166 any_token: bool = False, 5167 ) -> t.Optional[exp.Expression]: 5168 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5169 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5170 fn_syntax = False 5171 if ( 5172 self._match(TokenType.L_BRACE, advance=False) 5173 and self._next 5174 and self._next.text.upper() == "FN" 5175 ): 5176 self._advance(2) 5177 fn_syntax = True 5178 5179 func = self._parse_function_call( 5180 functions=functions, 5181 anonymous=anonymous, 5182 optional_parens=optional_parens, 5183 any_token=any_token, 5184 ) 5185 5186 if fn_syntax: 5187 self._match(TokenType.R_BRACE) 5188 5189 return func 5190 5191 def _parse_function_call( 5192 self, 5193 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5194 anonymous: bool = False, 5195 optional_parens: bool = True, 5196 any_token: bool = False, 5197 ) -> t.Optional[exp.Expression]: 5198 if not self._curr: 5199 return None 5200 5201 comments = self._curr.comments 5202 token_type = self._curr.token_type 5203 this = self._curr.text 5204 upper = this.upper() 5205 5206 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5207 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5208 self._advance() 5209 return self._parse_window(parser(self)) 5210 5211 if not self._next or self._next.token_type != TokenType.L_PAREN: 5212 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5213 self._advance() 5214 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5215 5216 return None 5217 5218 if any_token: 5219 if token_type in self.RESERVED_TOKENS: 5220 return None 5221 elif token_type not in self.FUNC_TOKENS: 5222 return None 5223 5224 self._advance(2) 5225 5226 parser = self.FUNCTION_PARSERS.get(upper) 5227 if parser and not anonymous: 5228 this = parser(self) 5229 else: 5230 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5231 5232 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5233 this = self.expression( 5234 subquery_predicate, comments=comments, this=self._parse_select() 5235 ) 5236 self._match_r_paren() 5237 return this 5238 5239 if functions is None: 
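# No registry was passed in by the caller, so fall back to this parser's
# FUNCTIONS mapping of known function names to builder callables.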
5240 functions = self.FUNCTIONS 5241 5242 function = functions.get(upper) 5243 5244 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5245 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5246 5247 if alias: 5248 args = self._kv_to_prop_eq(args) 5249 5250 if function and not anonymous: 5251 if "dialect" in function.__code__.co_varnames: 5252 func = function(args, dialect=self.dialect) 5253 else: 5254 func = function(args) 5255 5256 func = self.validate_expression(func, args) 5257 if not self.dialect.NORMALIZE_FUNCTIONS: 5258 func.meta["name"] = this 5259 5260 this = func 5261 else: 5262 if token_type == TokenType.IDENTIFIER: 5263 this = exp.Identifier(this=this, quoted=True) 5264 this = self.expression(exp.Anonymous, this=this, expressions=args) 5265 5266 if isinstance(this, exp.Expression): 5267 this.add_comments(comments) 5268 5269 self._match_r_paren(this) 5270 return self._parse_window(this) 5271 5272 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5273 return expression 5274 5275 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5276 transformed = [] 5277 5278 for index, e in enumerate(expressions): 5279 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5280 if isinstance(e, exp.Alias): 5281 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5282 5283 if not isinstance(e, exp.PropertyEQ): 5284 e = self.expression( 5285 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5286 ) 5287 5288 if isinstance(e.this, exp.Column): 5289 e.this.replace(e.this.this) 5290 else: 5291 e = self._to_prop_eq(e, index) 5292 5293 transformed.append(e) 5294 5295 return transformed 5296 5297 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5298 return self._parse_column_def(self._parse_id_var()) 5299 5300 def _parse_user_defined_function( 5301 self, kind: t.Optional[TokenType] = None 5302 ) -> t.Optional[exp.Expression]: 5303 this = self._parse_id_var() 5304 5305 while self._match(TokenType.DOT): 5306 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5307 5308 if not self._match(TokenType.L_PAREN): 5309 return this 5310 5311 expressions = self._parse_csv(self._parse_function_parameter) 5312 self._match_r_paren() 5313 return self.expression( 5314 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5315 ) 5316 5317 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5318 literal = self._parse_primary() 5319 if literal: 5320 return self.expression(exp.Introducer, this=token.text, expression=literal) 5321 5322 return self.expression(exp.Identifier, this=token.text) 5323 5324 def _parse_session_parameter(self) -> exp.SessionParameter: 5325 kind = None 5326 this = self._parse_id_var() or self._parse_primary() 5327 5328 if this and self._match(TokenType.DOT): 5329 kind = this.name 5330 this = self._parse_var() or self._parse_primary() 5331 5332 return self.expression(exp.SessionParameter, this=this, kind=kind) 5333 5334 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5335 return self._parse_id_var() 5336 5337 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5338 index = self._index 5339 5340 if self._match(TokenType.L_PAREN): 5341 expressions = t.cast( 5342 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5343 ) 5344 5345 if not self._match(TokenType.R_PAREN): 5346 self._retreat(index) 5347 else: 5348 expressions = 
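Example (illustrative aside, not part of the parser source): the FUNCTIONS registry lookup in _parse_function_call above decides whether a call becomes a typed node or an exp.Anonymous; MY_UDF is a made-up name.

    from sqlglot import exp, parse_one

    # A name found in FUNCTIONS is built into a typed, validated node;
    # an unknown name falls through to exp.Anonymous.
    print(type(parse_one("SELECT COALESCE(a, b)").selects[0]))  # exp.Coalesce
    print(type(parse_one("SELECT MY_UDF(a, b)").selects[0]))    # exp.Anonymous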
[self._parse_lambda_arg()] 5349 5350 if self._match_set(self.LAMBDAS): 5351 return self.LAMBDAS[self._prev.token_type](self, expressions) 5352 5353 self._retreat(index) 5354 5355 this: t.Optional[exp.Expression] 5356 5357 if self._match(TokenType.DISTINCT): 5358 this = self.expression( 5359 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5360 ) 5361 else: 5362 this = self._parse_select_or_expression(alias=alias) 5363 5364 return self._parse_limit( 5365 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5366 ) 5367 5368 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5369 index = self._index 5370 if not self._match(TokenType.L_PAREN): 5371 return this 5372 5373 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5374 # expr can be of both types 5375 if self._match_set(self.SELECT_START_TOKENS): 5376 self._retreat(index) 5377 return this 5378 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5379 self._match_r_paren() 5380 return self.expression(exp.Schema, this=this, expressions=args) 5381 5382 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5383 return self._parse_column_def(self._parse_field(any_token=True)) 5384 5385 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5386 # column defs are not really columns, they're identifiers 5387 if isinstance(this, exp.Column): 5388 this = this.this 5389 5390 kind = self._parse_types(schema=True) 5391 5392 if self._match_text_seq("FOR", "ORDINALITY"): 5393 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5394 5395 constraints: t.List[exp.Expression] = [] 5396 5397 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5398 ("ALIAS", "MATERIALIZED") 5399 ): 5400 persisted = self._prev.text.upper() == "MATERIALIZED" 5401 constraint_kind = exp.ComputedColumnConstraint( 5402 this=self._parse_assignment(), 5403 persisted=persisted or self._match_text_seq("PERSISTED"), 5404 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5405 ) 5406 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5407 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5408 self._match(TokenType.ALIAS) 5409 constraints.append( 5410 self.expression( 5411 exp.ColumnConstraint, 5412 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5413 ) 5414 ) 5415 5416 while True: 5417 constraint = self._parse_column_constraint() 5418 if not constraint: 5419 break 5420 constraints.append(constraint) 5421 5422 if not kind and not constraints: 5423 return this 5424 5425 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5426 5427 def _parse_auto_increment( 5428 self, 5429 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5430 start = None 5431 increment = None 5432 5433 if self._match(TokenType.L_PAREN, advance=False): 5434 args = self._parse_wrapped_csv(self._parse_bitwise) 5435 start = seq_get(args, 0) 5436 increment = seq_get(args, 1) 5437 elif self._match_text_seq("START"): 5438 start = self._parse_bitwise() 5439 self._match_text_seq("INCREMENT") 5440 increment = self._parse_bitwise() 5441 5442 if start and increment: 5443 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5444 5445 return exp.AutoIncrementColumnConstraint() 5446 5447 def _parse_auto_property(self) -> 
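Example (illustrative aside, not part of the parser source): _parse_schema and _parse_column_def above turn a column list into ColumnDef nodes; the schema below is made up.

    from sqlglot import exp, parse_one

    ddl = parse_one("CREATE TABLE t (id INT NOT NULL, ts TIMESTAMP)")
    for col in ddl.find_all(exp.ColumnDef):
        print(col.name, col.args["kind"].sql(), len(col.args.get("constraints") or []))
    # expected: "id INT 1" then "ts TIMESTAMP 0"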
t.Optional[exp.AutoRefreshProperty]: 5448 if not self._match_text_seq("REFRESH"): 5449 self._retreat(self._index - 1) 5450 return None 5451 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5452 5453 def _parse_compress(self) -> exp.CompressColumnConstraint: 5454 if self._match(TokenType.L_PAREN, advance=False): 5455 return self.expression( 5456 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5457 ) 5458 5459 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5460 5461 def _parse_generated_as_identity( 5462 self, 5463 ) -> ( 5464 exp.GeneratedAsIdentityColumnConstraint 5465 | exp.ComputedColumnConstraint 5466 | exp.GeneratedAsRowColumnConstraint 5467 ): 5468 if self._match_text_seq("BY", "DEFAULT"): 5469 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5470 this = self.expression( 5471 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5472 ) 5473 else: 5474 self._match_text_seq("ALWAYS") 5475 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5476 5477 self._match(TokenType.ALIAS) 5478 5479 if self._match_text_seq("ROW"): 5480 start = self._match_text_seq("START") 5481 if not start: 5482 self._match(TokenType.END) 5483 hidden = self._match_text_seq("HIDDEN") 5484 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5485 5486 identity = self._match_text_seq("IDENTITY") 5487 5488 if self._match(TokenType.L_PAREN): 5489 if self._match(TokenType.START_WITH): 5490 this.set("start", self._parse_bitwise()) 5491 if self._match_text_seq("INCREMENT", "BY"): 5492 this.set("increment", self._parse_bitwise()) 5493 if self._match_text_seq("MINVALUE"): 5494 this.set("minvalue", self._parse_bitwise()) 5495 if self._match_text_seq("MAXVALUE"): 5496 this.set("maxvalue", self._parse_bitwise()) 5497 5498 if self._match_text_seq("CYCLE"): 5499 this.set("cycle", True) 5500 elif self._match_text_seq("NO", "CYCLE"): 5501 this.set("cycle", False) 5502 5503 if not identity: 5504 this.set("expression", self._parse_range()) 5505 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5506 args = self._parse_csv(self._parse_bitwise) 5507 this.set("start", seq_get(args, 0)) 5508 this.set("increment", seq_get(args, 1)) 5509 5510 self._match_r_paren() 5511 5512 return this 5513 5514 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5515 self._match_text_seq("LENGTH") 5516 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5517 5518 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5519 if self._match_text_seq("NULL"): 5520 return self.expression(exp.NotNullColumnConstraint) 5521 if self._match_text_seq("CASESPECIFIC"): 5522 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5523 if self._match_text_seq("FOR", "REPLICATION"): 5524 return self.expression(exp.NotForReplicationColumnConstraint) 5525 5526 # Unconsume the `NOT` token 5527 self._retreat(self._index - 1) 5528 return None 5529 5530 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5531 if self._match(TokenType.CONSTRAINT): 5532 this = self._parse_id_var() 5533 else: 5534 this = None 5535 5536 if self._match_texts(self.CONSTRAINT_PARSERS): 5537 return self.expression( 5538 exp.ColumnConstraint, 5539 this=this, 5540 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5541 ) 5542 5543 return this 5544 5545 def _parse_constraint(self) -> t.Optional[exp.Expression]: 
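Example (illustrative aside, not part of the parser source): a sketch of what _parse_generated_as_identity produces for a Postgres-style identity column, assuming the default dialect accepts this DDL.

    from sqlglot import exp, parse_one

    ddl = parse_one(
        "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 10 INCREMENT BY 5))"
    )
    ident = ddl.find(exp.GeneratedAsIdentityColumnConstraint)
    print(ident.this)  # expected: False (BY DEFAULT rather than ALWAYS)
    print(ident.args["start"].sql(), ident.args["increment"].sql())  # expected: 10 5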
5546 if not self._match(TokenType.CONSTRAINT): 5547 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5548 5549 return self.expression( 5550 exp.Constraint, 5551 this=self._parse_id_var(), 5552 expressions=self._parse_unnamed_constraints(), 5553 ) 5554 5555 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5556 constraints = [] 5557 while True: 5558 constraint = self._parse_unnamed_constraint() or self._parse_function() 5559 if not constraint: 5560 break 5561 constraints.append(constraint) 5562 5563 return constraints 5564 5565 def _parse_unnamed_constraint( 5566 self, constraints: t.Optional[t.Collection[str]] = None 5567 ) -> t.Optional[exp.Expression]: 5568 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5569 constraints or self.CONSTRAINT_PARSERS 5570 ): 5571 return None 5572 5573 constraint = self._prev.text.upper() 5574 if constraint not in self.CONSTRAINT_PARSERS: 5575 self.raise_error(f"No parser found for schema constraint {constraint}.") 5576 5577 return self.CONSTRAINT_PARSERS[constraint](self) 5578 5579 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5580 return self._parse_id_var(any_token=False) 5581 5582 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5583 self._match_text_seq("KEY") 5584 return self.expression( 5585 exp.UniqueColumnConstraint, 5586 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5587 this=self._parse_schema(self._parse_unique_key()), 5588 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5589 on_conflict=self._parse_on_conflict(), 5590 ) 5591 5592 def _parse_key_constraint_options(self) -> t.List[str]: 5593 options = [] 5594 while True: 5595 if not self._curr: 5596 break 5597 5598 if self._match(TokenType.ON): 5599 action = None 5600 on = self._advance_any() and self._prev.text 5601 5602 if self._match_text_seq("NO", "ACTION"): 5603 action = "NO ACTION" 5604 elif self._match_text_seq("CASCADE"): 5605 action = "CASCADE" 5606 elif self._match_text_seq("RESTRICT"): 5607 action = "RESTRICT" 5608 elif self._match_pair(TokenType.SET, TokenType.NULL): 5609 action = "SET NULL" 5610 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5611 action = "SET DEFAULT" 5612 else: 5613 self.raise_error("Invalid key constraint") 5614 5615 options.append(f"ON {on} {action}") 5616 else: 5617 var = self._parse_var_from_options( 5618 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5619 ) 5620 if not var: 5621 break 5622 options.append(var.name) 5623 5624 return options 5625 5626 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5627 if match and not self._match(TokenType.REFERENCES): 5628 return None 5629 5630 expressions = None 5631 this = self._parse_table(schema=True) 5632 options = self._parse_key_constraint_options() 5633 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5634 5635 def _parse_foreign_key(self) -> exp.ForeignKey: 5636 expressions = self._parse_wrapped_id_vars() 5637 reference = self._parse_references() 5638 options = {} 5639 5640 while self._match(TokenType.ON): 5641 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5642 self.raise_error("Expected DELETE or UPDATE") 5643 5644 kind = self._prev.text.lower() 5645 5646 if self._match_text_seq("NO", "ACTION"): 5647 action = "NO ACTION" 5648 elif self._match(TokenType.SET): 5649 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5650 action = "SET " + self._prev.text.upper() 5651 else: 5652 
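Example (illustrative aside, not part of the parser source): _parse_unique above handles column-level UNIQUE constraints; the table is made up.

    from sqlglot import exp, parse_one

    uniq = parse_one("CREATE TABLE t (email TEXT UNIQUE)").find(exp.UniqueColumnConstraint)
    print(uniq.sql())  # expected: UNIQUE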
self._advance() 5653 action = self._prev.text.upper() 5654 5655 options[kind] = action 5656 5657 return self.expression( 5658 exp.ForeignKey, 5659 expressions=expressions, 5660 reference=reference, 5661 **options, # type: ignore 5662 ) 5663 5664 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5665 return self._parse_field() 5666 5667 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5668 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5669 self._retreat(self._index - 1) 5670 return None 5671 5672 id_vars = self._parse_wrapped_id_vars() 5673 return self.expression( 5674 exp.PeriodForSystemTimeConstraint, 5675 this=seq_get(id_vars, 0), 5676 expression=seq_get(id_vars, 1), 5677 ) 5678 5679 def _parse_primary_key( 5680 self, wrapped_optional: bool = False, in_props: bool = False 5681 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5682 desc = ( 5683 self._match_set((TokenType.ASC, TokenType.DESC)) 5684 and self._prev.token_type == TokenType.DESC 5685 ) 5686 5687 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5688 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5689 5690 expressions = self._parse_wrapped_csv( 5691 self._parse_primary_key_part, optional=wrapped_optional 5692 ) 5693 options = self._parse_key_constraint_options() 5694 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5695 5696 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5697 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5698 5699 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5700 """ 5701 Parses a datetime column in ODBC format. We parse the column into the corresponding 5702 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5703 same as we did for `DATE('yyyy-mm-dd')`. 
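Example (illustrative aside, not part of the parser source): _parse_foreign_key and _parse_references combine into a ForeignKey node whose referential actions round-trip; the tables are made up.

    from sqlglot import exp, parse_one

    ddl = parse_one(
        "CREATE TABLE c (pid INT, FOREIGN KEY (pid) REFERENCES p(id) ON DELETE CASCADE)"
    )
    fk = ddl.find(exp.ForeignKey)
    print(fk.sql())  # expected to include REFERENCES p(id) and ON DELETE CASCADE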
5704 5705 Reference: 5706 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5707 """ 5708 self._match(TokenType.VAR) 5709 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5710 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5711 if not self._match(TokenType.R_BRACE): 5712 self.raise_error("Expected }") 5713 return expression 5714 5715 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5716 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5717 return this 5718 5719 bracket_kind = self._prev.token_type 5720 if ( 5721 bracket_kind == TokenType.L_BRACE 5722 and self._curr 5723 and self._curr.token_type == TokenType.VAR 5724 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5725 ): 5726 return self._parse_odbc_datetime_literal() 5727 5728 expressions = self._parse_csv( 5729 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5730 ) 5731 5732 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5733 self.raise_error("Expected ]") 5734 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5735 self.raise_error("Expected }") 5736 5737 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5738 if bracket_kind == TokenType.L_BRACE: 5739 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5740 elif not this: 5741 this = build_array_constructor( 5742 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5743 ) 5744 else: 5745 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5746 if constructor_type: 5747 return build_array_constructor( 5748 constructor_type, 5749 args=expressions, 5750 bracket_kind=bracket_kind, 5751 dialect=self.dialect, 5752 ) 5753 5754 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5755 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5756 5757 self._add_comments(this) 5758 return self._parse_bracket(this) 5759 5760 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5761 if self._match(TokenType.COLON): 5762 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5763 return this 5764 5765 def _parse_case(self) -> t.Optional[exp.Expression]: 5766 ifs = [] 5767 default = None 5768 5769 comments = self._prev_comments 5770 expression = self._parse_assignment() 5771 5772 while self._match(TokenType.WHEN): 5773 this = self._parse_assignment() 5774 self._match(TokenType.THEN) 5775 then = self._parse_assignment() 5776 ifs.append(self.expression(exp.If, this=this, true=then)) 5777 5778 if self._match(TokenType.ELSE): 5779 default = self._parse_assignment() 5780 5781 if not self._match(TokenType.END): 5782 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5783 default = exp.column("interval") 5784 else: 5785 self.raise_error("Expected END after CASE", self._prev) 5786 5787 return self.expression( 5788 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5789 ) 5790 5791 def _parse_if(self) -> t.Optional[exp.Expression]: 5792 if self._match(TokenType.L_PAREN): 5793 args = self._parse_csv(self._parse_assignment) 5794 this = self.validate_expression(exp.If.from_arg_list(args), args) 5795 self._match_r_paren() 5796 else: 5797 index = self._index - 1 5798 5799 if self.NO_PAREN_IF_COMMANDS and index == 0: 5800 
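Example (illustrative aside, not part of the parser source): the bracket branches of _parse_bracket above, exercised through DuckDB syntax; xs is a made-up column.

    from sqlglot import exp, parse_one

    # Curly braces build a Struct; square brackets on a value build a Bracket
    # whose index is adjusted by the dialect's INDEX_OFFSET.
    print(type(parse_one("SELECT {'a': 1, 'b': 2}", read="duckdb").selects[0]))  # exp.Struct
    print(type(parse_one("SELECT xs[1]", read="duckdb").selects[0]))             # exp.Bracket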
return self._parse_as_command(self._prev) 5801 5802 condition = self._parse_assignment() 5803 5804 if not condition: 5805 self._retreat(index) 5806 return None 5807 5808 self._match(TokenType.THEN) 5809 true = self._parse_assignment() 5810 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5811 self._match(TokenType.END) 5812 this = self.expression(exp.If, this=condition, true=true, false=false) 5813 5814 return this 5815 5816 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5817 if not self._match_text_seq("VALUE", "FOR"): 5818 self._retreat(self._index - 1) 5819 return None 5820 5821 return self.expression( 5822 exp.NextValueFor, 5823 this=self._parse_column(), 5824 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5825 ) 5826 5827 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5828 this = self._parse_function() or self._parse_var_or_string(upper=True) 5829 5830 if self._match(TokenType.FROM): 5831 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5832 5833 if not self._match(TokenType.COMMA): 5834 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5835 5836 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5837 5838 def _parse_gap_fill(self) -> exp.GapFill: 5839 self._match(TokenType.TABLE) 5840 this = self._parse_table() 5841 5842 self._match(TokenType.COMMA) 5843 args = [this, *self._parse_csv(self._parse_lambda)] 5844 5845 gap_fill = exp.GapFill.from_arg_list(args) 5846 return self.validate_expression(gap_fill, args) 5847 5848 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5849 this = self._parse_assignment() 5850 5851 if not self._match(TokenType.ALIAS): 5852 if self._match(TokenType.COMMA): 5853 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5854 5855 self.raise_error("Expected AS after CAST") 5856 5857 fmt = None 5858 to = self._parse_types() 5859 5860 if self._match(TokenType.FORMAT): 5861 fmt_string = self._parse_string() 5862 fmt = self._parse_at_time_zone(fmt_string) 5863 5864 if not to: 5865 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5866 if to.this in exp.DataType.TEMPORAL_TYPES: 5867 this = self.expression( 5868 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5869 this=this, 5870 format=exp.Literal.string( 5871 format_time( 5872 fmt_string.this if fmt_string else "", 5873 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5874 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5875 ) 5876 ), 5877 safe=safe, 5878 ) 5879 5880 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5881 this.set("zone", fmt.args["zone"]) 5882 return this 5883 elif not to: 5884 self.raise_error("Expected TYPE after CAST") 5885 elif isinstance(to, exp.Identifier): 5886 to = exp.DataType.build(to.name, udt=True) 5887 elif to.this == exp.DataType.Type.CHAR: 5888 if self._match(TokenType.CHARACTER_SET): 5889 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5890 5891 return self.expression( 5892 exp.Cast if strict else exp.TryCast, 5893 this=this, 5894 to=to, 5895 format=fmt, 5896 safe=safe, 5897 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5898 ) 5899 5900 def _parse_string_agg(self) -> exp.Expression: 5901 if self._match(TokenType.DISTINCT): 5902 args: t.List[t.Optional[exp.Expression]] = [ 5903 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
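Example (illustrative aside, not part of the parser source): _parse_cast above is reached for both CAST and TRY_CAST, with the strict flag deciding the node type.

    from sqlglot import exp, parse_one

    print(type(parse_one("SELECT CAST(x AS INT)").selects[0]))      # exp.Cast
    print(type(parse_one("SELECT TRY_CAST(x AS INT)").selects[0]))  # exp.TryCast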
5904 ] 5905 if self._match(TokenType.COMMA): 5906 args.extend(self._parse_csv(self._parse_assignment)) 5907 else: 5908 args = self._parse_csv(self._parse_assignment) # type: ignore 5909 5910 index = self._index 5911 if not self._match(TokenType.R_PAREN) and args: 5912 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5913 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5914 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5915 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5916 5917 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5918 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5919 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5920 if not self._match_text_seq("WITHIN", "GROUP"): 5921 self._retreat(index) 5922 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5923 5924 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5925 order = self._parse_order(this=seq_get(args, 0)) 5926 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5927 5928 def _parse_convert( 5929 self, strict: bool, safe: t.Optional[bool] = None 5930 ) -> t.Optional[exp.Expression]: 5931 this = self._parse_bitwise() 5932 5933 if self._match(TokenType.USING): 5934 to: t.Optional[exp.Expression] = self.expression( 5935 exp.CharacterSet, this=self._parse_var() 5936 ) 5937 elif self._match(TokenType.COMMA): 5938 to = self._parse_types() 5939 else: 5940 to = None 5941 5942 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5943 5944 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5945 """ 5946 There are generally two variants of the DECODE function: 5947 5948 - DECODE(bin, charset) 5949 - DECODE(expression, search, result [, search, result] ... [, default]) 5950 5951 The second variant will always be parsed into a CASE expression. Note that NULL 5952 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5953 instead of relying on pattern matching. 
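Example (illustrative aside, not part of the parser source): because _parse_string_agg normalizes STRING_AGG into a GroupConcat node, other dialects can render it; the exact MySQL output shown is an expectation, not a guarantee.

    import sqlglot

    print(sqlglot.transpile("SELECT STRING_AGG(x, ',')", read="postgres", write="mysql")[0])
    # expected: SELECT GROUP_CONCAT(x SEPARATOR ',')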
5954 """ 5955 args = self._parse_csv(self._parse_assignment) 5956 5957 if len(args) < 3: 5958 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5959 5960 expression, *expressions = args 5961 if not expression: 5962 return None 5963 5964 ifs = [] 5965 for search, result in zip(expressions[::2], expressions[1::2]): 5966 if not search or not result: 5967 return None 5968 5969 if isinstance(search, exp.Literal): 5970 ifs.append( 5971 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5972 ) 5973 elif isinstance(search, exp.Null): 5974 ifs.append( 5975 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5976 ) 5977 else: 5978 cond = exp.or_( 5979 exp.EQ(this=expression.copy(), expression=search), 5980 exp.and_( 5981 exp.Is(this=expression.copy(), expression=exp.Null()), 5982 exp.Is(this=search.copy(), expression=exp.Null()), 5983 copy=False, 5984 ), 5985 copy=False, 5986 ) 5987 ifs.append(exp.If(this=cond, true=result)) 5988 5989 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5990 5991 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5992 self._match_text_seq("KEY") 5993 key = self._parse_column() 5994 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5995 self._match_text_seq("VALUE") 5996 value = self._parse_bitwise() 5997 5998 if not key and not value: 5999 return None 6000 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6001 6002 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6003 if not this or not self._match_text_seq("FORMAT", "JSON"): 6004 return this 6005 6006 return self.expression(exp.FormatJson, this=this) 6007 6008 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6009 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6010 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6011 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6012 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6013 else: 6014 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6015 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6016 6017 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6018 6019 if not empty and not error and not null: 6020 return None 6021 6022 return self.expression( 6023 exp.OnCondition, 6024 empty=empty, 6025 error=error, 6026 null=null, 6027 ) 6028 6029 def _parse_on_handling( 6030 self, on: str, *values: str 6031 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6032 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6033 for value in values: 6034 if self._match_text_seq(value, "ON", on): 6035 return f"{value} ON {on}" 6036 6037 index = self._index 6038 if self._match(TokenType.DEFAULT): 6039 default_value = self._parse_bitwise() 6040 if self._match_text_seq("ON", on): 6041 return default_value 6042 6043 self._retreat(index) 6044 6045 return None 6046 6047 @t.overload 6048 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6049 6050 @t.overload 6051 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6052 6053 def _parse_json_object(self, agg=False): 6054 star = self._parse_star() 6055 expressions = ( 6056 [star] 6057 if star 6058 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6059 ) 6060 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6061 6062 unique_keys = None 6063 if self._match_text_seq("WITH", "UNIQUE"): 6064 unique_keys = True 6065 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6066 unique_keys = False 6067 6068 self._match_text_seq("KEYS") 6069 6070 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6071 self._parse_type() 6072 ) 6073 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6074 6075 return self.expression( 6076 exp.JSONObjectAgg if agg else exp.JSONObject, 6077 expressions=expressions, 6078 null_handling=null_handling, 6079 unique_keys=unique_keys, 6080 return_type=return_type, 6081 encoding=encoding, 6082 ) 6083 6084 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6085 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6086 if not self._match_text_seq("NESTED"): 6087 this = self._parse_id_var() 6088 kind = self._parse_types(allow_identifiers=False) 6089 nested = None 6090 else: 6091 this = None 6092 kind = None 6093 nested = True 6094 6095 path = self._match_text_seq("PATH") and self._parse_string() 6096 nested_schema = nested and self._parse_json_schema() 6097 6098 return self.expression( 6099 exp.JSONColumnDef, 6100 this=this, 6101 kind=kind, 6102 path=path, 6103 nested_schema=nested_schema, 6104 ) 6105 6106 def _parse_json_schema(self) -> exp.JSONSchema: 6107 self._match_text_seq("COLUMNS") 6108 return self.expression( 6109 exp.JSONSchema, 6110 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6111 ) 6112 6113 def _parse_json_table(self) -> exp.JSONTable: 6114 this = self._parse_format_json(self._parse_bitwise()) 6115 path = self._match(TokenType.COMMA) and self._parse_string() 6116 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6117 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6118 schema = self._parse_json_schema() 6119 6120 return exp.JSONTable( 6121 this=this, 6122 schema=schema, 6123 path=path, 6124 error_handling=error_handling, 6125 empty_handling=empty_handling, 6126 ) 6127 6128 def _parse_match_against(self) -> exp.MatchAgainst: 6129 expressions = self._parse_csv(self._parse_column) 6130 6131 self._match_text_seq(")", "AGAINST", "(") 6132 6133 this = self._parse_string() 6134 6135 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6136 modifier = "IN NATURAL LANGUAGE MODE" 6137 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6138 modifier = f"{modifier} WITH QUERY EXPANSION" 6139 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6140 modifier = "IN BOOLEAN MODE" 6141 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6142 modifier = "WITH QUERY EXPANSION" 6143 else: 6144 modifier = None 6145 6146 return self.expression( 6147 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6148 ) 6149 6150 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6151 def _parse_open_json(self) -> exp.OpenJSON: 6152 this = self._parse_bitwise() 6153 path = self._match(TokenType.COMMA) and self._parse_string() 6154 6155 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6156 this = self._parse_field(any_token=True) 6157 kind = self._parse_types() 6158 path = 
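Example (illustrative aside, not part of the parser source): _parse_match_against above handles MySQL full-text predicates; the table and columns are made up.

    from sqlglot import exp, parse_one

    m = parse_one(
        "SELECT MATCH(title, body) AGAINST('db' IN BOOLEAN MODE) FROM docs", read="mysql"
    ).find(exp.MatchAgainst)
    print(m.args["modifier"])  # expected: IN BOOLEAN MODE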
self._parse_string() 6159 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6160 6161 return self.expression( 6162 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6163 ) 6164 6165 expressions = None 6166 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6167 self._match_l_paren() 6168 expressions = self._parse_csv(_parse_open_json_column_def) 6169 6170 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6171 6172 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6173 args = self._parse_csv(self._parse_bitwise) 6174 6175 if self._match(TokenType.IN): 6176 return self.expression( 6177 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6178 ) 6179 6180 if haystack_first: 6181 haystack = seq_get(args, 0) 6182 needle = seq_get(args, 1) 6183 else: 6184 needle = seq_get(args, 0) 6185 haystack = seq_get(args, 1) 6186 6187 return self.expression( 6188 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6189 ) 6190 6191 def _parse_predict(self) -> exp.Predict: 6192 self._match_text_seq("MODEL") 6193 this = self._parse_table() 6194 6195 self._match(TokenType.COMMA) 6196 self._match_text_seq("TABLE") 6197 6198 return self.expression( 6199 exp.Predict, 6200 this=this, 6201 expression=self._parse_table(), 6202 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6203 ) 6204 6205 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6206 args = self._parse_csv(self._parse_table) 6207 return exp.JoinHint(this=func_name.upper(), expressions=args) 6208 6209 def _parse_substring(self) -> exp.Substring: 6210 # Postgres supports the form: substring(string [from int] [for int]) 6211 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6212 6213 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6214 6215 if self._match(TokenType.FROM): 6216 args.append(self._parse_bitwise()) 6217 if self._match(TokenType.FOR): 6218 if len(args) == 1: 6219 args.append(exp.Literal.number(1)) 6220 args.append(self._parse_bitwise()) 6221 6222 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6223 6224 def _parse_trim(self) -> exp.Trim: 6225 # https://www.w3resource.com/sql/character-functions/trim.php 6226 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6227 6228 position = None 6229 collation = None 6230 expression = None 6231 6232 if self._match_texts(self.TRIM_TYPES): 6233 position = self._prev.text.upper() 6234 6235 this = self._parse_bitwise() 6236 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6237 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6238 expression = self._parse_bitwise() 6239 6240 if invert_order: 6241 this, expression = expression, this 6242 6243 if self._match(TokenType.COLLATE): 6244 collation = self._parse_bitwise() 6245 6246 return self.expression( 6247 exp.Trim, this=this, position=position, expression=expression, collation=collation 6248 ) 6249 6250 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6251 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6252 6253 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6254 return self._parse_window(self._parse_id_var(), alias=True) 6255 6256 def _parse_respect_or_ignore_nulls( 6257 self, this: t.Optional[exp.Expression] 6258 ) -> t.Optional[exp.Expression]: 6259 if self._match_text_seq("IGNORE", "NULLS"): 
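Example (illustrative aside, not part of the parser source): _parse_substring folds the Postgres FROM/FOR form into positional arguments, so it transpiles cleanly; the DuckDB rendering shown is an expectation.

    import sqlglot

    print(
        sqlglot.transpile(
            "SELECT SUBSTRING('hello' FROM 2 FOR 3)", read="postgres", write="duckdb"
        )[0]
    )
    # expected: SELECT SUBSTRING('hello', 2, 3)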
6260 return self.expression(exp.IgnoreNulls, this=this) 6261 if self._match_text_seq("RESPECT", "NULLS"): 6262 return self.expression(exp.RespectNulls, this=this) 6263 return this 6264 6265 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6266 if self._match(TokenType.HAVING): 6267 self._match_texts(("MAX", "MIN")) 6268 max = self._prev.text.upper() != "MIN" 6269 return self.expression( 6270 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6271 ) 6272 6273 return this 6274 6275 def _parse_window( 6276 self, this: t.Optional[exp.Expression], alias: bool = False 6277 ) -> t.Optional[exp.Expression]: 6278 func = this 6279 comments = func.comments if isinstance(func, exp.Expression) else None 6280 6281 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6282 self._match(TokenType.WHERE) 6283 this = self.expression( 6284 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6285 ) 6286 self._match_r_paren() 6287 6288 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6289 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6290 if self._match_text_seq("WITHIN", "GROUP"): 6291 order = self._parse_wrapped(self._parse_order) 6292 this = self.expression(exp.WithinGroup, this=this, expression=order) 6293 6294 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6295 # Some dialects choose to implement and some do not. 6296 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6297 6298 # There is some code above in _parse_lambda that handles 6299 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6300 6301 # The below changes handle 6302 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6303 6304 # Oracle allows both formats 6305 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6306 # and Snowflake chose to do the same for familiarity 6307 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6308 if isinstance(this, exp.AggFunc): 6309 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6310 6311 if ignore_respect and ignore_respect is not this: 6312 ignore_respect.replace(ignore_respect.this) 6313 this = self.expression(ignore_respect.__class__, this=this) 6314 6315 this = self._parse_respect_or_ignore_nulls(this) 6316 6317 # bigquery select from window x AS (partition by ...) 
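Example (illustrative aside, not part of the parser source): the post-parenthesis IGNORE NULLS handling described in the comments above re-wraps the aggregate before the Window node is built.

    from sqlglot import exp, parse_one

    win = parse_one(
        "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY t) FROM y"
    ).find(exp.Window)
    print(type(win.this))  # expected: exp.IgnoreNulls wrapping the FIRST_VALUE call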
6318 if alias: 6319 over = None 6320 self._match(TokenType.ALIAS) 6321 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6322 return this 6323 else: 6324 over = self._prev.text.upper() 6325 6326 if comments and isinstance(func, exp.Expression): 6327 func.pop_comments() 6328 6329 if not self._match(TokenType.L_PAREN): 6330 return self.expression( 6331 exp.Window, 6332 comments=comments, 6333 this=this, 6334 alias=self._parse_id_var(False), 6335 over=over, 6336 ) 6337 6338 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6339 6340 first = self._match(TokenType.FIRST) 6341 if self._match_text_seq("LAST"): 6342 first = False 6343 6344 partition, order = self._parse_partition_and_order() 6345 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6346 6347 if kind: 6348 self._match(TokenType.BETWEEN) 6349 start = self._parse_window_spec() 6350 self._match(TokenType.AND) 6351 end = self._parse_window_spec() 6352 6353 spec = self.expression( 6354 exp.WindowSpec, 6355 kind=kind, 6356 start=start["value"], 6357 start_side=start["side"], 6358 end=end["value"], 6359 end_side=end["side"], 6360 ) 6361 else: 6362 spec = None 6363 6364 self._match_r_paren() 6365 6366 window = self.expression( 6367 exp.Window, 6368 comments=comments, 6369 this=this, 6370 partition_by=partition, 6371 order=order, 6372 spec=spec, 6373 alias=window_alias, 6374 over=over, 6375 first=first, 6376 ) 6377 6378 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6379 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6380 return self._parse_window(window, alias=alias) 6381 6382 return window 6383 6384 def _parse_partition_and_order( 6385 self, 6386 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6387 return self._parse_partition_by(), self._parse_order() 6388 6389 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6390 self._match(TokenType.BETWEEN) 6391 6392 return { 6393 "value": ( 6394 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6395 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6396 or self._parse_bitwise() 6397 ), 6398 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6399 } 6400 6401 def _parse_alias( 6402 self, this: t.Optional[exp.Expression], explicit: bool = False 6403 ) -> t.Optional[exp.Expression]: 6404 any_token = self._match(TokenType.ALIAS) 6405 comments = self._prev_comments or [] 6406 6407 if explicit and not any_token: 6408 return this 6409 6410 if self._match(TokenType.L_PAREN): 6411 aliases = self.expression( 6412 exp.Aliases, 6413 comments=comments, 6414 this=this, 6415 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6416 ) 6417 self._match_r_paren(aliases) 6418 return aliases 6419 6420 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6421 self.STRING_ALIASES and self._parse_string_as_identifier() 6422 ) 6423 6424 if alias: 6425 comments.extend(alias.pop_comments()) 6426 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6427 column = this.this 6428 6429 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6430 if not this.comments and column and column.comments: 6431 this.comments = column.pop_comments() 6432 6433 return this 6434 6435 def _parse_id_var( 6436 self, 6437 any_token: bool = True, 6438 tokens: t.Optional[t.Collection[TokenType]] = None, 6439 ) -> t.Optional[exp.Expression]: 6440 expression = self._parse_identifier() 6441 if 
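Example (illustrative aside, not part of the parser source): _parse_alias above accepts both explicit (AS) and implicit aliases; the names are made up.

    from sqlglot import exp, parse_one

    ast = parse_one("SELECT a + 1 AS total, b other FROM t")
    print([a.alias for a in ast.find_all(exp.Alias)])  # expected: ['total', 'other']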
not expression and ( 6442 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6443 ): 6444 quoted = self._prev.token_type == TokenType.STRING 6445 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6446 6447 return expression 6448 6449 def _parse_string(self) -> t.Optional[exp.Expression]: 6450 if self._match_set(self.STRING_PARSERS): 6451 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6452 return self._parse_placeholder() 6453 6454 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6455 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6456 6457 def _parse_number(self) -> t.Optional[exp.Expression]: 6458 if self._match_set(self.NUMERIC_PARSERS): 6459 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6460 return self._parse_placeholder() 6461 6462 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6463 if self._match(TokenType.IDENTIFIER): 6464 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6465 return self._parse_placeholder() 6466 6467 def _parse_var( 6468 self, 6469 any_token: bool = False, 6470 tokens: t.Optional[t.Collection[TokenType]] = None, 6471 upper: bool = False, 6472 ) -> t.Optional[exp.Expression]: 6473 if ( 6474 (any_token and self._advance_any()) 6475 or self._match(TokenType.VAR) 6476 or (self._match_set(tokens) if tokens else False) 6477 ): 6478 return self.expression( 6479 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6480 ) 6481 return self._parse_placeholder() 6482 6483 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6484 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6485 self._advance() 6486 return self._prev 6487 return None 6488 6489 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6490 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6491 6492 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6493 return self._parse_primary() or self._parse_var(any_token=True) 6494 6495 def _parse_null(self) -> t.Optional[exp.Expression]: 6496 if self._match_set(self.NULL_TOKENS): 6497 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6498 return self._parse_placeholder() 6499 6500 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6501 if self._match(TokenType.TRUE): 6502 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6503 if self._match(TokenType.FALSE): 6504 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6505 return self._parse_placeholder() 6506 6507 def _parse_star(self) -> t.Optional[exp.Expression]: 6508 if self._match(TokenType.STAR): 6509 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6510 return self._parse_placeholder() 6511 6512 def _parse_parameter(self) -> exp.Parameter: 6513 this = self._parse_identifier() or self._parse_primary_or_var() 6514 return self.expression(exp.Parameter, this=this) 6515 6516 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6517 if self._match_set(self.PLACEHOLDER_PARSERS): 6518 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6519 if placeholder: 6520 return placeholder 6521 self._advance(-1) 6522 return None 6523 6524 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6525 if not self._match_texts(keywords): 6526 return None 6527 if self._match(TokenType.L_PAREN, 
advance=False): 6528 return self._parse_wrapped_csv(self._parse_expression) 6529 6530 expression = self._parse_expression() 6531 return [expression] if expression else None 6532 6533 def _parse_csv( 6534 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6535 ) -> t.List[exp.Expression]: 6536 parse_result = parse_method() 6537 items = [parse_result] if parse_result is not None else [] 6538 6539 while self._match(sep): 6540 self._add_comments(parse_result) 6541 parse_result = parse_method() 6542 if parse_result is not None: 6543 items.append(parse_result) 6544 6545 return items 6546 6547 def _parse_tokens( 6548 self, parse_method: t.Callable, expressions: t.Dict 6549 ) -> t.Optional[exp.Expression]: 6550 this = parse_method() 6551 6552 while self._match_set(expressions): 6553 this = self.expression( 6554 expressions[self._prev.token_type], 6555 this=this, 6556 comments=self._prev_comments, 6557 expression=parse_method(), 6558 ) 6559 6560 return this 6561 6562 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6563 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6564 6565 def _parse_wrapped_csv( 6566 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6567 ) -> t.List[exp.Expression]: 6568 return self._parse_wrapped( 6569 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6570 ) 6571 6572 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6573 wrapped = self._match(TokenType.L_PAREN) 6574 if not wrapped and not optional: 6575 self.raise_error("Expecting (") 6576 parse_result = parse_method() 6577 if wrapped: 6578 self._match_r_paren() 6579 return parse_result 6580 6581 def _parse_expressions(self) -> t.List[exp.Expression]: 6582 return self._parse_csv(self._parse_expression) 6583 6584 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6585 return self._parse_select() or self._parse_set_operations( 6586 self._parse_expression() if alias else self._parse_assignment() 6587 ) 6588 6589 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6590 return self._parse_query_modifiers( 6591 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6592 ) 6593 6594 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6595 this = None 6596 if self._match_texts(self.TRANSACTION_KIND): 6597 this = self._prev.text 6598 6599 self._match_texts(("TRANSACTION", "WORK")) 6600 6601 modes = [] 6602 while True: 6603 mode = [] 6604 while self._match(TokenType.VAR): 6605 mode.append(self._prev.text) 6606 6607 if mode: 6608 modes.append(" ".join(mode)) 6609 if not self._match(TokenType.COMMA): 6610 break 6611 6612 return self.expression(exp.Transaction, this=this, modes=modes) 6613 6614 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6615 chain = None 6616 savepoint = None 6617 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6618 6619 self._match_texts(("TRANSACTION", "WORK")) 6620 6621 if self._match_text_seq("TO"): 6622 self._match_text_seq("SAVEPOINT") 6623 savepoint = self._parse_id_var() 6624 6625 if self._match(TokenType.AND): 6626 chain = not self._match_text_seq("NO") 6627 self._match_text_seq("CHAIN") 6628 6629 if is_rollback: 6630 return self.expression(exp.Rollback, savepoint=savepoint) 6631 6632 return self.expression(exp.Commit, chain=chain) 6633 6634 def _parse_refresh(self) -> exp.Refresh: 6635 self._match(TokenType.TABLE) 6636 return 
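Example (illustrative aside, not part of the parser source): a sketch of the transaction statements handled by _parse_transaction and _parse_commit_or_rollback above, assuming the default dialect routes BEGIN and ROLLBACK to them.

    from sqlglot import exp, parse_one

    print(type(parse_one("BEGIN")))  # expected: exp.Transaction
    print(parse_one("ROLLBACK TO SAVEPOINT s").args["savepoint"].sql())  # expected: s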
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6637 6638 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6639 if not self._match_text_seq("ADD"): 6640 return None 6641 6642 self._match(TokenType.COLUMN) 6643 exists_column = self._parse_exists(not_=True) 6644 expression = self._parse_field_def() 6645 6646 if expression: 6647 expression.set("exists", exists_column) 6648 6649 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6650 if self._match_texts(("FIRST", "AFTER")): 6651 position = self._prev.text 6652 column_position = self.expression( 6653 exp.ColumnPosition, this=self._parse_column(), position=position 6654 ) 6655 expression.set("position", column_position) 6656 6657 return expression 6658 6659 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6660 drop = self._match(TokenType.DROP) and self._parse_drop() 6661 if drop and not isinstance(drop, exp.Command): 6662 drop.set("kind", drop.args.get("kind", "COLUMN")) 6663 return drop 6664 6665 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6666 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6667 return self.expression( 6668 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6669 ) 6670 6671 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6672 index = self._index - 1 6673 6674 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6675 return self._parse_csv( 6676 lambda: self.expression( 6677 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6678 ) 6679 ) 6680 6681 self._retreat(index) 6682 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6683 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6684 6685 if self._match_text_seq("ADD", "COLUMNS"): 6686 schema = self._parse_schema() 6687 if schema: 6688 return [schema] 6689 return [] 6690 6691 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6692 6693 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6694 if self._match_texts(self.ALTER_ALTER_PARSERS): 6695 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6696 6697 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6698 # keyword after ALTER we default to parsing this statement 6699 self._match(TokenType.COLUMN) 6700 column = self._parse_field(any_token=True) 6701 6702 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6703 return self.expression(exp.AlterColumn, this=column, drop=True) 6704 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6705 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6706 if self._match(TokenType.COMMENT): 6707 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6708 if self._match_text_seq("DROP", "NOT", "NULL"): 6709 return self.expression( 6710 exp.AlterColumn, 6711 this=column, 6712 drop=True, 6713 allow_null=True, 6714 ) 6715 if self._match_text_seq("SET", "NOT", "NULL"): 6716 return self.expression( 6717 exp.AlterColumn, 6718 this=column, 6719 allow_null=False, 6720 ) 6721 self._match_text_seq("SET", "DATA") 6722 self._match_text_seq("TYPE") 6723 return self.expression( 6724 exp.AlterColumn, 6725 this=column, 6726 dtype=self._parse_types(), 6727 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6728 using=self._match(TokenType.USING) and 
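Example (illustrative aside, not part of the parser source): the ALTER COLUMN ... SET DATA TYPE branch of _parse_alter_table_alter above; the table is made up.

    from sqlglot import exp, parse_one

    alter = parse_one("ALTER TABLE t ALTER COLUMN c SET DATA TYPE TEXT")
    action = alter.args["actions"][0]
    print(type(action), action.args["dtype"].sql())  # expected: exp.AlterColumn, TEXT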
self._parse_assignment(), 6729 ) 6730 6731 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6732 if self._match_texts(("ALL", "EVEN", "AUTO")): 6733 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6734 6735 self._match_text_seq("KEY", "DISTKEY") 6736 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6737 6738 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6739 if compound: 6740 self._match_text_seq("SORTKEY") 6741 6742 if self._match(TokenType.L_PAREN, advance=False): 6743 return self.expression( 6744 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6745 ) 6746 6747 self._match_texts(("AUTO", "NONE")) 6748 return self.expression( 6749 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6750 ) 6751 6752 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6753 index = self._index - 1 6754 6755 partition_exists = self._parse_exists() 6756 if self._match(TokenType.PARTITION, advance=False): 6757 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6758 6759 self._retreat(index) 6760 return self._parse_csv(self._parse_drop_column) 6761 6762 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6763 if self._match(TokenType.COLUMN): 6764 exists = self._parse_exists() 6765 old_column = self._parse_column() 6766 to = self._match_text_seq("TO") 6767 new_column = self._parse_column() 6768 6769 if old_column is None or to is None or new_column is None: 6770 return None 6771 6772 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6773 6774 self._match_text_seq("TO") 6775 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6776 6777 def _parse_alter_table_set(self) -> exp.AlterSet: 6778 alter_set = self.expression(exp.AlterSet) 6779 6780 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6781 "TABLE", "PROPERTIES" 6782 ): 6783 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6784 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6785 alter_set.set("expressions", [self._parse_assignment()]) 6786 elif self._match_texts(("LOGGED", "UNLOGGED")): 6787 alter_set.set("option", exp.var(self._prev.text.upper())) 6788 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6789 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6790 elif self._match_text_seq("LOCATION"): 6791 alter_set.set("location", self._parse_field()) 6792 elif self._match_text_seq("ACCESS", "METHOD"): 6793 alter_set.set("access_method", self._parse_field()) 6794 elif self._match_text_seq("TABLESPACE"): 6795 alter_set.set("tablespace", self._parse_field()) 6796 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6797 alter_set.set("file_format", [self._parse_field()]) 6798 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6799 alter_set.set("file_format", self._parse_wrapped_options()) 6800 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6801 alter_set.set("copy_options", self._parse_wrapped_options()) 6802 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6803 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6804 else: 6805 if self._match_text_seq("SERDE"): 6806 alter_set.set("serde", self._parse_field()) 6807 6808 alter_set.set("expressions", [self._parse_properties()]) 6809 6810 return 
alter_set 6811 6812 def _parse_alter(self) -> exp.Alter | exp.Command: 6813 start = self._prev 6814 6815 alter_token = self._match_set(self.ALTERABLES) and self._prev 6816 if not alter_token: 6817 return self._parse_as_command(start) 6818 6819 exists = self._parse_exists() 6820 only = self._match_text_seq("ONLY") 6821 this = self._parse_table(schema=True) 6822 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6823 6824 if self._next: 6825 self._advance() 6826 6827 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6828 if parser: 6829 actions = ensure_list(parser(self)) 6830 not_valid = self._match_text_seq("NOT", "VALID") 6831 options = self._parse_csv(self._parse_property) 6832 6833 if not self._curr and actions: 6834 return self.expression( 6835 exp.Alter, 6836 this=this, 6837 kind=alter_token.text.upper(), 6838 exists=exists, 6839 actions=actions, 6840 only=only, 6841 options=options, 6842 cluster=cluster, 6843 not_valid=not_valid, 6844 ) 6845 6846 return self._parse_as_command(start) 6847 6848 def _parse_merge(self) -> exp.Merge: 6849 self._match(TokenType.INTO) 6850 target = self._parse_table() 6851 6852 if target and self._match(TokenType.ALIAS, advance=False): 6853 target.set("alias", self._parse_table_alias()) 6854 6855 self._match(TokenType.USING) 6856 using = self._parse_table() 6857 6858 self._match(TokenType.ON) 6859 on = self._parse_assignment() 6860 6861 return self.expression( 6862 exp.Merge, 6863 this=target, 6864 using=using, 6865 on=on, 6866 expressions=self._parse_when_matched(), 6867 returning=self._parse_returning(), 6868 ) 6869 6870 def _parse_when_matched(self) -> t.List[exp.When]: 6871 whens = [] 6872 6873 while self._match(TokenType.WHEN): 6874 matched = not self._match(TokenType.NOT) 6875 self._match_text_seq("MATCHED") 6876 source = ( 6877 False 6878 if self._match_text_seq("BY", "TARGET") 6879 else self._match_text_seq("BY", "SOURCE") 6880 ) 6881 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6882 6883 self._match(TokenType.THEN) 6884 6885 if self._match(TokenType.INSERT): 6886 this = self._parse_star() 6887 if this: 6888 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6889 else: 6890 then = self.expression( 6891 exp.Insert, 6892 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6893 expression=self._match_text_seq("VALUES") and self._parse_value(), 6894 ) 6895 elif self._match(TokenType.UPDATE): 6896 expressions = self._parse_star() 6897 if expressions: 6898 then = self.expression(exp.Update, expressions=expressions) 6899 else: 6900 then = self.expression( 6901 exp.Update, 6902 expressions=self._match(TokenType.SET) 6903 and self._parse_csv(self._parse_equality), 6904 ) 6905 elif self._match(TokenType.DELETE): 6906 then = self.expression(exp.Var, this=self._prev.text) 6907 else: 6908 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6909 6910 whens.append( 6911 self.expression( 6912 exp.When, 6913 matched=matched, 6914 source=source, 6915 condition=condition, 6916 then=then, 6917 ) 6918 ) 6919 return whens 6920 6921 def _parse_show(self) -> t.Optional[exp.Expression]: 6922 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6923 if parser: 6924 return parser(self) 6925 return self._parse_as_command(self._prev) 6926 6927 def _parse_set_item_assignment( 6928 self, kind: t.Optional[str] = None 6929 ) -> t.Optional[exp.Expression]: 6930 index = self._index 6931 6932 if kind in ("GLOBAL", "SESSION") and 
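Example (illustrative aside, not part of the parser source): _parse_merge and _parse_when_matched above collect one exp.When per WHEN branch; the tables are made up.

    from sqlglot import exp, parse_one

    merge = parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    print([w.args.get("matched") for w in merge.find_all(exp.When)])  # expected: [True, False]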
self._match_text_seq("TRANSACTION"): 6933 return self._parse_set_transaction(global_=kind == "GLOBAL") 6934 6935 left = self._parse_primary() or self._parse_column() 6936 assignment_delimiter = self._match_texts(("=", "TO")) 6937 6938 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6939 self._retreat(index) 6940 return None 6941 6942 right = self._parse_statement() or self._parse_id_var() 6943 if isinstance(right, (exp.Column, exp.Identifier)): 6944 right = exp.var(right.name) 6945 6946 this = self.expression(exp.EQ, this=left, expression=right) 6947 return self.expression(exp.SetItem, this=this, kind=kind) 6948 6949 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6950 self._match_text_seq("TRANSACTION") 6951 characteristics = self._parse_csv( 6952 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6953 ) 6954 return self.expression( 6955 exp.SetItem, 6956 expressions=characteristics, 6957 kind="TRANSACTION", 6958 **{"global": global_}, # type: ignore 6959 ) 6960 6961 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6962 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6963 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6964 6965 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6966 index = self._index 6967 set_ = self.expression( 6968 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6969 ) 6970 6971 if self._curr: 6972 self._retreat(index) 6973 return self._parse_as_command(self._prev) 6974 6975 return set_ 6976 6977 def _parse_var_from_options( 6978 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6979 ) -> t.Optional[exp.Var]: 6980 start = self._curr 6981 if not start: 6982 return None 6983 6984 option = start.text.upper() 6985 continuations = options.get(option) 6986 6987 index = self._index 6988 self._advance() 6989 for keywords in continuations or []: 6990 if isinstance(keywords, str): 6991 keywords = (keywords,) 6992 6993 if self._match_text_seq(*keywords): 6994 option = f"{option} {' '.join(keywords)}" 6995 break 6996 else: 6997 if continuations or continuations is None: 6998 if raise_unmatched: 6999 self.raise_error(f"Unknown option {option}") 7000 7001 self._retreat(index) 7002 return None 7003 7004 return exp.var(option) 7005 7006 def _parse_as_command(self, start: Token) -> exp.Command: 7007 while self._curr: 7008 self._advance() 7009 text = self._find_sql(start, self._prev) 7010 size = len(start.text) 7011 self._warn_unsupported() 7012 return exp.Command(this=text[:size], expression=text[size:]) 7013 7014 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7015 settings = [] 7016 7017 self._match_l_paren() 7018 kind = self._parse_id_var() 7019 7020 if self._match(TokenType.L_PAREN): 7021 while True: 7022 key = self._parse_id_var() 7023 value = self._parse_primary() 7024 7025 if not key and value is None: 7026 break 7027 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7028 self._match(TokenType.R_PAREN) 7029 7030 self._match_r_paren() 7031 7032 return self.expression( 7033 exp.DictProperty, 7034 this=this, 7035 kind=kind.this if kind else None, 7036 settings=settings, 7037 ) 7038 7039 def _parse_dict_range(self, this: str) -> exp.DictRange: 7040 self._match_l_paren() 7041 has_min = self._match_text_seq("MIN") 7042 if has_min: 7043 min = self._parse_var() or self._parse_primary() 7044 self._match_text_seq("MAX") 7045 max = 
self._parse_var() or self._parse_primary() 7046 else: 7047 max = self._parse_var() or self._parse_primary() 7048 min = exp.Literal.number(0) 7049 self._match_r_paren() 7050 return self.expression(exp.DictRange, this=this, min=min, max=max) 7051 7052 def _parse_comprehension( 7053 self, this: t.Optional[exp.Expression] 7054 ) -> t.Optional[exp.Comprehension]: 7055 index = self._index 7056 expression = self._parse_column() 7057 if not self._match(TokenType.IN): 7058 self._retreat(index - 1) 7059 return None 7060 iterator = self._parse_column() 7061 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7062 return self.expression( 7063 exp.Comprehension, 7064 this=this, 7065 expression=expression, 7066 iterator=iterator, 7067 condition=condition, 7068 ) 7069 7070 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7071 if self._match(TokenType.HEREDOC_STRING): 7072 return self.expression(exp.Heredoc, this=self._prev.text) 7073 7074 if not self._match_text_seq("$"): 7075 return None 7076 7077 tags = ["$"] 7078 tag_text = None 7079 7080 if self._is_connected(): 7081 self._advance() 7082 tags.append(self._prev.text.upper()) 7083 else: 7084 self.raise_error("No closing $ found") 7085 7086 if tags[-1] != "$": 7087 if self._is_connected() and self._match_text_seq("$"): 7088 tag_text = tags[-1] 7089 tags.append("$") 7090 else: 7091 self.raise_error("No closing $ found") 7092 7093 heredoc_start = self._curr 7094 7095 while self._curr: 7096 if self._match_text_seq(*tags, advance=False): 7097 this = self._find_sql(heredoc_start, self._prev) 7098 self._advance(len(tags)) 7099 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7100 7101 self._advance() 7102 7103 self.raise_error(f"No closing {''.join(tags)} found") 7104 return None 7105 7106 def _find_parser( 7107 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7108 ) -> t.Optional[t.Callable]: 7109 if not self._curr: 7110 return None 7111 7112 index = self._index 7113 this = [] 7114 while True: 7115 # The current token might be multiple words 7116 curr = self._curr.text.upper() 7117 key = curr.split(" ") 7118 this.append(curr) 7119 7120 self._advance() 7121 result, trie = in_trie(trie, key) 7122 if result == TrieResult.FAILED: 7123 break 7124 7125 if result == TrieResult.EXISTS: 7126 subparser = parsers[" ".join(this)] 7127 return subparser 7128 7129 self._retreat(index) 7130 return None 7131 7132 def _match(self, token_type, advance=True, expression=None): 7133 if not self._curr: 7134 return None 7135 7136 if self._curr.token_type == token_type: 7137 if advance: 7138 self._advance() 7139 self._add_comments(expression) 7140 return True 7141 7142 return None 7143 7144 def _match_set(self, types, advance=True): 7145 if not self._curr: 7146 return None 7147 7148 if self._curr.token_type in types: 7149 if advance: 7150 self._advance() 7151 return True 7152 7153 return None 7154 7155 def _match_pair(self, token_type_a, token_type_b, advance=True): 7156 if not self._curr or not self._next: 7157 return None 7158 7159 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7160 if advance: 7161 self._advance(2) 7162 return True 7163 7164 return None 7165 7166 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7167 if not self._match(TokenType.L_PAREN, expression=expression): 7168 self.raise_error("Expecting (") 7169 7170 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7171 if not self._match(TokenType.R_PAREN, expression=expression): 
7172 self.raise_error("Expecting )") 7173 7174 def _match_texts(self, texts, advance=True): 7175 if ( 7176 self._curr 7177 and self._curr.token_type != TokenType.STRING 7178 and self._curr.text.upper() in texts 7179 ): 7180 if advance: 7181 self._advance() 7182 return True 7183 return None 7184 7185 def _match_text_seq(self, *texts, advance=True): 7186 index = self._index 7187 for text in texts: 7188 if ( 7189 self._curr 7190 and self._curr.token_type != TokenType.STRING 7191 and self._curr.text.upper() == text 7192 ): 7193 self._advance() 7194 else: 7195 self._retreat(index) 7196 return None 7197 7198 if not advance: 7199 self._retreat(index) 7200 7201 return True 7202 7203 def _replace_lambda( 7204 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7205 ) -> t.Optional[exp.Expression]: 7206 if not node: 7207 return node 7208 7209 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7210 7211 for column in node.find_all(exp.Column): 7212 typ = lambda_types.get(column.parts[0].name) 7213 if typ is not None: 7214 dot_or_id = column.to_dot() if column.table else column.this 7215 7216 if typ: 7217 dot_or_id = self.expression( 7218 exp.Cast, 7219 this=dot_or_id, 7220 to=typ, 7221 ) 7222 7223 parent = column.parent 7224 7225 while isinstance(parent, exp.Dot): 7226 if not isinstance(parent.parent, exp.Dot): 7227 parent.replace(dot_or_id) 7228 break 7229 parent = parent.parent 7230 else: 7231 if column is node: 7232 node = dot_or_id 7233 else: 7234 column.replace(dot_or_id) 7235 return node 7236 7237 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7238 start = self._prev 7239 7240 # Not to be confused with TRUNCATE(number, decimals) function call 7241 if self._match(TokenType.L_PAREN): 7242 self._retreat(self._index - 2) 7243 return self._parse_function() 7244 7245 # Clickhouse supports TRUNCATE DATABASE as well 7246 is_database = self._match(TokenType.DATABASE) 7247 7248 self._match(TokenType.TABLE) 7249 7250 exists = self._parse_exists(not_=False) 7251 7252 expressions = self._parse_csv( 7253 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7254 ) 7255 7256 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7257 7258 if self._match_text_seq("RESTART", "IDENTITY"): 7259 identity = "RESTART" 7260 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7261 identity = "CONTINUE" 7262 else: 7263 identity = None 7264 7265 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7266 option = self._prev.text 7267 else: 7268 option = None 7269 7270 partition = self._parse_partition() 7271 7272 # Fallback case 7273 if self._curr: 7274 return self._parse_as_command(start) 7275 7276 return self.expression( 7277 exp.TruncateTable, 7278 expressions=expressions, 7279 is_database=is_database, 7280 exists=exists, 7281 cluster=cluster, 7282 identity=identity, 7283 option=option, 7284 partition=partition, 7285 ) 7286 7287 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7288 this = self._parse_ordered(self._parse_opclass) 7289 7290 if not self._match(TokenType.WITH): 7291 return this 7292 7293 op = self._parse_var(any_token=True) 7294 7295 return self.expression(exp.WithOperator, this=this, op=op) 7296 7297 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7298 self._match(TokenType.EQ) 7299 self._match(TokenType.L_PAREN) 7300 7301 opts: t.List[t.Optional[exp.Expression]] = [] 7302 while self._curr and not self._match(TokenType.R_PAREN): 7303 if 
self._match_text_seq("FORMAT_NAME", "="): 7304 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7305 # so we parse it separately to use _parse_field() 7306 prop = self.expression( 7307 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7308 ) 7309 opts.append(prop) 7310 else: 7311 opts.append(self._parse_property()) 7312 7313 self._match(TokenType.COMMA) 7314 7315 return opts 7316 7317 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7318 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7319 7320 options = [] 7321 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7322 option = self._parse_var(any_token=True) 7323 prev = self._prev.text.upper() 7324 7325 # Different dialects might separate options and values by white space, "=" and "AS" 7326 self._match(TokenType.EQ) 7327 self._match(TokenType.ALIAS) 7328 7329 param = self.expression(exp.CopyParameter, this=option) 7330 7331 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7332 TokenType.L_PAREN, advance=False 7333 ): 7334 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7335 param.set("expressions", self._parse_wrapped_options()) 7336 elif prev == "FILE_FORMAT": 7337 # T-SQL's external file format case 7338 param.set("expression", self._parse_field()) 7339 else: 7340 param.set("expression", self._parse_unquoted_field()) 7341 7342 options.append(param) 7343 self._match(sep) 7344 7345 return options 7346 7347 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7348 expr = self.expression(exp.Credentials) 7349 7350 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7351 expr.set("storage", self._parse_field()) 7352 if self._match_text_seq("CREDENTIALS"): 7353 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7354 creds = ( 7355 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7356 ) 7357 expr.set("credentials", creds) 7358 if self._match_text_seq("ENCRYPTION"): 7359 expr.set("encryption", self._parse_wrapped_options()) 7360 if self._match_text_seq("IAM_ROLE"): 7361 expr.set("iam_role", self._parse_field()) 7362 if self._match_text_seq("REGION"): 7363 expr.set("region", self._parse_field()) 7364 7365 return expr 7366 7367 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7368 return self._parse_field() 7369 7370 def _parse_copy(self) -> exp.Copy | exp.Command: 7371 start = self._prev 7372 7373 self._match(TokenType.INTO) 7374 7375 this = ( 7376 self._parse_select(nested=True, parse_subquery_alias=False) 7377 if self._match(TokenType.L_PAREN, advance=False) 7378 else self._parse_table(schema=True) 7379 ) 7380 7381 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7382 7383 files = self._parse_csv(self._parse_file_location) 7384 credentials = self._parse_credentials() 7385 7386 self._match_text_seq("WITH") 7387 7388 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7389 7390 # Fallback case 7391 if self._curr: 7392 return self._parse_as_command(start) 7393 7394 return self.expression( 7395 exp.Copy, 7396 this=this, 7397 kind=kind, 7398 credentials=credentials, 7399 files=files, 7400 params=params, 7401 ) 7402 7403 def _parse_normalize(self) -> exp.Normalize: 7404 return self.expression( 7405 exp.Normalize, 7406 this=self._parse_bitwise(), 7407 form=self._match(TokenType.COMMA) and self._parse_var(), 7408 ) 7409 7410 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7411 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7412 this = self._parse_function() 7413 if isinstance(this, exp.Columns): 7414 this.set("unpack", True) 7415 return this 7416 7417 return self.expression( 7418 exp.Star, 7419 **{ # type: ignore 7420 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7421 "replace": self._parse_star_op("REPLACE"), 7422 "rename": self._parse_star_op("RENAME"), 7423 }, 7424 ) 7425 7426 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7427 privilege_parts = [] 7428 7429 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7430 # (end of privilege list) or L_PAREN (start of column list) are met 7431 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7432 privilege_parts.append(self._curr.text.upper()) 7433 self._advance() 7434 7435 this = exp.var(" ".join(privilege_parts)) 7436 expressions = ( 7437 self._parse_wrapped_csv(self._parse_column) 7438 if self._match(TokenType.L_PAREN, advance=False) 7439 else None 7440 ) 7441 7442 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7443 7444 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7445 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7446 principal = self._parse_id_var() 7447 7448 if not principal: 7449 return None 7450 7451 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7452 7453 def _parse_grant(self) -> exp.Grant | exp.Command: 7454 start = self._prev 7455 7456 privileges = self._parse_csv(self._parse_grant_privilege) 7457 7458 self._match(TokenType.ON) 7459 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7460 7461 # Attempt to parse the securable e.g. MySQL allows names 7462 # such as "foo.*", "*.*" which are not easily parseable yet 7463 securable = self._try_parse(self._parse_table_parts) 7464 7465 if not securable or not self._match_text_seq("TO"): 7466 return self._parse_as_command(start) 7467 7468 principals = self._parse_csv(self._parse_grant_principal) 7469 7470 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7471 7472 if self._curr: 7473 return self._parse_as_command(start) 7474 7475 return self.expression( 7476 exp.Grant, 7477 privileges=privileges, 7478 kind=kind, 7479 securable=securable, 7480 principals=principals, 7481 grant_option=grant_option, 7482 ) 7483 7484 def _parse_overlay(self) -> exp.Overlay: 7485 return self.expression( 7486 exp.Overlay, 7487 **{ # type: ignore 7488 "this": self._parse_bitwise(), 7489 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7490 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7491 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7492 }, 7493 )
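MERGE follows the same pattern: _parse_merge collects the WHEN branches via _parse_when_matched, and each exp.When records whether it applies to matched rows. A hedged sketch under the same version caveat:

import sqlglot
from sqlglot import exp

merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)
# _parse_when_matched returned one exp.When per branch
assert [w.args.get("matched") for w in merge.expressions] == [True, False]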
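SET items route through SET_PARSERS; SET SESSION TRANSACTION ends up in _parse_set_transaction, which matches the characteristics against TRANSACTION_CHARACTERISTICS. A sketch, again assuming this release:

import sqlglot
from sqlglot import exp

stmt = sqlglot.parse_one("SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED")
assert isinstance(stmt, exp.Set)
item = stmt.expressions[0]  # exp.SetItem built by _parse_set_transaction
assert item.args["kind"] == "TRANSACTION"
assert item.args["global"] is False  # SESSION, not GLOBAL
assert item.expressions[0].name == "ISOLATION LEVEL READ COMMITTED"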
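The _match* helpers all share one protocol: return a truthy value and advance on success, or return None and restore the saved index on failure, so callers can chain them in boolean expressions without cursor bookkeeping. A standalone toy version of _match_text_seq (not sqlglot code) that shows the save/retreat discipline:

def match_text_seq(tokens, index, *texts):
    """Toy model of Parser._match_text_seq's save/retreat protocol."""
    start = index
    for text in texts:
        if index < len(tokens) and tokens[index].upper() == text:
            index += 1  # consume the matching token
        else:
            return start, False  # retreat: hand back the saved position
    return index, True

tokens = ["NOT", "NULL", "DEFAULT", "0"]
assert match_text_seq(tokens, 0, "NOT", "NULL") == (2, True)
assert match_text_seq(tokens, 0, "NOT", "VALID") == (0, False)  # cursor untouched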
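_parse_truncate_table shows the common fallback idiom: parse optimistically, and if any tokens remain unconsumed, degrade to a generic exp.Command. When everything is consumed, the options land on the TruncateTable node:

import sqlglot
from sqlglot import exp

trunc = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
assert isinstance(trunc, exp.TruncateTable)
assert [tbl.name for tbl in trunc.expressions] == ["t1", "t2"]
assert trunc.args["identity"] == "RESTART"
assert trunc.args["option"] == "CASCADE"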
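_parse_copy uses the FROM/TO keyword to record the direction in kind (True for a load, False for an unload), with the same Command fallback for unparseable tails. Assuming this release:

import sqlglot
from sqlglot import exp

copy_expr = sqlglot.parse_one("COPY INTO t FROM 'data.csv'")
assert isinstance(copy_expr, exp.Copy)
assert copy_expr.args["kind"] is True  # FROM => loading into t
assert copy_expr.args["files"][0].name == "data.csv"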
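_parse_grant is similarly defensive: multi-word privileges are accumulated token by token until ON, a comma or a column list, and the whole statement degrades to exp.Command if the securable or principals cannot be parsed. A sketch, with the usual version caveat:

import sqlglot
from sqlglot import exp

grant = sqlglot.parse_one("GRANT SELECT, INSERT ON TABLE t TO ROLE analyst")
assert isinstance(grant, exp.Grant)
assert [p.name for p in grant.args["privileges"]] == ["SELECT", "INSERT"]
assert grant.args["kind"] == "TABLE"
principal = grant.args["principals"][0]
assert principal.name == "analyst" and principal.args["kind"] == "ROLE"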
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 
to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.UDECIMAL, 356 TokenType.BIGDECIMAL, 357 TokenType.UUID, 358 TokenType.GEOGRAPHY, 359 TokenType.GEOMETRY, 360 TokenType.HLLSKETCH, 361 TokenType.HSTORE, 362 TokenType.PSEUDO_TYPE, 363 TokenType.SUPER, 364 TokenType.SERIAL, 365 TokenType.SMALLSERIAL, 366 TokenType.BIGSERIAL, 367 TokenType.XML, 368 TokenType.YEAR, 369 TokenType.UNIQUEIDENTIFIER, 370 TokenType.USERDEFINED, 371 TokenType.MONEY, 372 TokenType.SMALLMONEY, 373 TokenType.ROWVERSION, 374 TokenType.IMAGE, 375 TokenType.VARIANT, 376 TokenType.VECTOR, 377 TokenType.OBJECT, 378 TokenType.OBJECT_IDENTIFIER, 379 TokenType.INET, 380 TokenType.IPADDRESS, 381 TokenType.IPPREFIX, 382 TokenType.IPV4, 383 TokenType.IPV6, 384 TokenType.UNKNOWN, 385 TokenType.NULL, 386 TokenType.NAME, 387 TokenType.TDIGEST, 388 *ENUM_TYPE_TOKENS, 389 *NESTED_TYPE_TOKENS, 390 *AGGREGATE_TYPE_TOKENS, 391 } 392 393 
SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 394 TokenType.BIGINT: TokenType.UBIGINT, 395 TokenType.INT: TokenType.UINT, 396 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 397 TokenType.SMALLINT: TokenType.USMALLINT, 398 TokenType.TINYINT: TokenType.UTINYINT, 399 TokenType.DECIMAL: TokenType.UDECIMAL, 400 } 401 402 SUBQUERY_PREDICATES = { 403 TokenType.ANY: exp.Any, 404 TokenType.ALL: exp.All, 405 TokenType.EXISTS: exp.Exists, 406 TokenType.SOME: exp.Any, 407 } 408 409 RESERVED_TOKENS = { 410 *Tokenizer.SINGLE_TOKENS.values(), 411 TokenType.SELECT, 412 } - {TokenType.IDENTIFIER} 413 414 DB_CREATABLES = { 415 TokenType.DATABASE, 416 TokenType.DICTIONARY, 417 TokenType.MODEL, 418 TokenType.SCHEMA, 419 TokenType.SEQUENCE, 420 TokenType.STORAGE_INTEGRATION, 421 TokenType.TABLE, 422 TokenType.TAG, 423 TokenType.VIEW, 424 TokenType.WAREHOUSE, 425 TokenType.STREAMLIT, 426 } 427 428 CREATABLES = { 429 TokenType.COLUMN, 430 TokenType.CONSTRAINT, 431 TokenType.FOREIGN_KEY, 432 TokenType.FUNCTION, 433 TokenType.INDEX, 434 TokenType.PROCEDURE, 435 *DB_CREATABLES, 436 } 437 438 ALTERABLES = { 439 TokenType.INDEX, 440 TokenType.TABLE, 441 TokenType.VIEW, 442 } 443 444 # Tokens that can represent identifiers 445 ID_VAR_TOKENS = { 446 TokenType.ALL, 447 TokenType.VAR, 448 TokenType.ANTI, 449 TokenType.APPLY, 450 TokenType.ASC, 451 TokenType.ASOF, 452 TokenType.AUTO_INCREMENT, 453 TokenType.BEGIN, 454 TokenType.BPCHAR, 455 TokenType.CACHE, 456 TokenType.CASE, 457 TokenType.COLLATE, 458 TokenType.COMMAND, 459 TokenType.COMMENT, 460 TokenType.COMMIT, 461 TokenType.CONSTRAINT, 462 TokenType.COPY, 463 TokenType.CUBE, 464 TokenType.DEFAULT, 465 TokenType.DELETE, 466 TokenType.DESC, 467 TokenType.DESCRIBE, 468 TokenType.DICTIONARY, 469 TokenType.DIV, 470 TokenType.END, 471 TokenType.EXECUTE, 472 TokenType.ESCAPE, 473 TokenType.FALSE, 474 TokenType.FIRST, 475 TokenType.FILTER, 476 TokenType.FINAL, 477 TokenType.FORMAT, 478 TokenType.FULL, 479 TokenType.IDENTIFIER, 480 TokenType.IS, 481 TokenType.ISNULL, 482 TokenType.INTERVAL, 483 TokenType.KEEP, 484 TokenType.KILL, 485 TokenType.LEFT, 486 TokenType.LOAD, 487 TokenType.MERGE, 488 TokenType.NATURAL, 489 TokenType.NEXT, 490 TokenType.OFFSET, 491 TokenType.OPERATOR, 492 TokenType.ORDINALITY, 493 TokenType.OVERLAPS, 494 TokenType.OVERWRITE, 495 TokenType.PARTITION, 496 TokenType.PERCENT, 497 TokenType.PIVOT, 498 TokenType.PRAGMA, 499 TokenType.RANGE, 500 TokenType.RECURSIVE, 501 TokenType.REFERENCES, 502 TokenType.REFRESH, 503 TokenType.RENAME, 504 TokenType.REPLACE, 505 TokenType.RIGHT, 506 TokenType.ROLLUP, 507 TokenType.ROW, 508 TokenType.ROWS, 509 TokenType.SEMI, 510 TokenType.SET, 511 TokenType.SETTINGS, 512 TokenType.SHOW, 513 TokenType.TEMPORARY, 514 TokenType.TOP, 515 TokenType.TRUE, 516 TokenType.TRUNCATE, 517 TokenType.UNIQUE, 518 TokenType.UNNEST, 519 TokenType.UNPIVOT, 520 TokenType.UPDATE, 521 TokenType.USE, 522 TokenType.VOLATILE, 523 TokenType.WINDOW, 524 *CREATABLES, 525 *SUBQUERY_PREDICATES, 526 *TYPE_TOKENS, 527 *NO_PAREN_FUNCTIONS, 528 } 529 ID_VAR_TOKENS.remove(TokenType.UNION) 530 531 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 532 533 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 534 TokenType.ANTI, 535 TokenType.APPLY, 536 TokenType.ASOF, 537 TokenType.FULL, 538 TokenType.LEFT, 539 TokenType.LOCK, 540 TokenType.NATURAL, 541 TokenType.OFFSET, 542 TokenType.RIGHT, 543 TokenType.SEMI, 544 TokenType.WINDOW, 545 } 546 547 ALIAS_TOKENS = ID_VAR_TOKENS 548 549 ARRAY_CONSTRUCTORS = { 550 "ARRAY": exp.Array, 551 "LIST": exp.List, 552 } 553 554 COMMENT_TABLE_ALIAS_TOKENS 
= TABLE_ALIAS_TOKENS - {TokenType.IS} 555 556 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 557 558 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 559 560 FUNC_TOKENS = { 561 TokenType.COLLATE, 562 TokenType.COMMAND, 563 TokenType.CURRENT_DATE, 564 TokenType.CURRENT_DATETIME, 565 TokenType.CURRENT_TIMESTAMP, 566 TokenType.CURRENT_TIME, 567 TokenType.CURRENT_USER, 568 TokenType.FILTER, 569 TokenType.FIRST, 570 TokenType.FORMAT, 571 TokenType.GLOB, 572 TokenType.IDENTIFIER, 573 TokenType.INDEX, 574 TokenType.ISNULL, 575 TokenType.ILIKE, 576 TokenType.INSERT, 577 TokenType.LIKE, 578 TokenType.MERGE, 579 TokenType.OFFSET, 580 TokenType.PRIMARY_KEY, 581 TokenType.RANGE, 582 TokenType.REPLACE, 583 TokenType.RLIKE, 584 TokenType.ROW, 585 TokenType.UNNEST, 586 TokenType.VAR, 587 TokenType.LEFT, 588 TokenType.RIGHT, 589 TokenType.SEQUENCE, 590 TokenType.DATE, 591 TokenType.DATETIME, 592 TokenType.TABLE, 593 TokenType.TIMESTAMP, 594 TokenType.TIMESTAMPTZ, 595 TokenType.TRUNCATE, 596 TokenType.WINDOW, 597 TokenType.XOR, 598 *TYPE_TOKENS, 599 *SUBQUERY_PREDICATES, 600 } 601 602 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 603 TokenType.AND: exp.And, 604 } 605 606 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 607 TokenType.COLON_EQ: exp.PropertyEQ, 608 } 609 610 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 611 TokenType.OR: exp.Or, 612 } 613 614 EQUALITY = { 615 TokenType.EQ: exp.EQ, 616 TokenType.NEQ: exp.NEQ, 617 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 618 } 619 620 COMPARISON = { 621 TokenType.GT: exp.GT, 622 TokenType.GTE: exp.GTE, 623 TokenType.LT: exp.LT, 624 TokenType.LTE: exp.LTE, 625 } 626 627 BITWISE = { 628 TokenType.AMP: exp.BitwiseAnd, 629 TokenType.CARET: exp.BitwiseXor, 630 TokenType.PIPE: exp.BitwiseOr, 631 } 632 633 TERM = { 634 TokenType.DASH: exp.Sub, 635 TokenType.PLUS: exp.Add, 636 TokenType.MOD: exp.Mod, 637 TokenType.COLLATE: exp.Collate, 638 } 639 640 FACTOR = { 641 TokenType.DIV: exp.IntDiv, 642 TokenType.LR_ARROW: exp.Distance, 643 TokenType.SLASH: exp.Div, 644 TokenType.STAR: exp.Mul, 645 } 646 647 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 648 649 TIMES = { 650 TokenType.TIME, 651 TokenType.TIMETZ, 652 } 653 654 TIMESTAMPS = { 655 TokenType.TIMESTAMP, 656 TokenType.TIMESTAMPTZ, 657 TokenType.TIMESTAMPLTZ, 658 *TIMES, 659 } 660 661 SET_OPERATIONS = { 662 TokenType.UNION, 663 TokenType.INTERSECT, 664 TokenType.EXCEPT, 665 } 666 667 JOIN_METHODS = { 668 TokenType.ASOF, 669 TokenType.NATURAL, 670 TokenType.POSITIONAL, 671 } 672 673 JOIN_SIDES = { 674 TokenType.LEFT, 675 TokenType.RIGHT, 676 TokenType.FULL, 677 } 678 679 JOIN_KINDS = { 680 TokenType.ANTI, 681 TokenType.CROSS, 682 TokenType.INNER, 683 TokenType.OUTER, 684 TokenType.SEMI, 685 TokenType.STRAIGHT_JOIN, 686 } 687 688 JOIN_HINTS: t.Set[str] = set() 689 690 LAMBDAS = { 691 TokenType.ARROW: lambda self, expressions: self.expression( 692 exp.Lambda, 693 this=self._replace_lambda( 694 self._parse_assignment(), 695 expressions, 696 ), 697 expressions=expressions, 698 ), 699 TokenType.FARROW: lambda self, expressions: self.expression( 700 exp.Kwarg, 701 this=exp.var(expressions[0].name), 702 expression=self._parse_assignment(), 703 ), 704 } 705 706 COLUMN_OPERATORS = { 707 TokenType.DOT: None, 708 TokenType.DCOLON: lambda self, this, to: self.expression( 709 exp.Cast if self.STRICT_CAST else exp.TryCast, 710 this=this, 711 to=to, 712 ), 713 TokenType.ARROW: lambda self, this, path: self.expression( 714 exp.JSONExtract, 715 this=this, 716 
expression=self.dialect.to_json_path(path), 717 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 718 ), 719 TokenType.DARROW: lambda self, this, path: self.expression( 720 exp.JSONExtractScalar, 721 this=this, 722 expression=self.dialect.to_json_path(path), 723 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 724 ), 725 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 726 exp.JSONBExtract, 727 this=this, 728 expression=path, 729 ), 730 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 731 exp.JSONBExtractScalar, 732 this=this, 733 expression=path, 734 ), 735 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 736 exp.JSONBContains, 737 this=this, 738 expression=key, 739 ), 740 } 741 742 EXPRESSION_PARSERS = { 743 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 744 exp.Column: lambda self: self._parse_column(), 745 exp.Condition: lambda self: self._parse_assignment(), 746 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 747 exp.Expression: lambda self: self._parse_expression(), 748 exp.From: lambda self: self._parse_from(joins=True), 749 exp.Group: lambda self: self._parse_group(), 750 exp.Having: lambda self: self._parse_having(), 751 exp.Identifier: lambda self: self._parse_id_var(), 752 exp.Join: lambda self: self._parse_join(), 753 exp.Lambda: lambda self: self._parse_lambda(), 754 exp.Lateral: lambda self: self._parse_lateral(), 755 exp.Limit: lambda self: self._parse_limit(), 756 exp.Offset: lambda self: self._parse_offset(), 757 exp.Order: lambda self: self._parse_order(), 758 exp.Ordered: lambda self: self._parse_ordered(), 759 exp.Properties: lambda self: self._parse_properties(), 760 exp.Qualify: lambda self: self._parse_qualify(), 761 exp.Returning: lambda self: self._parse_returning(), 762 exp.Select: lambda self: self._parse_select(), 763 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 764 exp.Table: lambda self: self._parse_table_parts(), 765 exp.TableAlias: lambda self: self._parse_table_alias(), 766 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 767 exp.Where: lambda self: self._parse_where(), 768 exp.Window: lambda self: self._parse_named_window(), 769 exp.With: lambda self: self._parse_with(), 770 "JOIN_TYPE": lambda self: self._parse_join_parts(), 771 } 772 773 STATEMENT_PARSERS = { 774 TokenType.ALTER: lambda self: self._parse_alter(), 775 TokenType.BEGIN: lambda self: self._parse_transaction(), 776 TokenType.CACHE: lambda self: self._parse_cache(), 777 TokenType.COMMENT: lambda self: self._parse_comment(), 778 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 779 TokenType.COPY: lambda self: self._parse_copy(), 780 TokenType.CREATE: lambda self: self._parse_create(), 781 TokenType.DELETE: lambda self: self._parse_delete(), 782 TokenType.DESC: lambda self: self._parse_describe(), 783 TokenType.DESCRIBE: lambda self: self._parse_describe(), 784 TokenType.DROP: lambda self: self._parse_drop(), 785 TokenType.GRANT: lambda self: self._parse_grant(), 786 TokenType.INSERT: lambda self: self._parse_insert(), 787 TokenType.KILL: lambda self: self._parse_kill(), 788 TokenType.LOAD: lambda self: self._parse_load(), 789 TokenType.MERGE: lambda self: self._parse_merge(), 790 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 791 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 792 TokenType.REFRESH: lambda self: self._parse_refresh(), 793 TokenType.ROLLBACK: lambda 
self: self._parse_commit_or_rollback(), 794 TokenType.SET: lambda self: self._parse_set(), 795 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 796 TokenType.UNCACHE: lambda self: self._parse_uncache(), 797 TokenType.UPDATE: lambda self: self._parse_update(), 798 TokenType.USE: lambda self: self.expression( 799 exp.Use, 800 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 801 this=self._parse_table(schema=False), 802 ), 803 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 804 } 805 806 UNARY_PARSERS = { 807 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 808 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 809 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 810 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 811 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 812 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 813 } 814 815 STRING_PARSERS = { 816 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 817 exp.RawString, this=token.text 818 ), 819 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 820 exp.National, this=token.text 821 ), 822 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 823 TokenType.STRING: lambda self, token: self.expression( 824 exp.Literal, this=token.text, is_string=True 825 ), 826 TokenType.UNICODE_STRING: lambda self, token: self.expression( 827 exp.UnicodeString, 828 this=token.text, 829 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 830 ), 831 } 832 833 NUMERIC_PARSERS = { 834 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 835 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 836 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 837 TokenType.NUMBER: lambda self, token: self.expression( 838 exp.Literal, this=token.text, is_string=False 839 ), 840 } 841 842 PRIMARY_PARSERS = { 843 **STRING_PARSERS, 844 **NUMERIC_PARSERS, 845 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 846 TokenType.NULL: lambda self, _: self.expression(exp.Null), 847 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 848 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 849 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 850 TokenType.STAR: lambda self, _: self._parse_star_ops(), 851 } 852 853 PLACEHOLDER_PARSERS = { 854 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 855 TokenType.PARAMETER: lambda self: self._parse_parameter(), 856 TokenType.COLON: lambda self: ( 857 self.expression(exp.Placeholder, this=self._prev.text) 858 if self._match_set(self.ID_VAR_TOKENS) 859 else None 860 ), 861 } 862 863 RANGE_PARSERS = { 864 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 865 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 866 TokenType.GLOB: binary_range_parser(exp.Glob), 867 TokenType.ILIKE: binary_range_parser(exp.ILike), 868 TokenType.IN: lambda self, this: self._parse_in(this), 869 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 870 TokenType.IS: lambda self, this: self._parse_is(this), 871 TokenType.LIKE: binary_range_parser(exp.Like), 872 TokenType.LT_AT: 
binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 873 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 874 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 875 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 876 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 877 } 878 879 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 880 "ALLOWED_VALUES": lambda self: self.expression( 881 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 882 ), 883 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 884 "AUTO": lambda self: self._parse_auto_property(), 885 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 886 "BACKUP": lambda self: self.expression( 887 exp.BackupProperty, this=self._parse_var(any_token=True) 888 ), 889 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 890 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 891 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 892 "CHECKSUM": lambda self: self._parse_checksum(), 893 "CLUSTER BY": lambda self: self._parse_cluster(), 894 "CLUSTERED": lambda self: self._parse_clustered_by(), 895 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 896 exp.CollateProperty, **kwargs 897 ), 898 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 899 "CONTAINS": lambda self: self._parse_contains_property(), 900 "COPY": lambda self: self._parse_copy_property(), 901 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 902 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 903 "DEFINER": lambda self: self._parse_definer(), 904 "DETERMINISTIC": lambda self: self.expression( 905 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 906 ), 907 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 908 "DUPLICATE": lambda self: self._parse_duplicate(), 909 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 910 "DISTKEY": lambda self: self._parse_distkey(), 911 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 912 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 913 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 914 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 915 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 916 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 917 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 918 "FREESPACE": lambda self: self._parse_freespace(), 919 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 920 "HEAP": lambda self: self.expression(exp.HeapProperty), 921 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 922 "IMMUTABLE": lambda self: self.expression( 923 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 924 ), 925 "INHERITS": lambda self: self.expression( 926 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 927 ), 928 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 929 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 930 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 931 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 932 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 933 
"LIKE": lambda self: self._parse_create_like(), 934 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 935 "LOCK": lambda self: self._parse_locking(), 936 "LOCKING": lambda self: self._parse_locking(), 937 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 938 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 939 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 940 "MODIFIES": lambda self: self._parse_modifies_property(), 941 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 942 "NO": lambda self: self._parse_no_property(), 943 "ON": lambda self: self._parse_on_property(), 944 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 945 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 946 "PARTITION": lambda self: self._parse_partitioned_of(), 947 "PARTITION BY": lambda self: self._parse_partitioned_by(), 948 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 949 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 950 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 951 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 952 "READS": lambda self: self._parse_reads_property(), 953 "REMOTE": lambda self: self._parse_remote_with_connection(), 954 "RETURNS": lambda self: self._parse_returns(), 955 "STRICT": lambda self: self.expression(exp.StrictProperty), 956 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 957 "ROW": lambda self: self._parse_row(), 958 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 959 "SAMPLE": lambda self: self.expression( 960 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 961 ), 962 "SECURE": lambda self: self.expression(exp.SecureProperty), 963 "SECURITY": lambda self: self._parse_security(), 964 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 965 "SETTINGS": lambda self: self._parse_settings_property(), 966 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 967 "SORTKEY": lambda self: self._parse_sortkey(), 968 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 969 "STABLE": lambda self: self.expression( 970 exp.StabilityProperty, this=exp.Literal.string("STABLE") 971 ), 972 "STORED": lambda self: self._parse_stored(), 973 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 974 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 975 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 976 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 977 "TO": lambda self: self._parse_to_table(), 978 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 979 "TRANSFORM": lambda self: self.expression( 980 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 981 ), 982 "TTL": lambda self: self._parse_ttl(), 983 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 984 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 985 "VOLATILE": lambda self: self._parse_volatile_property(), 986 "WITH": lambda self: self._parse_with_property(), 987 } 988 989 CONSTRAINT_PARSERS = { 990 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 991 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 992 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 993 
"CHARACTER SET": lambda self: self.expression( 994 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 995 ), 996 "CHECK": lambda self: self.expression( 997 exp.CheckColumnConstraint, 998 this=self._parse_wrapped(self._parse_assignment), 999 enforced=self._match_text_seq("ENFORCED"), 1000 ), 1001 "COLLATE": lambda self: self.expression( 1002 exp.CollateColumnConstraint, 1003 this=self._parse_identifier() or self._parse_column(), 1004 ), 1005 "COMMENT": lambda self: self.expression( 1006 exp.CommentColumnConstraint, this=self._parse_string() 1007 ), 1008 "COMPRESS": lambda self: self._parse_compress(), 1009 "CLUSTERED": lambda self: self.expression( 1010 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1011 ), 1012 "NONCLUSTERED": lambda self: self.expression( 1013 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1014 ), 1015 "DEFAULT": lambda self: self.expression( 1016 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1017 ), 1018 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1019 "EPHEMERAL": lambda self: self.expression( 1020 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1021 ), 1022 "EXCLUDE": lambda self: self.expression( 1023 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1024 ), 1025 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1026 "FORMAT": lambda self: self.expression( 1027 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1028 ), 1029 "GENERATED": lambda self: self._parse_generated_as_identity(), 1030 "IDENTITY": lambda self: self._parse_auto_increment(), 1031 "INLINE": lambda self: self._parse_inline(), 1032 "LIKE": lambda self: self._parse_create_like(), 1033 "NOT": lambda self: self._parse_not_constraint(), 1034 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1035 "ON": lambda self: ( 1036 self._match(TokenType.UPDATE) 1037 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1038 ) 1039 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1040 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1041 "PERIOD": lambda self: self._parse_period_for_system_time(), 1042 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1043 "REFERENCES": lambda self: self._parse_references(match=False), 1044 "TITLE": lambda self: self.expression( 1045 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1046 ), 1047 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1048 "UNIQUE": lambda self: self._parse_unique(), 1049 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1050 "WITH": lambda self: self.expression( 1051 exp.Properties, expressions=self._parse_wrapped_properties() 1052 ), 1053 } 1054 1055 ALTER_PARSERS = { 1056 "ADD": lambda self: self._parse_alter_table_add(), 1057 "ALTER": lambda self: self._parse_alter_table_alter(), 1058 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1059 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1060 "DROP": lambda self: self._parse_alter_table_drop(), 1061 "RENAME": lambda self: self._parse_alter_table_rename(), 1062 "SET": lambda self: self._parse_alter_table_set(), 1063 "AS": lambda self: self._parse_select(), 1064 } 1065 1066 ALTER_ALTER_PARSERS = { 1067 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1068 "DISTSTYLE": lambda self: 
self._parse_alter_diststyle(), 1069 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1070 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1071 } 1072 1073 SCHEMA_UNNAMED_CONSTRAINTS = { 1074 "CHECK", 1075 "EXCLUDE", 1076 "FOREIGN KEY", 1077 "LIKE", 1078 "PERIOD", 1079 "PRIMARY KEY", 1080 "UNIQUE", 1081 } 1082 1083 NO_PAREN_FUNCTION_PARSERS = { 1084 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1085 "CASE": lambda self: self._parse_case(), 1086 "CONNECT_BY_ROOT": lambda self: self.expression( 1087 exp.ConnectByRoot, this=self._parse_column() 1088 ), 1089 "IF": lambda self: self._parse_if(), 1090 "NEXT": lambda self: self._parse_next_value_for(), 1091 } 1092 1093 INVALID_FUNC_NAME_TOKENS = { 1094 TokenType.IDENTIFIER, 1095 TokenType.STRING, 1096 } 1097 1098 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1099 1100 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1101 1102 FUNCTION_PARSERS = { 1103 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1104 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1105 "DECODE": lambda self: self._parse_decode(), 1106 "EXTRACT": lambda self: self._parse_extract(), 1107 "GAP_FILL": lambda self: self._parse_gap_fill(), 1108 "JSON_OBJECT": lambda self: self._parse_json_object(), 1109 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1110 "JSON_TABLE": lambda self: self._parse_json_table(), 1111 "MATCH": lambda self: self._parse_match_against(), 1112 "NORMALIZE": lambda self: self._parse_normalize(), 1113 "OPENJSON": lambda self: self._parse_open_json(), 1114 "OVERLAY": lambda self: self._parse_overlay(), 1115 "POSITION": lambda self: self._parse_position(), 1116 "PREDICT": lambda self: self._parse_predict(), 1117 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1118 "STRING_AGG": lambda self: self._parse_string_agg(), 1119 "SUBSTRING": lambda self: self._parse_substring(), 1120 "TRIM": lambda self: self._parse_trim(), 1121 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1122 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1123 } 1124 1125 QUERY_MODIFIER_PARSERS = { 1126 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1127 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1128 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1129 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1130 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1131 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1132 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1133 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1134 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1135 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1136 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1137 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1138 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1139 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1140 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1141 TokenType.CLUSTER_BY: lambda self: ( 1142 "cluster", 1143 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1144 ), 1145 TokenType.DISTRIBUTE_BY: lambda self: ( 1146 "distribute", 1147 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1148 ), 
1149 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1150 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1151 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1152 } 1153 1154 SET_PARSERS = { 1155 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1156 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1157 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1158 "TRANSACTION": lambda self: self._parse_set_transaction(), 1159 } 1160 1161 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1162 1163 TYPE_LITERAL_PARSERS = { 1164 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1165 } 1166 1167 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1168 1169 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1170 1171 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1172 1173 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1174 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1175 "ISOLATION": ( 1176 ("LEVEL", "REPEATABLE", "READ"), 1177 ("LEVEL", "READ", "COMMITTED"), 1178 ("LEVEL", "READ", "UNCOMITTED"), 1179 ("LEVEL", "SERIALIZABLE"), 1180 ), 1181 "READ": ("WRITE", "ONLY"), 1182 } 1183 1184 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1185 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1186 ) 1187 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1188 1189 CREATE_SEQUENCE: OPTIONS_TYPE = { 1190 "SCALE": ("EXTEND", "NOEXTEND"), 1191 "SHARD": ("EXTEND", "NOEXTEND"), 1192 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1193 **dict.fromkeys( 1194 ( 1195 "SESSION", 1196 "GLOBAL", 1197 "KEEP", 1198 "NOKEEP", 1199 "ORDER", 1200 "NOORDER", 1201 "NOCACHE", 1202 "CYCLE", 1203 "NOCYCLE", 1204 "NOMINVALUE", 1205 "NOMAXVALUE", 1206 "NOSCALE", 1207 "NOSHARD", 1208 ), 1209 tuple(), 1210 ), 1211 } 1212 1213 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1214 1215 USABLES: OPTIONS_TYPE = dict.fromkeys( 1216 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1217 ) 1218 1219 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1220 1221 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1222 "TYPE": ("EVOLUTION",), 1223 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1224 } 1225 1226 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1227 "NOT": ("ENFORCED",), 1228 "MATCH": ( 1229 "FULL", 1230 "PARTIAL", 1231 "SIMPLE", 1232 ), 1233 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1234 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1235 } 1236 1237 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1238 1239 CLONE_KEYWORDS = {"CLONE", "COPY"} 1240 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1241 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1242 1243 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1244 1245 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1246 1247 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1248 1249 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1250 1251 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1252 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1253 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1254 1255 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1256 1257 
FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1258 1259 ADD_CONSTRAINT_TOKENS = { 1260 TokenType.CONSTRAINT, 1261 TokenType.FOREIGN_KEY, 1262 TokenType.INDEX, 1263 TokenType.KEY, 1264 TokenType.PRIMARY_KEY, 1265 TokenType.UNIQUE, 1266 } 1267 1268 DISTINCT_TOKENS = {TokenType.DISTINCT} 1269 1270 NULL_TOKENS = {TokenType.NULL} 1271 1272 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1273 1274 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1275 1276 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1277 1278 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1279 1280 ODBC_DATETIME_LITERALS = { 1281 "d": exp.Date, 1282 "t": exp.Time, 1283 "ts": exp.Timestamp, 1284 } 1285 1286 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1287 1288 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1289 1290 STRICT_CAST = True 1291 1292 PREFIXED_PIVOT_COLUMNS = False 1293 IDENTIFY_PIVOT_STRINGS = False 1294 1295 LOG_DEFAULTS_TO_LN = False 1296 1297 # Whether ADD is present for each column added by ALTER TABLE 1298 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1299 1300 # Whether the table sample clause expects CSV syntax 1301 TABLESAMPLE_CSV = False 1302 1303 # The default method used for table sampling 1304 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1305 1306 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1307 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1308 1309 # Whether the TRIM function expects the characters to trim as its first argument 1310 TRIM_PATTERN_FIRST = False 1311 1312 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1313 STRING_ALIASES = False 1314 1315 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1316 MODIFIERS_ATTACHED_TO_SET_OP = True 1317 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1318 1319 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1320 NO_PAREN_IF_COMMANDS = True 1321 1322 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1323 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1324 1325 # Whether the `:` operator is used to extract a value from a VARIANT column 1326 COLON_IS_VARIANT_EXTRACT = False 1327 1328 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1329 # If this is True and '(' is not found, the keyword will be treated as an identifier 1330 VALUES_FOLLOWED_BY_PAREN = True 1331 1332 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1333 SUPPORTS_IMPLICIT_UNNEST = False 1334 1335 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1336 INTERVAL_SPANS = True 1337 1338 # Whether a PARTITION clause can follow a table reference 1339 SUPPORTS_PARTITION_SELECTION = False 1340 1341 __slots__ = ( 1342 "error_level", 1343 "error_message_context", 1344 "max_errors", 1345 "dialect", 1346 "sql", 1347 "errors", 1348 "_tokens", 1349 "_index", 1350 "_curr", 1351 "_next", 1352 "_prev", 1353 "_prev_comments", 1354 ) 1355 1356 # Autofilled 1357 SHOW_TRIE: t.Dict = {} 1358 SET_TRIE: t.Dict = {} 1359 1360 def __init__( 1361 self, 1362 error_level: t.Optional[ErrorLevel] = None, 1363 error_message_context: int = 100, 1364 max_errors: int = 3, 1365 dialect: DialectType = None, 1366 ): 1367 from sqlglot.dialects import Dialect 1368 1369 self.error_level = error_level or ErrorLevel.IMMEDIATE 1370 self.error_message_context = error_message_context 1371 self.max_errors = max_errors 1372 self.dialect = Dialect.get_or_raise(dialect) 1373 self.reset() 1374 1375 def reset(self): 1376 self.sql = "" 1377 self.errors = [] 1378 self._tokens = [] 1379 self._index = 0 1380 self._curr = None 1381 self._next = None 1382 self._prev = None 1383 self._prev_comments = None 1384 1385 def parse( 1386 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1387 ) -> t.List[t.Optional[exp.Expression]]: 1388 """ 1389 Parses a list of tokens and returns a list of syntax trees, one tree 1390 per parsed SQL statement. 1391 1392 Args: 1393 raw_tokens: The list of tokens. 1394 sql: The original SQL string, used to produce helpful debug messages. 1395 1396 Returns: 1397 The list of the produced syntax trees. 1398 """ 1399 return self._parse( 1400 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1401 ) 1402 1403 def parse_into( 1404 self, 1405 expression_types: exp.IntoType, 1406 raw_tokens: t.List[Token], 1407 sql: t.Optional[str] = None, 1408 ) -> t.List[t.Optional[exp.Expression]]: 1409 """ 1410 Parses a list of tokens into a given Expression type. If a collection of Expression 1411 types is given instead, this method will try to parse the token list into each one 1412 of them, stopping at the first for which the parsing succeeds. 1413 1414 Args: 1415 expression_types: The expression type(s) to try and parse the token list into. 1416 raw_tokens: The list of tokens. 1417 sql: The original SQL string, used to produce helpful debug messages. 1418 1419 Returns: 1420 The target Expression. 
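
        Example (an illustrative sketch, assuming the default dialect):
            >>> from sqlglot import exp
            >>> from sqlglot.parser import Parser
            >>> from sqlglot.tokens import Tokenizer
            >>> tokens = Tokenizer().tokenize("SELECT 1")
            >>> Parser().parse_into(exp.Select, tokens, "SELECT 1")[0].sql()
            'SELECT 1'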
1421 """ 1422 errors = [] 1423 for expression_type in ensure_list(expression_types): 1424 parser = self.EXPRESSION_PARSERS.get(expression_type) 1425 if not parser: 1426 raise TypeError(f"No parser registered for {expression_type}") 1427 1428 try: 1429 return self._parse(parser, raw_tokens, sql) 1430 except ParseError as e: 1431 e.errors[0]["into_expression"] = expression_type 1432 errors.append(e) 1433 1434 raise ParseError( 1435 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1436 errors=merge_errors(errors), 1437 ) from errors[-1] 1438 1439 def _parse( 1440 self, 1441 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1442 raw_tokens: t.List[Token], 1443 sql: t.Optional[str] = None, 1444 ) -> t.List[t.Optional[exp.Expression]]: 1445 self.reset() 1446 self.sql = sql or "" 1447 1448 total = len(raw_tokens) 1449 chunks: t.List[t.List[Token]] = [[]] 1450 1451 for i, token in enumerate(raw_tokens): 1452 if token.token_type == TokenType.SEMICOLON: 1453 if token.comments: 1454 chunks.append([token]) 1455 1456 if i < total - 1: 1457 chunks.append([]) 1458 else: 1459 chunks[-1].append(token) 1460 1461 expressions = [] 1462 1463 for tokens in chunks: 1464 self._index = -1 1465 self._tokens = tokens 1466 self._advance() 1467 1468 expressions.append(parse_method(self)) 1469 1470 if self._index < len(self._tokens): 1471 self.raise_error("Invalid expression / Unexpected token") 1472 1473 self.check_errors() 1474 1475 return expressions 1476 1477 def check_errors(self) -> None: 1478 """Logs or raises any found errors, depending on the chosen error level setting.""" 1479 if self.error_level == ErrorLevel.WARN: 1480 for error in self.errors: 1481 logger.error(str(error)) 1482 elif self.error_level == ErrorLevel.RAISE and self.errors: 1483 raise ParseError( 1484 concat_messages(self.errors, self.max_errors), 1485 errors=merge_errors(self.errors), 1486 ) 1487 1488 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1489 """ 1490 Appends an error in the list of recorded errors or raises it, depending on the chosen 1491 error level setting. 1492 """ 1493 token = token or self._curr or self._prev or Token.string("") 1494 start = token.start 1495 end = token.end + 1 1496 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1497 highlight = self.sql[start:end] 1498 end_context = self.sql[end : end + self.error_message_context] 1499 1500 error = ParseError.new( 1501 f"{message}. Line {token.line}, Col: {token.col}.\n" 1502 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1503 description=message, 1504 line=token.line, 1505 col=token.col, 1506 start_context=start_context, 1507 highlight=highlight, 1508 end_context=end_context, 1509 ) 1510 1511 if self.error_level == ErrorLevel.IMMEDIATE: 1512 raise error 1513 1514 self.errors.append(error) 1515 1516 def expression( 1517 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1518 ) -> E: 1519 """ 1520 Creates a new, validated Expression. 1521 1522 Args: 1523 exp_class: The expression class to instantiate. 1524 comments: An optional list of comments to attach to the expression. 1525 kwargs: The arguments to set for the expression along with their respective values. 1526 1527 Returns: 1528 The target expression. 
1529 """ 1530 instance = exp_class(**kwargs) 1531 instance.add_comments(comments) if comments else self._add_comments(instance) 1532 return self.validate_expression(instance) 1533 1534 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1535 if expression and self._prev_comments: 1536 expression.add_comments(self._prev_comments) 1537 self._prev_comments = None 1538 1539 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1540 """ 1541 Validates an Expression, making sure that all its mandatory arguments are set. 1542 1543 Args: 1544 expression: The expression to validate. 1545 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1546 1547 Returns: 1548 The validated expression. 1549 """ 1550 if self.error_level != ErrorLevel.IGNORE: 1551 for error_message in expression.error_messages(args): 1552 self.raise_error(error_message) 1553 1554 return expression 1555 1556 def _find_sql(self, start: Token, end: Token) -> str: 1557 return self.sql[start.start : end.end + 1] 1558 1559 def _is_connected(self) -> bool: 1560 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1561 1562 def _advance(self, times: int = 1) -> None: 1563 self._index += times 1564 self._curr = seq_get(self._tokens, self._index) 1565 self._next = seq_get(self._tokens, self._index + 1) 1566 1567 if self._index > 0: 1568 self._prev = self._tokens[self._index - 1] 1569 self._prev_comments = self._prev.comments 1570 else: 1571 self._prev = None 1572 self._prev_comments = None 1573 1574 def _retreat(self, index: int) -> None: 1575 if index != self._index: 1576 self._advance(index - self._index) 1577 1578 def _warn_unsupported(self) -> None: 1579 if len(self._tokens) <= 1: 1580 return 1581 1582 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1583 # interested in emitting a warning for the one being currently processed. 1584 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1585 1586 logger.warning( 1587 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1588 ) 1589 1590 def _parse_command(self) -> exp.Command: 1591 self._warn_unsupported() 1592 return self.expression( 1593 exp.Command, 1594 comments=self._prev_comments, 1595 this=self._prev.text.upper(), 1596 expression=self._parse_string(), 1597 ) 1598 1599 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1600 """ 1601 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1602 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1603 solve this by setting & resetting the parser state accordingly 1604 """ 1605 index = self._index 1606 error_level = self.error_level 1607 1608 self.error_level = ErrorLevel.IMMEDIATE 1609 try: 1610 this = parse_method() 1611 except ParseError: 1612 this = None 1613 finally: 1614 if not this or retreat: 1615 self._retreat(index) 1616 self.error_level = error_level 1617 1618 return this 1619 1620 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1621 start = self._prev 1622 exists = self._parse_exists() if allow_exists else None 1623 1624 self._match(TokenType.ON) 1625 1626 materialized = self._match_text_seq("MATERIALIZED") 1627 kind = self._match_set(self.CREATABLES) and self._prev 1628 if not kind: 1629 return self._parse_as_command(start) 1630 1631 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1632 this = self._parse_user_defined_function(kind=kind.token_type) 1633 elif kind.token_type == TokenType.TABLE: 1634 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1635 elif kind.token_type == TokenType.COLUMN: 1636 this = self._parse_column() 1637 else: 1638 this = self._parse_id_var() 1639 1640 self._match(TokenType.IS) 1641 1642 return self.expression( 1643 exp.Comment, 1644 this=this, 1645 kind=kind.text, 1646 expression=self._parse_string(), 1647 exists=exists, 1648 materialized=materialized, 1649 ) 1650 1651 def _parse_to_table( 1652 self, 1653 ) -> exp.ToTableProperty: 1654 table = self._parse_table_parts(schema=True) 1655 return self.expression(exp.ToTableProperty, this=table) 1656 1657 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1658 def _parse_ttl(self) -> exp.Expression: 1659 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1660 this = self._parse_bitwise() 1661 1662 if self._match_text_seq("DELETE"): 1663 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1664 if self._match_text_seq("RECOMPRESS"): 1665 return self.expression( 1666 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1667 ) 1668 if self._match_text_seq("TO", "DISK"): 1669 return self.expression( 1670 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1671 ) 1672 if self._match_text_seq("TO", "VOLUME"): 1673 return self.expression( 1674 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1675 ) 1676 1677 return this 1678 1679 expressions = self._parse_csv(_parse_ttl_action) 1680 where = self._parse_where() 1681 group = self._parse_group() 1682 1683 aggregates = None 1684 if group and self._match(TokenType.SET): 1685 aggregates = self._parse_csv(self._parse_set_item) 1686 1687 return self.expression( 1688 exp.MergeTreeTTL, 1689 expressions=expressions, 1690 where=where, 1691 group=group, 1692 aggregates=aggregates, 1693 ) 1694 1695 def _parse_statement(self) -> t.Optional[exp.Expression]: 1696 if self._curr is None: 1697 return None 1698 1699 if self._match_set(self.STATEMENT_PARSERS): 1700 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1701 1702 if self._match_set(self.dialect.tokenizer.COMMANDS): 1703 return self._parse_command() 1704 1705 expression = self._parse_expression() 1706 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1707 return self._parse_query_modifiers(expression) 1708 1709 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1710 start = 
self._prev 1711 temporary = self._match(TokenType.TEMPORARY) 1712 materialized = self._match_text_seq("MATERIALIZED") 1713 1714 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1715 if not kind: 1716 return self._parse_as_command(start) 1717 1718 concurrently = self._match_text_seq("CONCURRENTLY") 1719 if_exists = exists or self._parse_exists() 1720 table = self._parse_table_parts( 1721 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1722 ) 1723 1724 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1725 1726 if self._match(TokenType.L_PAREN, advance=False): 1727 expressions = self._parse_wrapped_csv(self._parse_types) 1728 else: 1729 expressions = None 1730 1731 return self.expression( 1732 exp.Drop, 1733 comments=start.comments, 1734 exists=if_exists, 1735 this=table, 1736 expressions=expressions, 1737 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1738 temporary=temporary, 1739 materialized=materialized, 1740 cascade=self._match_text_seq("CASCADE"), 1741 constraints=self._match_text_seq("CONSTRAINTS"), 1742 purge=self._match_text_seq("PURGE"), 1743 cluster=cluster, 1744 concurrently=concurrently, 1745 ) 1746 1747 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1748 return ( 1749 self._match_text_seq("IF") 1750 and (not not_ or self._match(TokenType.NOT)) 1751 and self._match(TokenType.EXISTS) 1752 ) 1753 1754 def _parse_create(self) -> exp.Create | exp.Command: 1755 # Note: this can't be None because we've matched a statement parser 1756 start = self._prev 1757 comments = self._prev_comments 1758 1759 replace = ( 1760 start.token_type == TokenType.REPLACE 1761 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1762 or self._match_pair(TokenType.OR, TokenType.ALTER) 1763 ) 1764 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1765 1766 unique = self._match(TokenType.UNIQUE) 1767 1768 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1769 clustered = True 1770 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1771 "COLUMNSTORE" 1772 ): 1773 clustered = False 1774 else: 1775 clustered = None 1776 1777 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1778 self._advance() 1779 1780 properties = None 1781 create_token = self._match_set(self.CREATABLES) and self._prev 1782 1783 if not create_token: 1784 # exp.Properties.Location.POST_CREATE 1785 properties = self._parse_properties() 1786 create_token = self._match_set(self.CREATABLES) and self._prev 1787 1788 if not properties or not create_token: 1789 return self._parse_as_command(start) 1790 1791 concurrently = self._match_text_seq("CONCURRENTLY") 1792 exists = self._parse_exists(not_=True) 1793 this = None 1794 expression: t.Optional[exp.Expression] = None 1795 indexes = None 1796 no_schema_binding = None 1797 begin = None 1798 end = None 1799 clone = None 1800 1801 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1802 nonlocal properties 1803 if properties and temp_props: 1804 properties.expressions.extend(temp_props.expressions) 1805 elif temp_props: 1806 properties = temp_props 1807 1808 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1809 this = self._parse_user_defined_function(kind=create_token.token_type) 1810 1811 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1812 extend_props(self._parse_properties()) 1813 1814 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1815 
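            # After any trailing properties are collected, the branches below
            # handle bodies that were not given as heredocs: a raw COMMAND, a
            # quoted string body (e.g. BigQuery JavaScript UDFs), or a
            # BEGIN ... [RETURN] ... END block around a regular statement.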
extend_props(self._parse_properties()) 1816 1817 if not expression: 1818 if self._match(TokenType.COMMAND): 1819 expression = self._parse_as_command(self._prev) 1820 else: 1821 begin = self._match(TokenType.BEGIN) 1822 return_ = self._match_text_seq("RETURN") 1823 1824 if self._match(TokenType.STRING, advance=False): 1825 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1826 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1827 expression = self._parse_string() 1828 extend_props(self._parse_properties()) 1829 else: 1830 expression = self._parse_statement() 1831 1832 end = self._match_text_seq("END") 1833 1834 if return_: 1835 expression = self.expression(exp.Return, this=expression) 1836 elif create_token.token_type == TokenType.INDEX: 1837 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1838 if not self._match(TokenType.ON): 1839 index = self._parse_id_var() 1840 anonymous = False 1841 else: 1842 index = None 1843 anonymous = True 1844 1845 this = self._parse_index(index=index, anonymous=anonymous) 1846 elif create_token.token_type in self.DB_CREATABLES: 1847 table_parts = self._parse_table_parts( 1848 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1849 ) 1850 1851 # exp.Properties.Location.POST_NAME 1852 self._match(TokenType.COMMA) 1853 extend_props(self._parse_properties(before=True)) 1854 1855 this = self._parse_schema(this=table_parts) 1856 1857 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1858 extend_props(self._parse_properties()) 1859 1860 self._match(TokenType.ALIAS) 1861 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1862 # exp.Properties.Location.POST_ALIAS 1863 extend_props(self._parse_properties()) 1864 1865 if create_token.token_type == TokenType.SEQUENCE: 1866 expression = self._parse_types() 1867 extend_props(self._parse_properties()) 1868 else: 1869 expression = self._parse_ddl_select() 1870 1871 if create_token.token_type == TokenType.TABLE: 1872 # exp.Properties.Location.POST_EXPRESSION 1873 extend_props(self._parse_properties()) 1874 1875 indexes = [] 1876 while True: 1877 index = self._parse_index() 1878 1879 # exp.Properties.Location.POST_INDEX 1880 extend_props(self._parse_properties()) 1881 if not index: 1882 break 1883 else: 1884 self._match(TokenType.COMMA) 1885 indexes.append(index) 1886 elif create_token.token_type == TokenType.VIEW: 1887 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1888 no_schema_binding = True 1889 1890 shallow = self._match_text_seq("SHALLOW") 1891 1892 if self._match_texts(self.CLONE_KEYWORDS): 1893 copy = self._prev.text.lower() == "copy" 1894 clone = self.expression( 1895 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1896 ) 1897 1898 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1899 return self._parse_as_command(start) 1900 1901 create_kind_text = create_token.text.upper() 1902 return self.expression( 1903 exp.Create, 1904 comments=comments, 1905 this=this, 1906 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1907 replace=replace, 1908 refresh=refresh, 1909 unique=unique, 1910 expression=expression, 1911 exists=exists, 1912 properties=properties, 1913 indexes=indexes, 1914 no_schema_binding=no_schema_binding, 1915 begin=begin, 1916 end=end, 1917 clone=clone, 1918 concurrently=concurrently, 1919 clustered=clustered, 1920 ) 1921 1922 def 
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1923 seq = exp.SequenceProperties() 1924 1925 options = [] 1926 index = self._index 1927 1928 while self._curr: 1929 self._match(TokenType.COMMA) 1930 if self._match_text_seq("INCREMENT"): 1931 self._match_text_seq("BY") 1932 self._match_text_seq("=") 1933 seq.set("increment", self._parse_term()) 1934 elif self._match_text_seq("MINVALUE"): 1935 seq.set("minvalue", self._parse_term()) 1936 elif self._match_text_seq("MAXVALUE"): 1937 seq.set("maxvalue", self._parse_term()) 1938 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1939 self._match_text_seq("=") 1940 seq.set("start", self._parse_term()) 1941 elif self._match_text_seq("CACHE"): 1942 # T-SQL allows empty CACHE which is initialized dynamically 1943 seq.set("cache", self._parse_number() or True) 1944 elif self._match_text_seq("OWNED", "BY"): 1945 # "OWNED BY NONE" is the default 1946 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1947 else: 1948 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1949 if opt: 1950 options.append(opt) 1951 else: 1952 break 1953 1954 seq.set("options", options if options else None) 1955 return None if self._index == index else seq 1956 1957 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1958 # only used for teradata currently 1959 self._match(TokenType.COMMA) 1960 1961 kwargs = { 1962 "no": self._match_text_seq("NO"), 1963 "dual": self._match_text_seq("DUAL"), 1964 "before": self._match_text_seq("BEFORE"), 1965 "default": self._match_text_seq("DEFAULT"), 1966 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1967 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1968 "after": self._match_text_seq("AFTER"), 1969 "minimum": self._match_texts(("MIN", "MINIMUM")), 1970 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1971 } 1972 1973 if self._match_texts(self.PROPERTY_PARSERS): 1974 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1975 try: 1976 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1977 except TypeError: 1978 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1979 1980 return None 1981 1982 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1983 return self._parse_wrapped_csv(self._parse_property) 1984 1985 def _parse_property(self) -> t.Optional[exp.Expression]: 1986 if self._match_texts(self.PROPERTY_PARSERS): 1987 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1988 1989 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1990 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1991 1992 if self._match_text_seq("COMPOUND", "SORTKEY"): 1993 return self._parse_sortkey(compound=True) 1994 1995 if self._match_text_seq("SQL", "SECURITY"): 1996 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1997 1998 index = self._index 1999 key = self._parse_column() 2000 2001 if not self._match(TokenType.EQ): 2002 self._retreat(index) 2003 return self._parse_sequence_properties() 2004 2005 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2006 if isinstance(key, exp.Column): 2007 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2008 2009 value = self._parse_bitwise() or self._parse_var(any_token=True) 2010 2011 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2012 if 
isinstance(value, exp.Column): 2013 value = exp.var(value.name) 2014 2015 return self.expression(exp.Property, this=key, value=value) 2016 2017 def _parse_stored(self) -> exp.FileFormatProperty: 2018 self._match(TokenType.ALIAS) 2019 2020 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2021 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2022 2023 return self.expression( 2024 exp.FileFormatProperty, 2025 this=( 2026 self.expression( 2027 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2028 ) 2029 if input_format or output_format 2030 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2031 ), 2032 ) 2033 2034 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2035 field = self._parse_field() 2036 if isinstance(field, exp.Identifier) and not field.quoted: 2037 field = exp.var(field) 2038 2039 return field 2040 2041 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2042 self._match(TokenType.EQ) 2043 self._match(TokenType.ALIAS) 2044 2045 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2046 2047 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2048 properties = [] 2049 while True: 2050 if before: 2051 prop = self._parse_property_before() 2052 else: 2053 prop = self._parse_property() 2054 if not prop: 2055 break 2056 for p in ensure_list(prop): 2057 properties.append(p) 2058 2059 if properties: 2060 return self.expression(exp.Properties, expressions=properties) 2061 2062 return None 2063 2064 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2065 return self.expression( 2066 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2067 ) 2068 2069 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2070 if self._match_texts(("DEFINER", "INVOKER")): 2071 security_specifier = self._prev.text.upper() 2072 return self.expression(exp.SecurityProperty, this=security_specifier) 2073 return None 2074 2075 def _parse_settings_property(self) -> exp.SettingsProperty: 2076 return self.expression( 2077 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2078 ) 2079 2080 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2081 if self._index >= 2: 2082 pre_volatile_token = self._tokens[self._index - 2] 2083 else: 2084 pre_volatile_token = None 2085 2086 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2087 return exp.VolatileProperty() 2088 2089 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2090 2091 def _parse_retention_period(self) -> exp.Var: 2092 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2093 number = self._parse_number() 2094 number_str = f"{number} " if number else "" 2095 unit = self._parse_var(any_token=True) 2096 return exp.var(f"{number_str}{unit}") 2097 2098 def _parse_system_versioning_property( 2099 self, with_: bool = False 2100 ) -> exp.WithSystemVersioningProperty: 2101 self._match(TokenType.EQ) 2102 prop = self.expression( 2103 exp.WithSystemVersioningProperty, 2104 **{ # type: ignore 2105 "on": True, 2106 "with": with_, 2107 }, 2108 ) 2109 2110 if self._match_text_seq("OFF"): 2111 prop.set("on", False) 2112 return prop 2113 2114 self._match(TokenType.ON) 2115 if self._match(TokenType.L_PAREN): 2116 while self._curr and not 
self._match(TokenType.R_PAREN): 2117 if self._match_text_seq("HISTORY_TABLE", "="): 2118 prop.set("this", self._parse_table_parts()) 2119 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2120 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2121 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2122 prop.set("retention_period", self._parse_retention_period()) 2123 2124 self._match(TokenType.COMMA) 2125 2126 return prop 2127 2128 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2129 self._match(TokenType.EQ) 2130 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2131 prop = self.expression(exp.DataDeletionProperty, on=on) 2132 2133 if self._match(TokenType.L_PAREN): 2134 while self._curr and not self._match(TokenType.R_PAREN): 2135 if self._match_text_seq("FILTER_COLUMN", "="): 2136 prop.set("filter_column", self._parse_column()) 2137 elif self._match_text_seq("RETENTION_PERIOD", "="): 2138 prop.set("retention_period", self._parse_retention_period()) 2139 2140 self._match(TokenType.COMMA) 2141 2142 return prop 2143 2144 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2145 kind = "HASH" 2146 expressions: t.Optional[t.List[exp.Expression]] = None 2147 if self._match_text_seq("BY", "HASH"): 2148 expressions = self._parse_wrapped_csv(self._parse_id_var) 2149 elif self._match_text_seq("BY", "RANDOM"): 2150 kind = "RANDOM" 2151 2152 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2153 buckets: t.Optional[exp.Expression] = None 2154 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2155 buckets = self._parse_number() 2156 2157 return self.expression( 2158 exp.DistributedByProperty, 2159 expressions=expressions, 2160 kind=kind, 2161 buckets=buckets, 2162 order=self._parse_order(), 2163 ) 2164 2165 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2166 self._match_text_seq("KEY") 2167 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2168 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2169 2170 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2171 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2172 prop = self._parse_system_versioning_property(with_=True) 2173 self._match_r_paren() 2174 return prop 2175 2176 if self._match(TokenType.L_PAREN, advance=False): 2177 return self._parse_wrapped_properties() 2178 2179 if self._match_text_seq("JOURNAL"): 2180 return self._parse_withjournaltable() 2181 2182 if self._match_texts(self.VIEW_ATTRIBUTES): 2183 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2184 2185 if self._match_text_seq("DATA"): 2186 return self._parse_withdata(no=False) 2187 elif self._match_text_seq("NO", "DATA"): 2188 return self._parse_withdata(no=True) 2189 2190 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2191 return self._parse_serde_properties(with_=True) 2192 2193 if self._match(TokenType.SCHEMA): 2194 return self.expression( 2195 exp.WithSchemaBindingProperty, 2196 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2197 ) 2198 2199 if not self._next: 2200 return None 2201 2202 return self._parse_withisolatedloading() 2203 2204 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2205 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2206 self._match(TokenType.EQ) 2207 2208 user = self._parse_id_var() 2209 self._match(TokenType.PARAMETER) 2210 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2211 2212 if not user or not host: 2213 return None 2214 2215 return exp.DefinerProperty(this=f"{user}@{host}") 2216 2217 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2218 self._match(TokenType.TABLE) 2219 self._match(TokenType.EQ) 2220 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2221 2222 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2223 return self.expression(exp.LogProperty, no=no) 2224 2225 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2226 return self.expression(exp.JournalProperty, **kwargs) 2227 2228 def _parse_checksum(self) -> exp.ChecksumProperty: 2229 self._match(TokenType.EQ) 2230 2231 on = None 2232 if self._match(TokenType.ON): 2233 on = True 2234 elif self._match_text_seq("OFF"): 2235 on = False 2236 2237 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2238 2239 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2240 return self.expression( 2241 exp.Cluster, 2242 expressions=( 2243 self._parse_wrapped_csv(self._parse_ordered) 2244 if wrapped 2245 else self._parse_csv(self._parse_ordered) 2246 ), 2247 ) 2248 2249 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2250 self._match_text_seq("BY") 2251 2252 self._match_l_paren() 2253 expressions = self._parse_csv(self._parse_column) 2254 self._match_r_paren() 2255 2256 if self._match_text_seq("SORTED", "BY"): 2257 self._match_l_paren() 2258 sorted_by = self._parse_csv(self._parse_ordered) 2259 self._match_r_paren() 2260 else: 2261 sorted_by = None 2262 2263 self._match(TokenType.INTO) 2264 buckets = self._parse_number() 2265 self._match_text_seq("BUCKETS") 2266 2267 return self.expression( 2268 exp.ClusteredByProperty, 2269 expressions=expressions, 2270 sorted_by=sorted_by, 2271 buckets=buckets, 2272 ) 2273 2274 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2275 if not self._match_text_seq("GRANTS"): 2276 self._retreat(self._index - 1) 2277 return None 2278 2279 return self.expression(exp.CopyGrantsProperty) 2280 2281 def _parse_freespace(self) -> exp.FreespaceProperty: 2282 self._match(TokenType.EQ) 2283 return self.expression( 2284 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2285 ) 2286 2287 def _parse_mergeblockratio( 2288 self, no: bool = False, default: bool = False 2289 ) -> exp.MergeBlockRatioProperty: 2290 if self._match(TokenType.EQ): 2291 return self.expression( 2292 exp.MergeBlockRatioProperty, 2293 this=self._parse_number(), 2294 percent=self._match(TokenType.PERCENT), 2295 ) 2296 2297 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2298 2299 def _parse_datablocksize( 2300 self, 2301 default: t.Optional[bool] = None, 2302 minimum: t.Optional[bool] = None, 2303 maximum: t.Optional[bool] = None, 2304 ) -> exp.DataBlocksizeProperty: 2305 self._match(TokenType.EQ) 2306 size = self._parse_number() 2307 2308 units = None 2309 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2310 units = self._prev.text 2311 2312 return self.expression( 2313 exp.DataBlocksizeProperty, 2314 size=size, 2315 units=units, 2316 default=default, 2317 minimum=minimum, 2318 maximum=maximum, 2319 ) 2320 2321 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2322 self._match(TokenType.EQ) 2323 always = self._match_text_seq("ALWAYS") 2324 manual = self._match_text_seq("MANUAL") 2325 never = 
self._match_text_seq("NEVER") 2326 default = self._match_text_seq("DEFAULT") 2327 2328 autotemp = None 2329 if self._match_text_seq("AUTOTEMP"): 2330 autotemp = self._parse_schema() 2331 2332 return self.expression( 2333 exp.BlockCompressionProperty, 2334 always=always, 2335 manual=manual, 2336 never=never, 2337 default=default, 2338 autotemp=autotemp, 2339 ) 2340 2341 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2342 index = self._index 2343 no = self._match_text_seq("NO") 2344 concurrent = self._match_text_seq("CONCURRENT") 2345 2346 if not self._match_text_seq("ISOLATED", "LOADING"): 2347 self._retreat(index) 2348 return None 2349 2350 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2351 return self.expression( 2352 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2353 ) 2354 2355 def _parse_locking(self) -> exp.LockingProperty: 2356 if self._match(TokenType.TABLE): 2357 kind = "TABLE" 2358 elif self._match(TokenType.VIEW): 2359 kind = "VIEW" 2360 elif self._match(TokenType.ROW): 2361 kind = "ROW" 2362 elif self._match_text_seq("DATABASE"): 2363 kind = "DATABASE" 2364 else: 2365 kind = None 2366 2367 if kind in ("DATABASE", "TABLE", "VIEW"): 2368 this = self._parse_table_parts() 2369 else: 2370 this = None 2371 2372 if self._match(TokenType.FOR): 2373 for_or_in = "FOR" 2374 elif self._match(TokenType.IN): 2375 for_or_in = "IN" 2376 else: 2377 for_or_in = None 2378 2379 if self._match_text_seq("ACCESS"): 2380 lock_type = "ACCESS" 2381 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2382 lock_type = "EXCLUSIVE" 2383 elif self._match_text_seq("SHARE"): 2384 lock_type = "SHARE" 2385 elif self._match_text_seq("READ"): 2386 lock_type = "READ" 2387 elif self._match_text_seq("WRITE"): 2388 lock_type = "WRITE" 2389 elif self._match_text_seq("CHECKSUM"): 2390 lock_type = "CHECKSUM" 2391 else: 2392 lock_type = None 2393 2394 override = self._match_text_seq("OVERRIDE") 2395 2396 return self.expression( 2397 exp.LockingProperty, 2398 this=this, 2399 kind=kind, 2400 for_or_in=for_or_in, 2401 lock_type=lock_type, 2402 override=override, 2403 ) 2404 2405 def _parse_partition_by(self) -> t.List[exp.Expression]: 2406 if self._match(TokenType.PARTITION_BY): 2407 return self._parse_csv(self._parse_assignment) 2408 return [] 2409 2410 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2411 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2412 if self._match_text_seq("MINVALUE"): 2413 return exp.var("MINVALUE") 2414 if self._match_text_seq("MAXVALUE"): 2415 return exp.var("MAXVALUE") 2416 return self._parse_bitwise() 2417 2418 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2419 expression = None 2420 from_expressions = None 2421 to_expressions = None 2422 2423 if self._match(TokenType.IN): 2424 this = self._parse_wrapped_csv(self._parse_bitwise) 2425 elif self._match(TokenType.FROM): 2426 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2427 self._match_text_seq("TO") 2428 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2429 elif self._match_text_seq("WITH", "(", "MODULUS"): 2430 this = self._parse_number() 2431 self._match_text_seq(",", "REMAINDER") 2432 expression = self._parse_number() 2433 self._match_r_paren() 2434 else: 2435 self.raise_error("Failed to parse partition bound spec.") 2436 2437 return self.expression( 2438 exp.PartitionBoundSpec, 2439 this=this, 2440 expression=expression, 2441 
from_expressions=from_expressions, 2442 to_expressions=to_expressions, 2443 ) 2444 2445 # https://www.postgresql.org/docs/current/sql-createtable.html 2446 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2447 if not self._match_text_seq("OF"): 2448 self._retreat(self._index - 1) 2449 return None 2450 2451 this = self._parse_table(schema=True) 2452 2453 if self._match(TokenType.DEFAULT): 2454 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2455 elif self._match_text_seq("FOR", "VALUES"): 2456 expression = self._parse_partition_bound_spec() 2457 else: 2458 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2459 2460 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2461 2462 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2463 self._match(TokenType.EQ) 2464 return self.expression( 2465 exp.PartitionedByProperty, 2466 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2467 ) 2468 2469 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2470 if self._match_text_seq("AND", "STATISTICS"): 2471 statistics = True 2472 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2473 statistics = False 2474 else: 2475 statistics = None 2476 2477 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2478 2479 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2480 if self._match_text_seq("SQL"): 2481 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2482 return None 2483 2484 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2485 if self._match_text_seq("SQL", "DATA"): 2486 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2487 return None 2488 2489 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2490 if self._match_text_seq("PRIMARY", "INDEX"): 2491 return exp.NoPrimaryIndexProperty() 2492 if self._match_text_seq("SQL"): 2493 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2494 return None 2495 2496 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2497 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2498 return exp.OnCommitProperty() 2499 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2500 return exp.OnCommitProperty(delete=True) 2501 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2502 2503 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2504 if self._match_text_seq("SQL", "DATA"): 2505 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2506 return None 2507 2508 def _parse_distkey(self) -> exp.DistKeyProperty: 2509 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2510 2511 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2512 table = self._parse_table(schema=True) 2513 2514 options = [] 2515 while self._match_texts(("INCLUDING", "EXCLUDING")): 2516 this = self._prev.text.upper() 2517 2518 id_var = self._parse_id_var() 2519 if not id_var: 2520 return None 2521 2522 options.append( 2523 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2524 ) 2525 2526 return self.expression(exp.LikeProperty, this=table, expressions=options) 2527 2528 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2529 return self.expression( 2530 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2531 ) 2532 2533 def 
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2534 self._match(TokenType.EQ) 2535 return self.expression( 2536 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2537 ) 2538 2539 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2540 self._match_text_seq("WITH", "CONNECTION") 2541 return self.expression( 2542 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2543 ) 2544 2545 def _parse_returns(self) -> exp.ReturnsProperty: 2546 value: t.Optional[exp.Expression] 2547 null = None 2548 is_table = self._match(TokenType.TABLE) 2549 2550 if is_table: 2551 if self._match(TokenType.LT): 2552 value = self.expression( 2553 exp.Schema, 2554 this="TABLE", 2555 expressions=self._parse_csv(self._parse_struct_types), 2556 ) 2557 if not self._match(TokenType.GT): 2558 self.raise_error("Expecting >") 2559 else: 2560 value = self._parse_schema(exp.var("TABLE")) 2561 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2562 null = True 2563 value = None 2564 else: 2565 value = self._parse_types() 2566 2567 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2568 2569 def _parse_describe(self) -> exp.Describe: 2570 kind = self._match_set(self.CREATABLES) and self._prev.text 2571 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2572 if self._match(TokenType.DOT): 2573 style = None 2574 self._retreat(self._index - 2) 2575 this = self._parse_table(schema=True) 2576 properties = self._parse_properties() 2577 expressions = properties.expressions if properties else None 2578 partition = self._parse_partition() 2579 return self.expression( 2580 exp.Describe, 2581 this=this, 2582 style=style, 2583 kind=kind, 2584 expressions=expressions, 2585 partition=partition, 2586 ) 2587 2588 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2589 kind = self._prev.text.upper() 2590 expressions = [] 2591 2592 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2593 if self._match(TokenType.WHEN): 2594 expression = self._parse_disjunction() 2595 self._match(TokenType.THEN) 2596 else: 2597 expression = None 2598 2599 else_ = self._match(TokenType.ELSE) 2600 2601 if not self._match(TokenType.INTO): 2602 return None 2603 2604 return self.expression( 2605 exp.ConditionalInsert, 2606 this=self.expression( 2607 exp.Insert, 2608 this=self._parse_table(schema=True), 2609 expression=self._parse_derived_table_values(), 2610 ), 2611 expression=expression, 2612 else_=else_, 2613 ) 2614 2615 expression = parse_conditional_insert() 2616 while expression is not None: 2617 expressions.append(expression) 2618 expression = parse_conditional_insert() 2619 2620 return self.expression( 2621 exp.MultitableInserts, 2622 kind=kind, 2623 comments=comments, 2624 expressions=expressions, 2625 source=self._parse_table(), 2626 ) 2627 2628 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2629 comments = ensure_list(self._prev_comments) 2630 hint = self._parse_hint() 2631 overwrite = self._match(TokenType.OVERWRITE) 2632 ignore = self._match(TokenType.IGNORE) 2633 local = self._match_text_seq("LOCAL") 2634 alternative = None 2635 is_function = None 2636 2637 if self._match_text_seq("DIRECTORY"): 2638 this: t.Optional[exp.Expression] = self.expression( 2639 exp.Directory, 2640 this=self._parse_var_or_string(), 2641 local=local, 2642 row_format=self._parse_row_format(match_row=True), 2643 ) 
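            # INSERT ... DIRECTORY (writing query results out to a filesystem
            # path) was handled above; every other INSERT variant is parsed in
            # the else branch below: multi-table inserts (FIRST/ALL), the
            # INSERT OR <alternative> forms, and plain INSERT INTO.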
2644 else: 2645 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2646 comments += ensure_list(self._prev_comments) 2647 return self._parse_multitable_inserts(comments) 2648 2649 if self._match(TokenType.OR): 2650 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2651 2652 self._match(TokenType.INTO) 2653 comments += ensure_list(self._prev_comments) 2654 self._match(TokenType.TABLE) 2655 is_function = self._match(TokenType.FUNCTION) 2656 2657 this = ( 2658 self._parse_table(schema=True, parse_partition=True) 2659 if not is_function 2660 else self._parse_function() 2661 ) 2662 2663 returning = self._parse_returning() 2664 2665 return self.expression( 2666 exp.Insert, 2667 comments=comments, 2668 hint=hint, 2669 is_function=is_function, 2670 this=this, 2671 stored=self._match_text_seq("STORED") and self._parse_stored(), 2672 by_name=self._match_text_seq("BY", "NAME"), 2673 exists=self._parse_exists(), 2674 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2675 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2676 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2677 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2678 conflict=self._parse_on_conflict(), 2679 returning=returning or self._parse_returning(), 2680 overwrite=overwrite, 2681 alternative=alternative, 2682 ignore=ignore, 2683 source=self._match(TokenType.TABLE) and self._parse_table(), 2684 ) 2685 2686 def _parse_kill(self) -> exp.Kill: 2687 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2688 2689 return self.expression( 2690 exp.Kill, 2691 this=self._parse_primary(), 2692 kind=kind, 2693 ) 2694 2695 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2696 conflict = self._match_text_seq("ON", "CONFLICT") 2697 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2698 2699 if not conflict and not duplicate: 2700 return None 2701 2702 conflict_keys = None 2703 constraint = None 2704 2705 if conflict: 2706 if self._match_text_seq("ON", "CONSTRAINT"): 2707 constraint = self._parse_id_var() 2708 elif self._match(TokenType.L_PAREN): 2709 conflict_keys = self._parse_csv(self._parse_id_var) 2710 self._match_r_paren() 2711 2712 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2713 if self._prev.token_type == TokenType.UPDATE: 2714 self._match(TokenType.SET) 2715 expressions = self._parse_csv(self._parse_equality) 2716 else: 2717 expressions = None 2718 2719 return self.expression( 2720 exp.OnConflict, 2721 duplicate=duplicate, 2722 expressions=expressions, 2723 action=action, 2724 conflict_keys=conflict_keys, 2725 constraint=constraint, 2726 ) 2727 2728 def _parse_returning(self) -> t.Optional[exp.Returning]: 2729 if not self._match(TokenType.RETURNING): 2730 return None 2731 return self.expression( 2732 exp.Returning, 2733 expressions=self._parse_csv(self._parse_expression), 2734 into=self._match(TokenType.INTO) and self._parse_table_part(), 2735 ) 2736 2737 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2738 if not self._match(TokenType.FORMAT): 2739 return None 2740 return self._parse_row_format() 2741 2742 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2743 index = self._index 2744 with_ = with_ or self._match_text_seq("WITH") 2745 2746 if not self._match(TokenType.SERDE_PROPERTIES): 2747 self._retreat(index) 2748 return 
None 2749 return self.expression( 2750 exp.SerdeProperties, 2751 **{ # type: ignore 2752 "expressions": self._parse_wrapped_properties(), 2753 "with": with_, 2754 }, 2755 ) 2756 2757 def _parse_row_format( 2758 self, match_row: bool = False 2759 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2760 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2761 return None 2762 2763 if self._match_text_seq("SERDE"): 2764 this = self._parse_string() 2765 2766 serde_properties = self._parse_serde_properties() 2767 2768 return self.expression( 2769 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2770 ) 2771 2772 self._match_text_seq("DELIMITED") 2773 2774 kwargs = {} 2775 2776 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2777 kwargs["fields"] = self._parse_string() 2778 if self._match_text_seq("ESCAPED", "BY"): 2779 kwargs["escaped"] = self._parse_string() 2780 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2781 kwargs["collection_items"] = self._parse_string() 2782 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2783 kwargs["map_keys"] = self._parse_string() 2784 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2785 kwargs["lines"] = self._parse_string() 2786 if self._match_text_seq("NULL", "DEFINED", "AS"): 2787 kwargs["null"] = self._parse_string() 2788 2789 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2790 2791 def _parse_load(self) -> exp.LoadData | exp.Command: 2792 if self._match_text_seq("DATA"): 2793 local = self._match_text_seq("LOCAL") 2794 self._match_text_seq("INPATH") 2795 inpath = self._parse_string() 2796 overwrite = self._match(TokenType.OVERWRITE) 2797 self._match_pair(TokenType.INTO, TokenType.TABLE) 2798 2799 return self.expression( 2800 exp.LoadData, 2801 this=self._parse_table(schema=True), 2802 local=local, 2803 overwrite=overwrite, 2804 inpath=inpath, 2805 partition=self._parse_partition(), 2806 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2807 serde=self._match_text_seq("SERDE") and self._parse_string(), 2808 ) 2809 return self._parse_as_command(self._prev) 2810 2811 def _parse_delete(self) -> exp.Delete: 2812 # This handles MySQL's "Multiple-Table Syntax" 2813 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2814 tables = None 2815 comments = self._prev_comments 2816 if not self._match(TokenType.FROM, advance=False): 2817 tables = self._parse_csv(self._parse_table) or None 2818 2819 returning = self._parse_returning() 2820 2821 return self.expression( 2822 exp.Delete, 2823 comments=comments, 2824 tables=tables, 2825 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2826 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2827 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2828 where=self._parse_where(), 2829 returning=returning or self._parse_returning(), 2830 limit=self._parse_limit(), 2831 ) 2832 2833 def _parse_update(self) -> exp.Update: 2834 comments = self._prev_comments 2835 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2836 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2837 returning = self._parse_returning() 2838 return self.expression( 2839 exp.Update, 2840 comments=comments, 2841 **{ # type: ignore 2842 "this": this, 2843 "expressions": expressions, 2844 "from": self._parse_from(joins=True), 2845 "where": self._parse_where(), 2846 "returning": returning or 
self._parse_returning(), 2847 "order": self._parse_order(), 2848 "limit": self._parse_limit(), 2849 }, 2850 ) 2851 2852 def _parse_uncache(self) -> exp.Uncache: 2853 if not self._match(TokenType.TABLE): 2854 self.raise_error("Expecting TABLE after UNCACHE") 2855 2856 return self.expression( 2857 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2858 ) 2859 2860 def _parse_cache(self) -> exp.Cache: 2861 lazy = self._match_text_seq("LAZY") 2862 self._match(TokenType.TABLE) 2863 table = self._parse_table(schema=True) 2864 2865 options = [] 2866 if self._match_text_seq("OPTIONS"): 2867 self._match_l_paren() 2868 k = self._parse_string() 2869 self._match(TokenType.EQ) 2870 v = self._parse_string() 2871 options = [k, v] 2872 self._match_r_paren() 2873 2874 self._match(TokenType.ALIAS) 2875 return self.expression( 2876 exp.Cache, 2877 this=table, 2878 lazy=lazy, 2879 options=options, 2880 expression=self._parse_select(nested=True), 2881 ) 2882 2883 def _parse_partition(self) -> t.Optional[exp.Partition]: 2884 if not self._match(TokenType.PARTITION): 2885 return None 2886 2887 return self.expression( 2888 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2889 ) 2890 2891 def _parse_value(self) -> t.Optional[exp.Tuple]: 2892 if self._match(TokenType.L_PAREN): 2893 expressions = self._parse_csv(self._parse_expression) 2894 self._match_r_paren() 2895 return self.expression(exp.Tuple, expressions=expressions) 2896 2897 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2898 expression = self._parse_expression() 2899 if expression: 2900 return self.expression(exp.Tuple, expressions=[expression]) 2901 return None 2902 2903 def _parse_projections(self) -> t.List[exp.Expression]: 2904 return self._parse_expressions() 2905 2906 def _parse_select( 2907 self, 2908 nested: bool = False, 2909 table: bool = False, 2910 parse_subquery_alias: bool = True, 2911 parse_set_operation: bool = True, 2912 ) -> t.Optional[exp.Expression]: 2913 cte = self._parse_with() 2914 2915 if cte: 2916 this = self._parse_statement() 2917 2918 if not this: 2919 self.raise_error("Failed to parse any statement following CTE") 2920 return cte 2921 2922 if "with" in this.arg_types: 2923 this.set("with", cte) 2924 else: 2925 self.raise_error(f"{this.key} does not support CTE") 2926 this = cte 2927 2928 return this 2929 2930 # duckdb supports leading with FROM x 2931 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2932 2933 if self._match(TokenType.SELECT): 2934 comments = self._prev_comments 2935 2936 hint = self._parse_hint() 2937 2938 if self._next and not self._next.token_type == TokenType.DOT: 2939 all_ = self._match(TokenType.ALL) 2940 distinct = self._match_set(self.DISTINCT_TOKENS) 2941 else: 2942 all_, distinct = None, None 2943 2944 kind = ( 2945 self._match(TokenType.ALIAS) 2946 and self._match_texts(("STRUCT", "VALUE")) 2947 and self._prev.text.upper() 2948 ) 2949 2950 if distinct: 2951 distinct = self.expression( 2952 exp.Distinct, 2953 on=self._parse_value() if self._match(TokenType.ON) else None, 2954 ) 2955 2956 if all_ and distinct: 2957 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2958 2959 limit = self._parse_limit(top=True) 2960 projections = self._parse_projections() 2961 2962 this = self.expression( 2963 exp.Select, 2964 kind=kind, 2965 hint=hint, 2966 distinct=distinct, 2967 expressions=projections, 2968 limit=limit, 2969 ) 2970 this.comments = comments 2971 2972 into = 

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )

                # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
                # in case a modifier (e.g. join) is following
                if table and isinstance(this, exp.Values) and this.alias:
                    alias = this.args["alias"].pop()
                    this = exp.Table(this=this, alias=alias)

                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self.expression(exp.Stream, this=self._parse_function())
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )
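
    # Illustrative note (not from the source): _parse_with/_parse_cte together handle e.g.
    # "WITH RECURSIVE t AS (SELECT 1) SELECT * FROM t"; a Postgres-style
    # "AS [NOT] MATERIALIZED" hint is recorded on the CTE's "materialized" arg, and the
    # CTE body itself is parsed as a full statement via _parse_wrapped(_parse_statement).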

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this
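
    # Sketch of the modifier loop above (my reading, not upstream docs): after parsing
    # "SELECT * FROM t LIMIT 5, 10", the exp.Limit node initially carries offset=5;
    # _parse_query_modifiers pops that into a standalone exp.Offset so the tree matches
    # the "LIMIT 10 OFFSET 5" spelling, and any ClickHouse-style "LIMIT n BY col"
    # expressions are moved onto that Offset node at the same time.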

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
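
    # Hedged example: for "NATURAL LEFT OUTER JOIN", _parse_join_parts above is expected
    # to return the (method, side, kind) tokens (NATURAL, LEFT, OUTER), which _parse_join
    # below stores as Join(method="NATURAL", side="LEFT", kind="OUTER").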

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type == TokenType.CROSS)
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )
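
    # Illustrative sketch (mine): _parse_index_params above covers Postgres-flavored
    # clauses such as "USING btree (col) INCLUDE (other) WHERE col > 0", mapping each
    # clause onto the matching exp.IndexParameters arg (using, columns, include, where...).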

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
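
    # Hedged example: _parse_table_parts above turns "catalog.db.tbl" into
    # exp.Table(this=tbl, db=db, catalog=catalog); T-SQL's "FROM a..b" leaves an
    # empty-string db part, and even deeper dotted paths nest into exp.Dot expressions.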

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
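
    # Sketch (my summary, assumptions about token mapping): _parse_version above captures
    # temporal clauses such as "FOR SYSTEM_TIME AS OF '2019-01-01'" or
    # "FOR VERSION BETWEEN 1 AND 2" on an exp.Version node, while Snowflake-style
    # "AT(TIMESTAMP => ...)" time travel is handled separately by _parse_historical_data
    # below.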

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
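
    # Hedged example: BigQuery's "UNNEST(arr) AS x WITH OFFSET AS pos" flows through
    # _parse_unnest above; in UNNEST_COLUMN_ONLY dialects the single alias names the
    # produced column rather than the derived table, which is why the alias args are
    # rewritten in place.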

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)
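
    # Illustrative note (not upstream docs): in "PIVOT(SUM(x) FOR col IN (1 AS one, 2))",
    # _parse_pivot_in above wraps aliased IN values in exp.PivotAlias nodes, and a
    # Snowflake-style "IN (ANY ORDER BY ...)" list becomes a single exp.PivotAny entry.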

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore
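
    # Hedged sketch: _parse_group above accepts plain expression lists as well as
    # "GROUP BY ALL", "GROUP BY CUBE (a, b)", "GROUP BY GROUPING SETS ((a), (a, b))"
    # and the suffixed "GROUP BY a WITH ROLLUP", accumulating each form into the
    # matching key of the `elements` dict before building exp.Group.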

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> exp.Alias:
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_assignment(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # `asc and False` pins desc to False (rather than None) when an explicit ASC was consumed
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)
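
    # Illustrative example (mine): _parse_limit above handles the MySQL-style
    # "LIMIT 5, 10" inline offset (split out later by _parse_query_modifiers), so it
    # ends up equivalent to "LIMIT 10 OFFSET 5", while ANSI "FETCH FIRST 10 ROWS ONLY"
    # is captured as an exp.Fetch node instead of an exp.Limit.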

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
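
    # Sketch of the precedence ladder (my summary, not upstream docs): _parse_assignment
    # delegates to _parse_disjunction (OR) -> _parse_conjunction (AND) -> _parse_equality
    # (=, <>) -> _parse_comparison (<, <=, ...) -> _parse_range (BETWEEN/IN/LIKE/IS,
    # below) -> _parse_bitwise and tighter-binding levels, so each level binds more
    # strongly than the one before it.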

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
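
    # Hedged example: the canonicalization in _parse_interval below is expected to turn
    # INTERVAL '5 day' and INTERVAL 5 DAY alike into Interval(this='5', unit=Var(DAY)),
    # and a chain such as INTERVAL '1' YEAR '2' MONTH into an exp.Add of two intervals.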

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if len(parts) == 1:
                if unit:
                    # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
expressions = self._parse_csv(self._parse_equality) 4778 elif is_aggregate: 4779 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4780 any_token=False, tokens=(TokenType.VAR,) 4781 ) 4782 if not func_or_ident or not self._match(TokenType.COMMA): 4783 return None 4784 expressions = self._parse_csv( 4785 lambda: self._parse_types( 4786 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4787 ) 4788 ) 4789 expressions.insert(0, func_or_ident) 4790 else: 4791 expressions = self._parse_csv(self._parse_type_size) 4792 4793 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4794 if type_token == TokenType.VECTOR and len(expressions) == 2: 4795 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4796 4797 if not expressions or not self._match(TokenType.R_PAREN): 4798 self._retreat(index) 4799 return None 4800 4801 maybe_func = True 4802 4803 values: t.Optional[t.List[exp.Expression]] = None 4804 4805 if nested and self._match(TokenType.LT): 4806 if is_struct: 4807 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4808 else: 4809 expressions = self._parse_csv( 4810 lambda: self._parse_types( 4811 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4812 ) 4813 ) 4814 4815 if not self._match(TokenType.GT): 4816 self.raise_error("Expecting >") 4817 4818 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4819 values = self._parse_csv(self._parse_assignment) 4820 if not values and is_struct: 4821 values = None 4822 self._retreat(self._index - 1) 4823 else: 4824 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4825 4826 if type_token in self.TIMESTAMPS: 4827 if self._match_text_seq("WITH", "TIME", "ZONE"): 4828 maybe_func = False 4829 tz_type = ( 4830 exp.DataType.Type.TIMETZ 4831 if type_token in self.TIMES 4832 else exp.DataType.Type.TIMESTAMPTZ 4833 ) 4834 this = exp.DataType(this=tz_type, expressions=expressions) 4835 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4836 maybe_func = False 4837 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4838 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4839 maybe_func = False 4840 elif type_token == TokenType.INTERVAL: 4841 unit = self._parse_var(upper=True) 4842 if unit: 4843 if self._match_text_seq("TO"): 4844 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4845 4846 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4847 else: 4848 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4849 4850 if maybe_func and check_func: 4851 index2 = self._index 4852 peek = self._parse_string() 4853 4854 if not peek: 4855 self._retreat(index) 4856 return None 4857 4858 self._retreat(index2) 4859 4860 if not this: 4861 if self._match_text_seq("UNSIGNED"): 4862 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4863 if not unsigned_type_token: 4864 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4865 4866 type_token = unsigned_type_token or type_token 4867 4868 this = exp.DataType( 4869 this=exp.DataType.Type[type_token.value], 4870 expressions=expressions, 4871 nested=nested, 4872 prefix=prefix, 4873 ) 4874 4875 # Empty arrays/structs are allowed 4876 if values is not None: 4877 cls = exp.Struct if is_struct else exp.Array 4878 this = exp.cast(cls(expressions=values), this, copy=False) 4879 4880 elif expressions: 4881 this.set("expressions", 
expressions) 4882 4883 # https://materialize.com/docs/sql/types/list/#type-name 4884 while self._match(TokenType.LIST): 4885 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4886 4887 index = self._index 4888 4889 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4890 matched_array = self._match(TokenType.ARRAY) 4891 4892 while self._curr: 4893 datatype_token = self._prev.token_type 4894 matched_l_bracket = self._match(TokenType.L_BRACKET) 4895 if not matched_l_bracket and not matched_array: 4896 break 4897 4898 matched_array = False 4899 values = self._parse_csv(self._parse_assignment) or None 4900 if ( 4901 values 4902 and not schema 4903 and ( 4904 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4905 ) 4906 ): 4907 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4908 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4909 self._retreat(index) 4910 break 4911 4912 this = exp.DataType( 4913 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4914 ) 4915 self._match(TokenType.R_BRACKET) 4916 4917 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4918 converter = self.TYPE_CONVERTERS.get(this.this) 4919 if converter: 4920 this = converter(t.cast(exp.DataType, this)) 4921 4922 return this 4923 4924 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4925 index = self._index 4926 4927 if ( 4928 self._curr 4929 and self._next 4930 and self._curr.token_type in self.TYPE_TOKENS 4931 and self._next.token_type in self.TYPE_TOKENS 4932 ): 4933 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4934 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4935 this = self._parse_id_var() 4936 else: 4937 this = ( 4938 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4939 or self._parse_id_var() 4940 ) 4941 4942 self._match(TokenType.COLON) 4943 4944 if ( 4945 type_required 4946 and not isinstance(this, exp.DataType) 4947 and not self._match_set(self.TYPE_TOKENS, advance=False) 4948 ): 4949 self._retreat(index) 4950 return self._parse_types() 4951 4952 return self._parse_column_def(this) 4953 4954 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4955 if not self._match_text_seq("AT", "TIME", "ZONE"): 4956 return this 4957 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4958 4959 def _parse_column(self) -> t.Optional[exp.Expression]: 4960 this = self._parse_column_reference() 4961 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4962 4963 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4964 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4965 4966 return column 4967 4968 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4969 this = self._parse_field() 4970 if ( 4971 not this 4972 and self._match(TokenType.VALUES, advance=False) 4973 and self.VALUES_FOLLOWED_BY_PAREN 4974 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4975 ): 4976 this = self._parse_id_var() 4977 4978 if isinstance(this, exp.Identifier): 4979 # We bubble up comments from the Identifier to the Column 4980 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4981 4982 return this 4983 4984 def _parse_colon_as_variant_extract( 4985 self, this: t.Optional[exp.Expression] 4986 ) -> t.Optional[exp.Expression]: 4987 casts = [] 4988 json_path = [] 4989 escape = None 4990 4991 while self._match(TokenType.COLON): 4992 start_index = self._index 4993 4994 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4995 path = self._parse_column_ops( 4996 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4997 ) 4998 4999 # The cast :: operator has a lower precedence than the extraction operator :, so 5000 # we rearrange the AST appropriately to avoid casting the JSON path 5001 while isinstance(path, exp.Cast): 5002 casts.append(path.to) 5003 path = path.this 5004 5005 if casts: 5006 dcolon_offset = next( 5007 i 5008 for i, t in enumerate(self._tokens[start_index:]) 5009 if t.token_type == TokenType.DCOLON 5010 ) 5011 end_token = self._tokens[start_index + dcolon_offset - 1] 5012 else: 5013 end_token = self._prev 5014 5015 if path: 5016 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5017 # it'll roundtrip to a string literal in GET_PATH 5018 if isinstance(path, exp.Identifier) and path.quoted: 5019 escape = True 5020 5021 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5022 5023 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5024 # Databricks transforms it back to the colon/dot notation 5025 if json_path: 5026 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5027 5028 if json_path_expr: 5029 json_path_expr.set("escape", escape) 5030 5031 this = self.expression( 5032 exp.JSONExtract, 5033 this=this, 5034 expression=json_path_expr, 5035 variant_extract=True, 5036 ) 5037 5038 while casts: 5039 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5040 5041 return this 5042 5043 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5044 return self._parse_types() 5045 5046 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5047 this = self._parse_bracket(this) 5048 5049 while self._match_set(self.COLUMN_OPERATORS): 5050 op_token = self._prev.token_type 5051 op = self.COLUMN_OPERATORS.get(op_token) 5052 5053 if op_token == TokenType.DCOLON: 5054 field = self._parse_dcolon() 5055 if not field: 5056 self.raise_error("Expected type") 5057 elif op and self._curr: 5058 field = self._parse_column_reference() or self._parse_bracket() 5059 else: 5060 field = self._parse_field(any_token=True, anonymous_func=True) 5061 5062 if isinstance(field, exp.Func) and this: 5063 # bigquery allows function calls like x.y.count(...) 5064 # SAFE.SUBSTR(...) 5065 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5066 this = exp.replace_tree( 5067 this, 5068 lambda n: ( 5069 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5070 if n.table 5071 else n.this 5072 ) 5073 if isinstance(n, exp.Column) 5074 else n, 5075 ) 5076 5077 if op: 5078 this = op(self, this, field) 5079 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5080 this = self.expression( 5081 exp.Column, 5082 comments=this.comments, 5083 this=field, 5084 table=this.this, 5085 db=this.args.get("table"), 5086 catalog=this.args.get("db"), 5087 ) 5088 else: 5089 this = self.expression(exp.Dot, this=this, expression=field) 5090 5091 this = self._parse_bracket(this) 5092 5093 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5094 5095 def _parse_primary(self) -> t.Optional[exp.Expression]: 5096 if self._match_set(self.PRIMARY_PARSERS): 5097 token_type = self._prev.token_type 5098 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5099 5100 if token_type == TokenType.STRING: 5101 expressions = [primary] 5102 while self._match(TokenType.STRING): 5103 expressions.append(exp.Literal.string(self._prev.text)) 5104 5105 if len(expressions) > 1: 5106 return self.expression(exp.Concat, expressions=expressions) 5107 5108 return primary 5109 5110 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5111 return exp.Literal.number(f"0.{self._prev.text}") 5112 5113 if self._match(TokenType.L_PAREN): 5114 comments = self._prev_comments 5115 query = self._parse_select() 5116 5117 if query: 5118 expressions = [query] 5119 else: 5120 expressions = self._parse_expressions() 5121 5122 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5123 5124 if not this and self._match(TokenType.R_PAREN, advance=False): 5125 this 
= self.expression(exp.Tuple) 5126 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5127 this = self._parse_subquery(this=this, parse_alias=False) 5128 elif isinstance(this, exp.Subquery): 5129 this = self._parse_subquery( 5130 this=self._parse_set_operations(this), parse_alias=False 5131 ) 5132 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5133 this = self.expression(exp.Tuple, expressions=expressions) 5134 else: 5135 this = self.expression(exp.Paren, this=this) 5136 5137 if this: 5138 this.add_comments(comments) 5139 5140 self._match_r_paren(expression=this) 5141 return this 5142 5143 return None 5144 5145 def _parse_field( 5146 self, 5147 any_token: bool = False, 5148 tokens: t.Optional[t.Collection[TokenType]] = None, 5149 anonymous_func: bool = False, 5150 ) -> t.Optional[exp.Expression]: 5151 if anonymous_func: 5152 field = ( 5153 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5154 or self._parse_primary() 5155 ) 5156 else: 5157 field = self._parse_primary() or self._parse_function( 5158 anonymous=anonymous_func, any_token=any_token 5159 ) 5160 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5161 5162 def _parse_function( 5163 self, 5164 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5165 anonymous: bool = False, 5166 optional_parens: bool = True, 5167 any_token: bool = False, 5168 ) -> t.Optional[exp.Expression]: 5169 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5170 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5171 fn_syntax = False 5172 if ( 5173 self._match(TokenType.L_BRACE, advance=False) 5174 and self._next 5175 and self._next.text.upper() == "FN" 5176 ): 5177 self._advance(2) 5178 fn_syntax = True 5179 5180 func = self._parse_function_call( 5181 functions=functions, 5182 anonymous=anonymous, 5183 optional_parens=optional_parens, 5184 any_token=any_token, 5185 ) 5186 5187 if fn_syntax: 5188 self._match(TokenType.R_BRACE) 5189 5190 return func 5191 5192 def _parse_function_call( 5193 self, 5194 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5195 anonymous: bool = False, 5196 optional_parens: bool = True, 5197 any_token: bool = False, 5198 ) -> t.Optional[exp.Expression]: 5199 if not self._curr: 5200 return None 5201 5202 comments = self._curr.comments 5203 token_type = self._curr.token_type 5204 this = self._curr.text 5205 upper = this.upper() 5206 5207 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5208 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5209 self._advance() 5210 return self._parse_window(parser(self)) 5211 5212 if not self._next or self._next.token_type != TokenType.L_PAREN: 5213 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5214 self._advance() 5215 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5216 5217 return None 5218 5219 if any_token: 5220 if token_type in self.RESERVED_TOKENS: 5221 return None 5222 elif token_type not in self.FUNC_TOKENS: 5223 return None 5224 5225 self._advance(2) 5226 5227 parser = self.FUNCTION_PARSERS.get(upper) 5228 if parser and not anonymous: 5229 this = parser(self) 5230 else: 5231 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5232 5233 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5234 this = self.expression( 5235 subquery_predicate, comments=comments, this=self._parse_select() 5236 ) 5237 self._match_r_paren() 5238 return this 5239 5240 if functions is None: 
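                # Illustrative note: self.FUNCTIONS is the class-level registry that
                # maps uppercased function names to builder callables (for example
                # "LOG" -> build_logarithm defined above); names missing from it
                # fall through to the exp.Anonymous branch below.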
5241 functions = self.FUNCTIONS 5242 5243 function = functions.get(upper) 5244 5245 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5246 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5247 5248 if alias: 5249 args = self._kv_to_prop_eq(args) 5250 5251 if function and not anonymous: 5252 if "dialect" in function.__code__.co_varnames: 5253 func = function(args, dialect=self.dialect) 5254 else: 5255 func = function(args) 5256 5257 func = self.validate_expression(func, args) 5258 if not self.dialect.NORMALIZE_FUNCTIONS: 5259 func.meta["name"] = this 5260 5261 this = func 5262 else: 5263 if token_type == TokenType.IDENTIFIER: 5264 this = exp.Identifier(this=this, quoted=True) 5265 this = self.expression(exp.Anonymous, this=this, expressions=args) 5266 5267 if isinstance(this, exp.Expression): 5268 this.add_comments(comments) 5269 5270 self._match_r_paren(this) 5271 return self._parse_window(this) 5272 5273 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5274 return expression 5275 5276 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5277 transformed = [] 5278 5279 for index, e in enumerate(expressions): 5280 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5281 if isinstance(e, exp.Alias): 5282 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5283 5284 if not isinstance(e, exp.PropertyEQ): 5285 e = self.expression( 5286 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5287 ) 5288 5289 if isinstance(e.this, exp.Column): 5290 e.this.replace(e.this.this) 5291 else: 5292 e = self._to_prop_eq(e, index) 5293 5294 transformed.append(e) 5295 5296 return transformed 5297 5298 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5299 return self._parse_column_def(self._parse_id_var()) 5300 5301 def _parse_user_defined_function( 5302 self, kind: t.Optional[TokenType] = None 5303 ) -> t.Optional[exp.Expression]: 5304 this = self._parse_id_var() 5305 5306 while self._match(TokenType.DOT): 5307 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5308 5309 if not self._match(TokenType.L_PAREN): 5310 return this 5311 5312 expressions = self._parse_csv(self._parse_function_parameter) 5313 self._match_r_paren() 5314 return self.expression( 5315 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5316 ) 5317 5318 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5319 literal = self._parse_primary() 5320 if literal: 5321 return self.expression(exp.Introducer, this=token.text, expression=literal) 5322 5323 return self.expression(exp.Identifier, this=token.text) 5324 5325 def _parse_session_parameter(self) -> exp.SessionParameter: 5326 kind = None 5327 this = self._parse_id_var() or self._parse_primary() 5328 5329 if this and self._match(TokenType.DOT): 5330 kind = this.name 5331 this = self._parse_var() or self._parse_primary() 5332 5333 return self.expression(exp.SessionParameter, this=this, kind=kind) 5334 5335 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5336 return self._parse_id_var() 5337 5338 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5339 index = self._index 5340 5341 if self._match(TokenType.L_PAREN): 5342 expressions = t.cast( 5343 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5344 ) 5345 5346 if not self._match(TokenType.R_PAREN): 5347 self._retreat(index) 5348 else: 5349 expressions = 
[self._parse_lambda_arg()] 5350 5351 if self._match_set(self.LAMBDAS): 5352 return self.LAMBDAS[self._prev.token_type](self, expressions) 5353 5354 self._retreat(index) 5355 5356 this: t.Optional[exp.Expression] 5357 5358 if self._match(TokenType.DISTINCT): 5359 this = self.expression( 5360 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5361 ) 5362 else: 5363 this = self._parse_select_or_expression(alias=alias) 5364 5365 return self._parse_limit( 5366 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5367 ) 5368 5369 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5370 index = self._index 5371 if not self._match(TokenType.L_PAREN): 5372 return this 5373 5374 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5375 # expr can be of both types 5376 if self._match_set(self.SELECT_START_TOKENS): 5377 self._retreat(index) 5378 return this 5379 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5380 self._match_r_paren() 5381 return self.expression(exp.Schema, this=this, expressions=args) 5382 5383 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5384 return self._parse_column_def(self._parse_field(any_token=True)) 5385 5386 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5387 # column defs are not really columns, they're identifiers 5388 if isinstance(this, exp.Column): 5389 this = this.this 5390 5391 kind = self._parse_types(schema=True) 5392 5393 if self._match_text_seq("FOR", "ORDINALITY"): 5394 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5395 5396 constraints: t.List[exp.Expression] = [] 5397 5398 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5399 ("ALIAS", "MATERIALIZED") 5400 ): 5401 persisted = self._prev.text.upper() == "MATERIALIZED" 5402 constraint_kind = exp.ComputedColumnConstraint( 5403 this=self._parse_assignment(), 5404 persisted=persisted or self._match_text_seq("PERSISTED"), 5405 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5406 ) 5407 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5408 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5409 self._match(TokenType.ALIAS) 5410 constraints.append( 5411 self.expression( 5412 exp.ColumnConstraint, 5413 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5414 ) 5415 ) 5416 5417 while True: 5418 constraint = self._parse_column_constraint() 5419 if not constraint: 5420 break 5421 constraints.append(constraint) 5422 5423 if not kind and not constraints: 5424 return this 5425 5426 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5427 5428 def _parse_auto_increment( 5429 self, 5430 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5431 start = None 5432 increment = None 5433 5434 if self._match(TokenType.L_PAREN, advance=False): 5435 args = self._parse_wrapped_csv(self._parse_bitwise) 5436 start = seq_get(args, 0) 5437 increment = seq_get(args, 1) 5438 elif self._match_text_seq("START"): 5439 start = self._parse_bitwise() 5440 self._match_text_seq("INCREMENT") 5441 increment = self._parse_bitwise() 5442 5443 if start and increment: 5444 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5445 5446 return exp.AutoIncrementColumnConstraint() 5447 5448 def _parse_auto_property(self) -> 
t.Optional[exp.AutoRefreshProperty]: 5449 if not self._match_text_seq("REFRESH"): 5450 self._retreat(self._index - 1) 5451 return None 5452 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5453 5454 def _parse_compress(self) -> exp.CompressColumnConstraint: 5455 if self._match(TokenType.L_PAREN, advance=False): 5456 return self.expression( 5457 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5458 ) 5459 5460 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5461 5462 def _parse_generated_as_identity( 5463 self, 5464 ) -> ( 5465 exp.GeneratedAsIdentityColumnConstraint 5466 | exp.ComputedColumnConstraint 5467 | exp.GeneratedAsRowColumnConstraint 5468 ): 5469 if self._match_text_seq("BY", "DEFAULT"): 5470 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5471 this = self.expression( 5472 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5473 ) 5474 else: 5475 self._match_text_seq("ALWAYS") 5476 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5477 5478 self._match(TokenType.ALIAS) 5479 5480 if self._match_text_seq("ROW"): 5481 start = self._match_text_seq("START") 5482 if not start: 5483 self._match(TokenType.END) 5484 hidden = self._match_text_seq("HIDDEN") 5485 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5486 5487 identity = self._match_text_seq("IDENTITY") 5488 5489 if self._match(TokenType.L_PAREN): 5490 if self._match(TokenType.START_WITH): 5491 this.set("start", self._parse_bitwise()) 5492 if self._match_text_seq("INCREMENT", "BY"): 5493 this.set("increment", self._parse_bitwise()) 5494 if self._match_text_seq("MINVALUE"): 5495 this.set("minvalue", self._parse_bitwise()) 5496 if self._match_text_seq("MAXVALUE"): 5497 this.set("maxvalue", self._parse_bitwise()) 5498 5499 if self._match_text_seq("CYCLE"): 5500 this.set("cycle", True) 5501 elif self._match_text_seq("NO", "CYCLE"): 5502 this.set("cycle", False) 5503 5504 if not identity: 5505 this.set("expression", self._parse_range()) 5506 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5507 args = self._parse_csv(self._parse_bitwise) 5508 this.set("start", seq_get(args, 0)) 5509 this.set("increment", seq_get(args, 1)) 5510 5511 self._match_r_paren() 5512 5513 return this 5514 5515 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5516 self._match_text_seq("LENGTH") 5517 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5518 5519 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5520 if self._match_text_seq("NULL"): 5521 return self.expression(exp.NotNullColumnConstraint) 5522 if self._match_text_seq("CASESPECIFIC"): 5523 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5524 if self._match_text_seq("FOR", "REPLICATION"): 5525 return self.expression(exp.NotForReplicationColumnConstraint) 5526 5527 # Unconsume the `NOT` token 5528 self._retreat(self._index - 1) 5529 return None 5530 5531 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5532 if self._match(TokenType.CONSTRAINT): 5533 this = self._parse_id_var() 5534 else: 5535 this = None 5536 5537 if self._match_texts(self.CONSTRAINT_PARSERS): 5538 return self.expression( 5539 exp.ColumnConstraint, 5540 this=this, 5541 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5542 ) 5543 5544 return this 5545 5546 def _parse_constraint(self) -> t.Optional[exp.Expression]: 
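        # Illustrative examples of the two shapes handled here:
        #   CONSTRAINT pk PRIMARY KEY (id)  ->  named exp.Constraint node
        #   PRIMARY KEY (id)                ->  unnamed constraint fallthrough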
5547 if not self._match(TokenType.CONSTRAINT): 5548 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5549 5550 return self.expression( 5551 exp.Constraint, 5552 this=self._parse_id_var(), 5553 expressions=self._parse_unnamed_constraints(), 5554 ) 5555 5556 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5557 constraints = [] 5558 while True: 5559 constraint = self._parse_unnamed_constraint() or self._parse_function() 5560 if not constraint: 5561 break 5562 constraints.append(constraint) 5563 5564 return constraints 5565 5566 def _parse_unnamed_constraint( 5567 self, constraints: t.Optional[t.Collection[str]] = None 5568 ) -> t.Optional[exp.Expression]: 5569 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5570 constraints or self.CONSTRAINT_PARSERS 5571 ): 5572 return None 5573 5574 constraint = self._prev.text.upper() 5575 if constraint not in self.CONSTRAINT_PARSERS: 5576 self.raise_error(f"No parser found for schema constraint {constraint}.") 5577 5578 return self.CONSTRAINT_PARSERS[constraint](self) 5579 5580 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5581 return self._parse_id_var(any_token=False) 5582 5583 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5584 self._match_text_seq("KEY") 5585 return self.expression( 5586 exp.UniqueColumnConstraint, 5587 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5588 this=self._parse_schema(self._parse_unique_key()), 5589 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5590 on_conflict=self._parse_on_conflict(), 5591 ) 5592 5593 def _parse_key_constraint_options(self) -> t.List[str]: 5594 options = [] 5595 while True: 5596 if not self._curr: 5597 break 5598 5599 if self._match(TokenType.ON): 5600 action = None 5601 on = self._advance_any() and self._prev.text 5602 5603 if self._match_text_seq("NO", "ACTION"): 5604 action = "NO ACTION" 5605 elif self._match_text_seq("CASCADE"): 5606 action = "CASCADE" 5607 elif self._match_text_seq("RESTRICT"): 5608 action = "RESTRICT" 5609 elif self._match_pair(TokenType.SET, TokenType.NULL): 5610 action = "SET NULL" 5611 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5612 action = "SET DEFAULT" 5613 else: 5614 self.raise_error("Invalid key constraint") 5615 5616 options.append(f"ON {on} {action}") 5617 else: 5618 var = self._parse_var_from_options( 5619 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5620 ) 5621 if not var: 5622 break 5623 options.append(var.name) 5624 5625 return options 5626 5627 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5628 if match and not self._match(TokenType.REFERENCES): 5629 return None 5630 5631 expressions = None 5632 this = self._parse_table(schema=True) 5633 options = self._parse_key_constraint_options() 5634 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5635 5636 def _parse_foreign_key(self) -> exp.ForeignKey: 5637 expressions = self._parse_wrapped_id_vars() 5638 reference = self._parse_references() 5639 options = {} 5640 5641 while self._match(TokenType.ON): 5642 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5643 self.raise_error("Expected DELETE or UPDATE") 5644 5645 kind = self._prev.text.lower() 5646 5647 if self._match_text_seq("NO", "ACTION"): 5648 action = "NO ACTION" 5649 elif self._match(TokenType.SET): 5650 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5651 action = "SET " + self._prev.text.upper() 5652 else: 5653 
                self._advance()
5654                action = self._prev.text.upper()
5655
5656            options[kind] = action
5657
5658        return self.expression(
5659            exp.ForeignKey,
5660            expressions=expressions,
5661            reference=reference,
5662            **options,  # type: ignore
5663        )
5664
5665    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
5666        return self._parse_field()
5667
5668    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
5669        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
5670            self._retreat(self._index - 1)
5671            return None
5672
5673        id_vars = self._parse_wrapped_id_vars()
5674        return self.expression(
5675            exp.PeriodForSystemTimeConstraint,
5676            this=seq_get(id_vars, 0),
5677            expression=seq_get(id_vars, 1),
5678        )
5679
5680    def _parse_primary_key(
5681        self, wrapped_optional: bool = False, in_props: bool = False
5682    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
5683        desc = (
5684            self._match_set((TokenType.ASC, TokenType.DESC))
5685            and self._prev.token_type == TokenType.DESC
5686        )
5687
5688        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
5689            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
5690
5691        expressions = self._parse_wrapped_csv(
5692            self._parse_primary_key_part, optional=wrapped_optional
5693        )
5694        options = self._parse_key_constraint_options()
5695        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
5696
5697    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
5698        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
5699
5700    def _parse_odbc_datetime_literal(self) -> exp.Expression:
5701        """
5702        Parses a datetime literal in ODBC format. We parse the literal into the
5703        corresponding expression type, for example `{d'yyyy-mm-dd'}` will be parsed as
5704        a `Date`, exactly the same as we would parse `DATE('yyyy-mm-dd')`.
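
        Illustrative examples, assuming the default ODBC_DATETIME_LITERALS mapping
        (d -> Date, t -> Time, ts -> Timestamp):

            {d '2000-01-01'}            is parsed like DATE('2000-01-01')
            {ts '2000-01-01 00:00:00'}  is parsed like TIMESTAMP('2000-01-01 00:00:00')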
5705 5706 Reference: 5707 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5708 """ 5709 self._match(TokenType.VAR) 5710 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5711 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5712 if not self._match(TokenType.R_BRACE): 5713 self.raise_error("Expected }") 5714 return expression 5715 5716 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5717 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5718 return this 5719 5720 bracket_kind = self._prev.token_type 5721 if ( 5722 bracket_kind == TokenType.L_BRACE 5723 and self._curr 5724 and self._curr.token_type == TokenType.VAR 5725 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5726 ): 5727 return self._parse_odbc_datetime_literal() 5728 5729 expressions = self._parse_csv( 5730 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5731 ) 5732 5733 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5734 self.raise_error("Expected ]") 5735 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5736 self.raise_error("Expected }") 5737 5738 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5739 if bracket_kind == TokenType.L_BRACE: 5740 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5741 elif not this: 5742 this = build_array_constructor( 5743 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5744 ) 5745 else: 5746 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5747 if constructor_type: 5748 return build_array_constructor( 5749 constructor_type, 5750 args=expressions, 5751 bracket_kind=bracket_kind, 5752 dialect=self.dialect, 5753 ) 5754 5755 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5756 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5757 5758 self._add_comments(this) 5759 return self._parse_bracket(this) 5760 5761 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5762 if self._match(TokenType.COLON): 5763 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5764 return this 5765 5766 def _parse_case(self) -> t.Optional[exp.Expression]: 5767 ifs = [] 5768 default = None 5769 5770 comments = self._prev_comments 5771 expression = self._parse_assignment() 5772 5773 while self._match(TokenType.WHEN): 5774 this = self._parse_assignment() 5775 self._match(TokenType.THEN) 5776 then = self._parse_assignment() 5777 ifs.append(self.expression(exp.If, this=this, true=then)) 5778 5779 if self._match(TokenType.ELSE): 5780 default = self._parse_assignment() 5781 5782 if not self._match(TokenType.END): 5783 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5784 default = exp.column("interval") 5785 else: 5786 self.raise_error("Expected END after CASE", self._prev) 5787 5788 return self.expression( 5789 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5790 ) 5791 5792 def _parse_if(self) -> t.Optional[exp.Expression]: 5793 if self._match(TokenType.L_PAREN): 5794 args = self._parse_csv(self._parse_assignment) 5795 this = self.validate_expression(exp.If.from_arg_list(args), args) 5796 self._match_r_paren() 5797 else: 5798 index = self._index - 1 5799 5800 if self.NO_PAREN_IF_COMMANDS and index == 0: 5801 
return self._parse_as_command(self._prev) 5802 5803 condition = self._parse_assignment() 5804 5805 if not condition: 5806 self._retreat(index) 5807 return None 5808 5809 self._match(TokenType.THEN) 5810 true = self._parse_assignment() 5811 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5812 self._match(TokenType.END) 5813 this = self.expression(exp.If, this=condition, true=true, false=false) 5814 5815 return this 5816 5817 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5818 if not self._match_text_seq("VALUE", "FOR"): 5819 self._retreat(self._index - 1) 5820 return None 5821 5822 return self.expression( 5823 exp.NextValueFor, 5824 this=self._parse_column(), 5825 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5826 ) 5827 5828 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5829 this = self._parse_function() or self._parse_var_or_string(upper=True) 5830 5831 if self._match(TokenType.FROM): 5832 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5833 5834 if not self._match(TokenType.COMMA): 5835 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5836 5837 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5838 5839 def _parse_gap_fill(self) -> exp.GapFill: 5840 self._match(TokenType.TABLE) 5841 this = self._parse_table() 5842 5843 self._match(TokenType.COMMA) 5844 args = [this, *self._parse_csv(self._parse_lambda)] 5845 5846 gap_fill = exp.GapFill.from_arg_list(args) 5847 return self.validate_expression(gap_fill, args) 5848 5849 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5850 this = self._parse_assignment() 5851 5852 if not self._match(TokenType.ALIAS): 5853 if self._match(TokenType.COMMA): 5854 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5855 5856 self.raise_error("Expected AS after CAST") 5857 5858 fmt = None 5859 to = self._parse_types() 5860 5861 if self._match(TokenType.FORMAT): 5862 fmt_string = self._parse_string() 5863 fmt = self._parse_at_time_zone(fmt_string) 5864 5865 if not to: 5866 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5867 if to.this in exp.DataType.TEMPORAL_TYPES: 5868 this = self.expression( 5869 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5870 this=this, 5871 format=exp.Literal.string( 5872 format_time( 5873 fmt_string.this if fmt_string else "", 5874 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5875 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5876 ) 5877 ), 5878 safe=safe, 5879 ) 5880 5881 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5882 this.set("zone", fmt.args["zone"]) 5883 return this 5884 elif not to: 5885 self.raise_error("Expected TYPE after CAST") 5886 elif isinstance(to, exp.Identifier): 5887 to = exp.DataType.build(to.name, udt=True) 5888 elif to.this == exp.DataType.Type.CHAR: 5889 if self._match(TokenType.CHARACTER_SET): 5890 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5891 5892 return self.expression( 5893 exp.Cast if strict else exp.TryCast, 5894 this=this, 5895 to=to, 5896 format=fmt, 5897 safe=safe, 5898 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5899 ) 5900 5901 def _parse_string_agg(self) -> exp.Expression: 5902 if self._match(TokenType.DISTINCT): 5903 args: t.List[t.Optional[exp.Expression]] = [ 5904 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5905 ] 5906 if self._match(TokenType.COMMA): 5907 args.extend(self._parse_csv(self._parse_assignment)) 5908 else: 5909 args = self._parse_csv(self._parse_assignment) # type: ignore 5910 5911 index = self._index 5912 if not self._match(TokenType.R_PAREN) and args: 5913 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5914 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5915 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5916 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5917 5918 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5919 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5920 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5921 if not self._match_text_seq("WITHIN", "GROUP"): 5922 self._retreat(index) 5923 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5924 5925 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5926 order = self._parse_order(this=seq_get(args, 0)) 5927 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5928 5929 def _parse_convert( 5930 self, strict: bool, safe: t.Optional[bool] = None 5931 ) -> t.Optional[exp.Expression]: 5932 this = self._parse_bitwise() 5933 5934 if self._match(TokenType.USING): 5935 to: t.Optional[exp.Expression] = self.expression( 5936 exp.CharacterSet, this=self._parse_var() 5937 ) 5938 elif self._match(TokenType.COMMA): 5939 to = self._parse_types() 5940 else: 5941 to = None 5942 5943 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5944 5945 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5946 """ 5947 There are generally two variants of the DECODE function: 5948 5949 - DECODE(bin, charset) 5950 - DECODE(expression, search, result [, search, result] ... [, default]) 5951 5952 The second variant will always be parsed into a CASE expression. Note that NULL 5953 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5954 instead of relying on pattern matching. 
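
        Illustrative rewrite (the literal and NULL search cases shown here map
        directly onto the branches below):

            DECODE(x, 1, 'one', NULL, 'none', 'other')
            -> CASE
                   WHEN x = 1 THEN 'one'
                   WHEN x IS NULL THEN 'none'
                   ELSE 'other'
               END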
5955        """
5956        args = self._parse_csv(self._parse_assignment)
5957
5958        if len(args) < 3:
5959            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))
5960
5961        expression, *expressions = args
5962        if not expression:
5963            return None
5964
5965        ifs = []
5966        for search, result in zip(expressions[::2], expressions[1::2]):
5967            if not search or not result:
5968                return None
5969
5970            if isinstance(search, exp.Literal):
5971                ifs.append(
5972                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
5973                )
5974            elif isinstance(search, exp.Null):
5975                ifs.append(
5976                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
5977                )
5978            else:
5979                cond = exp.or_(
5980                    exp.EQ(this=expression.copy(), expression=search),
5981                    exp.and_(
5982                        exp.Is(this=expression.copy(), expression=exp.Null()),
5983                        exp.Is(this=search.copy(), expression=exp.Null()),
5984                        copy=False,
5985                    ),
5986                    copy=False,
5987                )
5988                ifs.append(exp.If(this=cond, true=result))
5989
5990        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
5991
5992    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
5993        self._match_text_seq("KEY")
5994        key = self._parse_column()
5995        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
5996        self._match_text_seq("VALUE")
5997        value = self._parse_bitwise()
5998
5999        if not key and not value:
6000            return None
6001        return self.expression(exp.JSONKeyValue, this=key, expression=value)
6002
6003    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
6004        if not this or not self._match_text_seq("FORMAT", "JSON"):
6005            return this
6006
6007        return self.expression(exp.FormatJson, this=this)
6008
6009    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
6010        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
6011        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
6012            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
6013            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
6014        else:
6015            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
6016            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
6017
6018        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)
6019
6020        if not empty and not error and not null:
6021            return None
6022
6023        return self.expression(
6024            exp.OnCondition,
6025            empty=empty,
6026            error=error,
6027            null=null,
6028        )
6029
6030    def _parse_on_handling(
6031        self, on: str, *values: str
6032    ) -> t.Optional[str] | t.Optional[exp.Expression]:
6033        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
6034        for value in values:
6035            if self._match_text_seq(value, "ON", on):
6036                return f"{value} ON {on}"
6037
6038        index = self._index
6039        if self._match(TokenType.DEFAULT):
6040            default_value = self._parse_bitwise()
6041            if self._match_text_seq("ON", on):
6042                return default_value
6043
6044        self._retreat(index)
6045
6046        return None
6047
6048    @t.overload
6049    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...
6050
6051    @t.overload
6052    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
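    # Illustrative: JSON_OBJECT('k' VALUE v ABSENT ON NULL WITH UNIQUE KEYS) is
    # parsed with null_handling="ABSENT ON NULL" and unique_keys=True; passing
    # agg=True builds the aggregate exp.JSONObjectAgg variant instead.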
6053 6054 def _parse_json_object(self, agg=False): 6055 star = self._parse_star() 6056 expressions = ( 6057 [star] 6058 if star 6059 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6060 ) 6061 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6062 6063 unique_keys = None 6064 if self._match_text_seq("WITH", "UNIQUE"): 6065 unique_keys = True 6066 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6067 unique_keys = False 6068 6069 self._match_text_seq("KEYS") 6070 6071 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6072 self._parse_type() 6073 ) 6074 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6075 6076 return self.expression( 6077 exp.JSONObjectAgg if agg else exp.JSONObject, 6078 expressions=expressions, 6079 null_handling=null_handling, 6080 unique_keys=unique_keys, 6081 return_type=return_type, 6082 encoding=encoding, 6083 ) 6084 6085 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6086 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6087 if not self._match_text_seq("NESTED"): 6088 this = self._parse_id_var() 6089 kind = self._parse_types(allow_identifiers=False) 6090 nested = None 6091 else: 6092 this = None 6093 kind = None 6094 nested = True 6095 6096 path = self._match_text_seq("PATH") and self._parse_string() 6097 nested_schema = nested and self._parse_json_schema() 6098 6099 return self.expression( 6100 exp.JSONColumnDef, 6101 this=this, 6102 kind=kind, 6103 path=path, 6104 nested_schema=nested_schema, 6105 ) 6106 6107 def _parse_json_schema(self) -> exp.JSONSchema: 6108 self._match_text_seq("COLUMNS") 6109 return self.expression( 6110 exp.JSONSchema, 6111 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6112 ) 6113 6114 def _parse_json_table(self) -> exp.JSONTable: 6115 this = self._parse_format_json(self._parse_bitwise()) 6116 path = self._match(TokenType.COMMA) and self._parse_string() 6117 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6118 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6119 schema = self._parse_json_schema() 6120 6121 return exp.JSONTable( 6122 this=this, 6123 schema=schema, 6124 path=path, 6125 error_handling=error_handling, 6126 empty_handling=empty_handling, 6127 ) 6128 6129 def _parse_match_against(self) -> exp.MatchAgainst: 6130 expressions = self._parse_csv(self._parse_column) 6131 6132 self._match_text_seq(")", "AGAINST", "(") 6133 6134 this = self._parse_string() 6135 6136 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6137 modifier = "IN NATURAL LANGUAGE MODE" 6138 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6139 modifier = f"{modifier} WITH QUERY EXPANSION" 6140 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6141 modifier = "IN BOOLEAN MODE" 6142 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6143 modifier = "WITH QUERY EXPANSION" 6144 else: 6145 modifier = None 6146 6147 return self.expression( 6148 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6149 ) 6150 6151 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6152 def _parse_open_json(self) -> exp.OpenJSON: 6153 this = self._parse_bitwise() 6154 path = self._match(TokenType.COMMA) and self._parse_string() 6155 6156 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6157 this = self._parse_field(any_token=True) 6158 kind = self._parse_types() 6159 path = 
self._parse_string() 6160 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6161 6162 return self.expression( 6163 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6164 ) 6165 6166 expressions = None 6167 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6168 self._match_l_paren() 6169 expressions = self._parse_csv(_parse_open_json_column_def) 6170 6171 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6172 6173 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6174 args = self._parse_csv(self._parse_bitwise) 6175 6176 if self._match(TokenType.IN): 6177 return self.expression( 6178 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6179 ) 6180 6181 if haystack_first: 6182 haystack = seq_get(args, 0) 6183 needle = seq_get(args, 1) 6184 else: 6185 needle = seq_get(args, 0) 6186 haystack = seq_get(args, 1) 6187 6188 return self.expression( 6189 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6190 ) 6191 6192 def _parse_predict(self) -> exp.Predict: 6193 self._match_text_seq("MODEL") 6194 this = self._parse_table() 6195 6196 self._match(TokenType.COMMA) 6197 self._match_text_seq("TABLE") 6198 6199 return self.expression( 6200 exp.Predict, 6201 this=this, 6202 expression=self._parse_table(), 6203 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6204 ) 6205 6206 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6207 args = self._parse_csv(self._parse_table) 6208 return exp.JoinHint(this=func_name.upper(), expressions=args) 6209 6210 def _parse_substring(self) -> exp.Substring: 6211 # Postgres supports the form: substring(string [from int] [for int]) 6212 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6213 6214 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6215 6216 if self._match(TokenType.FROM): 6217 args.append(self._parse_bitwise()) 6218 if self._match(TokenType.FOR): 6219 if len(args) == 1: 6220 args.append(exp.Literal.number(1)) 6221 args.append(self._parse_bitwise()) 6222 6223 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6224 6225 def _parse_trim(self) -> exp.Trim: 6226 # https://www.w3resource.com/sql/character-functions/trim.php 6227 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6228 6229 position = None 6230 collation = None 6231 expression = None 6232 6233 if self._match_texts(self.TRIM_TYPES): 6234 position = self._prev.text.upper() 6235 6236 this = self._parse_bitwise() 6237 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6238 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6239 expression = self._parse_bitwise() 6240 6241 if invert_order: 6242 this, expression = expression, this 6243 6244 if self._match(TokenType.COLLATE): 6245 collation = self._parse_bitwise() 6246 6247 return self.expression( 6248 exp.Trim, this=this, position=position, expression=expression, collation=collation 6249 ) 6250 6251 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6252 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6253 6254 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6255 return self._parse_window(self._parse_id_var(), alias=True) 6256 6257 def _parse_respect_or_ignore_nulls( 6258 self, this: t.Optional[exp.Expression] 6259 ) -> t.Optional[exp.Expression]: 6260 if self._match_text_seq("IGNORE", "NULLS"): 
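            # e.g. FIRST_VALUE(x) IGNORE NULLS OVER (...): the preceding expression
            # is wrapped in exp.IgnoreNulls (exp.RespectNulls in the branch below).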
6261 return self.expression(exp.IgnoreNulls, this=this) 6262 if self._match_text_seq("RESPECT", "NULLS"): 6263 return self.expression(exp.RespectNulls, this=this) 6264 return this 6265 6266 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6267 if self._match(TokenType.HAVING): 6268 self._match_texts(("MAX", "MIN")) 6269 max = self._prev.text.upper() != "MIN" 6270 return self.expression( 6271 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6272 ) 6273 6274 return this 6275 6276 def _parse_window( 6277 self, this: t.Optional[exp.Expression], alias: bool = False 6278 ) -> t.Optional[exp.Expression]: 6279 func = this 6280 comments = func.comments if isinstance(func, exp.Expression) else None 6281 6282 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6283 self._match(TokenType.WHERE) 6284 this = self.expression( 6285 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6286 ) 6287 self._match_r_paren() 6288 6289 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6290 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6291 if self._match_text_seq("WITHIN", "GROUP"): 6292 order = self._parse_wrapped(self._parse_order) 6293 this = self.expression(exp.WithinGroup, this=this, expression=order) 6294 6295 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6296 # Some dialects choose to implement and some do not. 6297 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6298 6299 # There is some code above in _parse_lambda that handles 6300 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6301 6302 # The below changes handle 6303 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6304 6305 # Oracle allows both formats 6306 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6307 # and Snowflake chose to do the same for familiarity 6308 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6309 if isinstance(this, exp.AggFunc): 6310 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6311 6312 if ignore_respect and ignore_respect is not this: 6313 ignore_respect.replace(ignore_respect.this) 6314 this = self.expression(ignore_respect.__class__, this=this) 6315 6316 this = self._parse_respect_or_ignore_nulls(this) 6317 6318 # bigquery select from window x AS (partition by ...) 
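        # e.g. the named-window clause SELECT ... FROM t WINDOW w AS (PARTITION BY y):
        # with alias=True we are defining window "w", so no OVER keyword is expected
        # (illustrative note).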
6319 if alias: 6320 over = None 6321 self._match(TokenType.ALIAS) 6322 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6323 return this 6324 else: 6325 over = self._prev.text.upper() 6326 6327 if comments and isinstance(func, exp.Expression): 6328 func.pop_comments() 6329 6330 if not self._match(TokenType.L_PAREN): 6331 return self.expression( 6332 exp.Window, 6333 comments=comments, 6334 this=this, 6335 alias=self._parse_id_var(False), 6336 over=over, 6337 ) 6338 6339 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6340 6341 first = self._match(TokenType.FIRST) 6342 if self._match_text_seq("LAST"): 6343 first = False 6344 6345 partition, order = self._parse_partition_and_order() 6346 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6347 6348 if kind: 6349 self._match(TokenType.BETWEEN) 6350 start = self._parse_window_spec() 6351 self._match(TokenType.AND) 6352 end = self._parse_window_spec() 6353 6354 spec = self.expression( 6355 exp.WindowSpec, 6356 kind=kind, 6357 start=start["value"], 6358 start_side=start["side"], 6359 end=end["value"], 6360 end_side=end["side"], 6361 ) 6362 else: 6363 spec = None 6364 6365 self._match_r_paren() 6366 6367 window = self.expression( 6368 exp.Window, 6369 comments=comments, 6370 this=this, 6371 partition_by=partition, 6372 order=order, 6373 spec=spec, 6374 alias=window_alias, 6375 over=over, 6376 first=first, 6377 ) 6378 6379 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6380 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6381 return self._parse_window(window, alias=alias) 6382 6383 return window 6384 6385 def _parse_partition_and_order( 6386 self, 6387 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6388 return self._parse_partition_by(), self._parse_order() 6389 6390 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6391 self._match(TokenType.BETWEEN) 6392 6393 return { 6394 "value": ( 6395 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6396 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6397 or self._parse_bitwise() 6398 ), 6399 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6400 } 6401 6402 def _parse_alias( 6403 self, this: t.Optional[exp.Expression], explicit: bool = False 6404 ) -> t.Optional[exp.Expression]: 6405 any_token = self._match(TokenType.ALIAS) 6406 comments = self._prev_comments or [] 6407 6408 if explicit and not any_token: 6409 return this 6410 6411 if self._match(TokenType.L_PAREN): 6412 aliases = self.expression( 6413 exp.Aliases, 6414 comments=comments, 6415 this=this, 6416 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6417 ) 6418 self._match_r_paren(aliases) 6419 return aliases 6420 6421 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6422 self.STRING_ALIASES and self._parse_string_as_identifier() 6423 ) 6424 6425 if alias: 6426 comments.extend(alias.pop_comments()) 6427 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6428 column = this.this 6429 6430 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6431 if not this.comments and column and column.comments: 6432 this.comments = column.pop_comments() 6433 6434 return this 6435 6436 def _parse_id_var( 6437 self, 6438 any_token: bool = True, 6439 tokens: t.Optional[t.Collection[TokenType]] = None, 6440 ) -> t.Optional[exp.Expression]: 6441 expression = self._parse_identifier() 6442 if 
not expression and ( 6443 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6444 ): 6445 quoted = self._prev.token_type == TokenType.STRING 6446 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6447 6448 return expression 6449 6450 def _parse_string(self) -> t.Optional[exp.Expression]: 6451 if self._match_set(self.STRING_PARSERS): 6452 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6453 return self._parse_placeholder() 6454 6455 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6456 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6457 6458 def _parse_number(self) -> t.Optional[exp.Expression]: 6459 if self._match_set(self.NUMERIC_PARSERS): 6460 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6461 return self._parse_placeholder() 6462 6463 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6464 if self._match(TokenType.IDENTIFIER): 6465 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6466 return self._parse_placeholder() 6467 6468 def _parse_var( 6469 self, 6470 any_token: bool = False, 6471 tokens: t.Optional[t.Collection[TokenType]] = None, 6472 upper: bool = False, 6473 ) -> t.Optional[exp.Expression]: 6474 if ( 6475 (any_token and self._advance_any()) 6476 or self._match(TokenType.VAR) 6477 or (self._match_set(tokens) if tokens else False) 6478 ): 6479 return self.expression( 6480 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6481 ) 6482 return self._parse_placeholder() 6483 6484 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6485 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6486 self._advance() 6487 return self._prev 6488 return None 6489 6490 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6491 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6492 6493 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6494 return self._parse_primary() or self._parse_var(any_token=True) 6495 6496 def _parse_null(self) -> t.Optional[exp.Expression]: 6497 if self._match_set(self.NULL_TOKENS): 6498 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6499 return self._parse_placeholder() 6500 6501 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6502 if self._match(TokenType.TRUE): 6503 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6504 if self._match(TokenType.FALSE): 6505 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6506 return self._parse_placeholder() 6507 6508 def _parse_star(self) -> t.Optional[exp.Expression]: 6509 if self._match(TokenType.STAR): 6510 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6511 return self._parse_placeholder() 6512 6513 def _parse_parameter(self) -> exp.Parameter: 6514 this = self._parse_identifier() or self._parse_primary_or_var() 6515 return self.expression(exp.Parameter, this=this) 6516 6517 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6518 if self._match_set(self.PLACEHOLDER_PARSERS): 6519 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6520 if placeholder: 6521 return placeholder 6522 self._advance(-1) 6523 return None 6524 6525 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6526 if not self._match_texts(keywords): 6527 return None 6528 if self._match(TokenType.L_PAREN, 
advance=False): 6529 return self._parse_wrapped_csv(self._parse_expression) 6530 6531 expression = self._parse_expression() 6532 return [expression] if expression else None 6533 6534 def _parse_csv( 6535 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6536 ) -> t.List[exp.Expression]: 6537 parse_result = parse_method() 6538 items = [parse_result] if parse_result is not None else [] 6539 6540 while self._match(sep): 6541 self._add_comments(parse_result) 6542 parse_result = parse_method() 6543 if parse_result is not None: 6544 items.append(parse_result) 6545 6546 return items 6547 6548 def _parse_tokens( 6549 self, parse_method: t.Callable, expressions: t.Dict 6550 ) -> t.Optional[exp.Expression]: 6551 this = parse_method() 6552 6553 while self._match_set(expressions): 6554 this = self.expression( 6555 expressions[self._prev.token_type], 6556 this=this, 6557 comments=self._prev_comments, 6558 expression=parse_method(), 6559 ) 6560 6561 return this 6562 6563 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6564 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6565 6566 def _parse_wrapped_csv( 6567 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6568 ) -> t.List[exp.Expression]: 6569 return self._parse_wrapped( 6570 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6571 ) 6572 6573 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6574 wrapped = self._match(TokenType.L_PAREN) 6575 if not wrapped and not optional: 6576 self.raise_error("Expecting (") 6577 parse_result = parse_method() 6578 if wrapped: 6579 self._match_r_paren() 6580 return parse_result 6581 6582 def _parse_expressions(self) -> t.List[exp.Expression]: 6583 return self._parse_csv(self._parse_expression) 6584 6585 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6586 return self._parse_select() or self._parse_set_operations( 6587 self._parse_expression() if alias else self._parse_assignment() 6588 ) 6589 6590 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6591 return self._parse_query_modifiers( 6592 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6593 ) 6594 6595 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6596 this = None 6597 if self._match_texts(self.TRANSACTION_KIND): 6598 this = self._prev.text 6599 6600 self._match_texts(("TRANSACTION", "WORK")) 6601 6602 modes = [] 6603 while True: 6604 mode = [] 6605 while self._match(TokenType.VAR): 6606 mode.append(self._prev.text) 6607 6608 if mode: 6609 modes.append(" ".join(mode)) 6610 if not self._match(TokenType.COMMA): 6611 break 6612 6613 return self.expression(exp.Transaction, this=this, modes=modes) 6614 6615 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6616 chain = None 6617 savepoint = None 6618 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6619 6620 self._match_texts(("TRANSACTION", "WORK")) 6621 6622 if self._match_text_seq("TO"): 6623 self._match_text_seq("SAVEPOINT") 6624 savepoint = self._parse_id_var() 6625 6626 if self._match(TokenType.AND): 6627 chain = not self._match_text_seq("NO") 6628 self._match_text_seq("CHAIN") 6629 6630 if is_rollback: 6631 return self.expression(exp.Rollback, savepoint=savepoint) 6632 6633 return self.expression(exp.Commit, chain=chain) 6634 6635 def _parse_refresh(self) -> exp.Refresh: 6636 self._match(TokenType.TABLE) 6637 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6638 6639 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6640 if not self._match_text_seq("ADD"): 6641 return None 6642 6643 self._match(TokenType.COLUMN) 6644 exists_column = self._parse_exists(not_=True) 6645 expression = self._parse_field_def() 6646 6647 if expression: 6648 expression.set("exists", exists_column) 6649 6650 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6651 if self._match_texts(("FIRST", "AFTER")): 6652 position = self._prev.text 6653 column_position = self.expression( 6654 exp.ColumnPosition, this=self._parse_column(), position=position 6655 ) 6656 expression.set("position", column_position) 6657 6658 return expression 6659 6660 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6661 drop = self._match(TokenType.DROP) and self._parse_drop() 6662 if drop and not isinstance(drop, exp.Command): 6663 drop.set("kind", drop.args.get("kind", "COLUMN")) 6664 return drop 6665 6666 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6667 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6668 return self.expression( 6669 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6670 ) 6671 6672 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6673 index = self._index - 1 6674 6675 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6676 return self._parse_csv( 6677 lambda: self.expression( 6678 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6679 ) 6680 ) 6681 6682 self._retreat(index) 6683 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6684 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6685 6686 if self._match_text_seq("ADD", "COLUMNS"): 6687 schema = self._parse_schema() 6688 if schema: 6689 return [schema] 6690 return [] 6691 6692 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6693 6694 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6695 if self._match_texts(self.ALTER_ALTER_PARSERS): 6696 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6697 6698 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6699 # keyword after ALTER we default to parsing this statement 6700 self._match(TokenType.COLUMN) 6701 column = self._parse_field(any_token=True) 6702 6703 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6704 return self.expression(exp.AlterColumn, this=column, drop=True) 6705 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6706 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6707 if self._match(TokenType.COMMENT): 6708 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6709 if self._match_text_seq("DROP", "NOT", "NULL"): 6710 return self.expression( 6711 exp.AlterColumn, 6712 this=column, 6713 drop=True, 6714 allow_null=True, 6715 ) 6716 if self._match_text_seq("SET", "NOT", "NULL"): 6717 return self.expression( 6718 exp.AlterColumn, 6719 this=column, 6720 allow_null=False, 6721 ) 6722 self._match_text_seq("SET", "DATA") 6723 self._match_text_seq("TYPE") 6724 return self.expression( 6725 exp.AlterColumn, 6726 this=column, 6727 dtype=self._parse_types(), 6728 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6729 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6730 ) 6731 6732 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6733 if self._match_texts(("ALL", "EVEN", "AUTO")): 6734 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6735 6736 self._match_text_seq("KEY", "DISTKEY") 6737 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6738 6739 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6740 if compound: 6741 self._match_text_seq("SORTKEY") 6742 6743 if self._match(TokenType.L_PAREN, advance=False): 6744 return self.expression( 6745 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6746 ) 6747 6748 self._match_texts(("AUTO", "NONE")) 6749 return self.expression( 6750 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6751 ) 6752 6753 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6754 index = self._index - 1 6755 6756 partition_exists = self._parse_exists() 6757 if self._match(TokenType.PARTITION, advance=False): 6758 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6759 6760 self._retreat(index) 6761 return self._parse_csv(self._parse_drop_column) 6762 6763 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6764 if self._match(TokenType.COLUMN): 6765 exists = self._parse_exists() 6766 old_column = self._parse_column() 6767 to = self._match_text_seq("TO") 6768 new_column = self._parse_column() 6769 6770 if old_column is None or to is None or new_column is None: 6771 return None 6772 6773 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6774 6775 self._match_text_seq("TO") 6776 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6777 6778 def _parse_alter_table_set(self) -> exp.AlterSet: 6779 alter_set = self.expression(exp.AlterSet) 6780 6781 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6782 "TABLE", "PROPERTIES" 6783 ): 6784 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6785 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6786 alter_set.set("expressions", [self._parse_assignment()]) 6787 elif self._match_texts(("LOGGED", "UNLOGGED")): 6788 alter_set.set("option", exp.var(self._prev.text.upper())) 6789 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6790 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6791 elif self._match_text_seq("LOCATION"): 6792 alter_set.set("location", self._parse_field()) 6793 elif self._match_text_seq("ACCESS", "METHOD"): 6794 alter_set.set("access_method", self._parse_field()) 6795 elif self._match_text_seq("TABLESPACE"): 6796 alter_set.set("tablespace", self._parse_field()) 6797 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6798 alter_set.set("file_format", [self._parse_field()]) 6799 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6800 alter_set.set("file_format", self._parse_wrapped_options()) 6801 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6802 alter_set.set("copy_options", self._parse_wrapped_options()) 6803 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6804 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6805 else: 6806 if self._match_text_seq("SERDE"): 6807 alter_set.set("serde", self._parse_field()) 6808 6809 alter_set.set("expressions", [self._parse_properties()]) 6810 6811 return 
alter_set 6812 6813 def _parse_alter(self) -> exp.Alter | exp.Command: 6814 start = self._prev 6815 6816 alter_token = self._match_set(self.ALTERABLES) and self._prev 6817 if not alter_token: 6818 return self._parse_as_command(start) 6819 6820 exists = self._parse_exists() 6821 only = self._match_text_seq("ONLY") 6822 this = self._parse_table(schema=True) 6823 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6824 6825 if self._next: 6826 self._advance() 6827 6828 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6829 if parser: 6830 actions = ensure_list(parser(self)) 6831 not_valid = self._match_text_seq("NOT", "VALID") 6832 options = self._parse_csv(self._parse_property) 6833 6834 if not self._curr and actions: 6835 return self.expression( 6836 exp.Alter, 6837 this=this, 6838 kind=alter_token.text.upper(), 6839 exists=exists, 6840 actions=actions, 6841 only=only, 6842 options=options, 6843 cluster=cluster, 6844 not_valid=not_valid, 6845 ) 6846 6847 return self._parse_as_command(start) 6848 6849 def _parse_merge(self) -> exp.Merge: 6850 self._match(TokenType.INTO) 6851 target = self._parse_table() 6852 6853 if target and self._match(TokenType.ALIAS, advance=False): 6854 target.set("alias", self._parse_table_alias()) 6855 6856 self._match(TokenType.USING) 6857 using = self._parse_table() 6858 6859 self._match(TokenType.ON) 6860 on = self._parse_assignment() 6861 6862 return self.expression( 6863 exp.Merge, 6864 this=target, 6865 using=using, 6866 on=on, 6867 expressions=self._parse_when_matched(), 6868 returning=self._parse_returning(), 6869 ) 6870 6871 def _parse_when_matched(self) -> t.List[exp.When]: 6872 whens = [] 6873 6874 while self._match(TokenType.WHEN): 6875 matched = not self._match(TokenType.NOT) 6876 self._match_text_seq("MATCHED") 6877 source = ( 6878 False 6879 if self._match_text_seq("BY", "TARGET") 6880 else self._match_text_seq("BY", "SOURCE") 6881 ) 6882 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6883 6884 self._match(TokenType.THEN) 6885 6886 if self._match(TokenType.INSERT): 6887 this = self._parse_star() 6888 if this: 6889 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6890 else: 6891 then = self.expression( 6892 exp.Insert, 6893 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6894 expression=self._match_text_seq("VALUES") and self._parse_value(), 6895 ) 6896 elif self._match(TokenType.UPDATE): 6897 expressions = self._parse_star() 6898 if expressions: 6899 then = self.expression(exp.Update, expressions=expressions) 6900 else: 6901 then = self.expression( 6902 exp.Update, 6903 expressions=self._match(TokenType.SET) 6904 and self._parse_csv(self._parse_equality), 6905 ) 6906 elif self._match(TokenType.DELETE): 6907 then = self.expression(exp.Var, this=self._prev.text) 6908 else: 6909 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6910 6911 whens.append( 6912 self.expression( 6913 exp.When, 6914 matched=matched, 6915 source=source, 6916 condition=condition, 6917 then=then, 6918 ) 6919 ) 6920 return whens 6921 6922 def _parse_show(self) -> t.Optional[exp.Expression]: 6923 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6924 if parser: 6925 return parser(self) 6926 return self._parse_as_command(self._prev) 6927 6928 def _parse_set_item_assignment( 6929 self, kind: t.Optional[str] = None 6930 ) -> t.Optional[exp.Expression]: 6931 index = self._index 6932 6933 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 6934 return self._parse_set_transaction(global_=kind == "GLOBAL") 6935 6936 left = self._parse_primary() or self._parse_column() 6937 assignment_delimiter = self._match_texts(("=", "TO")) 6938 6939 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6940 self._retreat(index) 6941 return None 6942 6943 right = self._parse_statement() or self._parse_id_var() 6944 if isinstance(right, (exp.Column, exp.Identifier)): 6945 right = exp.var(right.name) 6946 6947 this = self.expression(exp.EQ, this=left, expression=right) 6948 return self.expression(exp.SetItem, this=this, kind=kind) 6949 6950 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6951 self._match_text_seq("TRANSACTION") 6952 characteristics = self._parse_csv( 6953 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6954 ) 6955 return self.expression( 6956 exp.SetItem, 6957 expressions=characteristics, 6958 kind="TRANSACTION", 6959 **{"global": global_}, # type: ignore 6960 ) 6961 6962 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6963 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6964 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6965 6966 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6967 index = self._index 6968 set_ = self.expression( 6969 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6970 ) 6971 6972 if self._curr: 6973 self._retreat(index) 6974 return self._parse_as_command(self._prev) 6975 6976 return set_ 6977 6978 def _parse_var_from_options( 6979 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6980 ) -> t.Optional[exp.Var]: 6981 start = self._curr 6982 if not start: 6983 return None 6984 6985 option = start.text.upper() 6986 continuations = options.get(option) 6987 6988 index = self._index 6989 self._advance() 6990 for keywords in continuations or []: 6991 if isinstance(keywords, str): 6992 keywords = (keywords,) 6993 6994 if self._match_text_seq(*keywords): 6995 option = f"{option} {' '.join(keywords)}" 6996 break 6997 else: 6998 if continuations or continuations is None: 6999 if raise_unmatched: 7000 self.raise_error(f"Unknown option {option}") 7001 7002 self._retreat(index) 7003 return None 7004 7005 return exp.var(option) 7006 7007 def _parse_as_command(self, start: Token) -> exp.Command: 7008 while self._curr: 7009 self._advance() 7010 text = self._find_sql(start, self._prev) 7011 size = len(start.text) 7012 self._warn_unsupported() 7013 return exp.Command(this=text[:size], expression=text[size:]) 7014 7015 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7016 settings = [] 7017 7018 self._match_l_paren() 7019 kind = self._parse_id_var() 7020 7021 if self._match(TokenType.L_PAREN): 7022 while True: 7023 key = self._parse_id_var() 7024 value = self._parse_primary() 7025 7026 if not key and value is None: 7027 break 7028 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7029 self._match(TokenType.R_PAREN) 7030 7031 self._match_r_paren() 7032 7033 return self.expression( 7034 exp.DictProperty, 7035 this=this, 7036 kind=kind.this if kind else None, 7037 settings=settings, 7038 ) 7039 7040 def _parse_dict_range(self, this: str) -> exp.DictRange: 7041 self._match_l_paren() 7042 has_min = self._match_text_seq("MIN") 7043 if has_min: 7044 min = self._parse_var() or self._parse_primary() 7045 self._match_text_seq("MAX") 7046 max = 
self._parse_var() or self._parse_primary() 7047 else: 7048 max = self._parse_var() or self._parse_primary() 7049 min = exp.Literal.number(0) 7050 self._match_r_paren() 7051 return self.expression(exp.DictRange, this=this, min=min, max=max) 7052 7053 def _parse_comprehension( 7054 self, this: t.Optional[exp.Expression] 7055 ) -> t.Optional[exp.Comprehension]: 7056 index = self._index 7057 expression = self._parse_column() 7058 if not self._match(TokenType.IN): 7059 self._retreat(index - 1) 7060 return None 7061 iterator = self._parse_column() 7062 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7063 return self.expression( 7064 exp.Comprehension, 7065 this=this, 7066 expression=expression, 7067 iterator=iterator, 7068 condition=condition, 7069 ) 7070 7071 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7072 if self._match(TokenType.HEREDOC_STRING): 7073 return self.expression(exp.Heredoc, this=self._prev.text) 7074 7075 if not self._match_text_seq("$"): 7076 return None 7077 7078 tags = ["$"] 7079 tag_text = None 7080 7081 if self._is_connected(): 7082 self._advance() 7083 tags.append(self._prev.text.upper()) 7084 else: 7085 self.raise_error("No closing $ found") 7086 7087 if tags[-1] != "$": 7088 if self._is_connected() and self._match_text_seq("$"): 7089 tag_text = tags[-1] 7090 tags.append("$") 7091 else: 7092 self.raise_error("No closing $ found") 7093 7094 heredoc_start = self._curr 7095 7096 while self._curr: 7097 if self._match_text_seq(*tags, advance=False): 7098 this = self._find_sql(heredoc_start, self._prev) 7099 self._advance(len(tags)) 7100 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7101 7102 self._advance() 7103 7104 self.raise_error(f"No closing {''.join(tags)} found") 7105 return None 7106 7107 def _find_parser( 7108 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7109 ) -> t.Optional[t.Callable]: 7110 if not self._curr: 7111 return None 7112 7113 index = self._index 7114 this = [] 7115 while True: 7116 # The current token might be multiple words 7117 curr = self._curr.text.upper() 7118 key = curr.split(" ") 7119 this.append(curr) 7120 7121 self._advance() 7122 result, trie = in_trie(trie, key) 7123 if result == TrieResult.FAILED: 7124 break 7125 7126 if result == TrieResult.EXISTS: 7127 subparser = parsers[" ".join(this)] 7128 return subparser 7129 7130 self._retreat(index) 7131 return None 7132 7133 def _match(self, token_type, advance=True, expression=None): 7134 if not self._curr: 7135 return None 7136 7137 if self._curr.token_type == token_type: 7138 if advance: 7139 self._advance() 7140 self._add_comments(expression) 7141 return True 7142 7143 return None 7144 7145 def _match_set(self, types, advance=True): 7146 if not self._curr: 7147 return None 7148 7149 if self._curr.token_type in types: 7150 if advance: 7151 self._advance() 7152 return True 7153 7154 return None 7155 7156 def _match_pair(self, token_type_a, token_type_b, advance=True): 7157 if not self._curr or not self._next: 7158 return None 7159 7160 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7161 if advance: 7162 self._advance(2) 7163 return True 7164 7165 return None 7166 7167 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7168 if not self._match(TokenType.L_PAREN, expression=expression): 7169 self.raise_error("Expecting (") 7170 7171 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7172 if not self._match(TokenType.R_PAREN, expression=expression): 
7173 self.raise_error("Expecting )") 7174 7175 def _match_texts(self, texts, advance=True): 7176 if ( 7177 self._curr 7178 and self._curr.token_type != TokenType.STRING 7179 and self._curr.text.upper() in texts 7180 ): 7181 if advance: 7182 self._advance() 7183 return True 7184 return None 7185 7186 def _match_text_seq(self, *texts, advance=True): 7187 index = self._index 7188 for text in texts: 7189 if ( 7190 self._curr 7191 and self._curr.token_type != TokenType.STRING 7192 and self._curr.text.upper() == text 7193 ): 7194 self._advance() 7195 else: 7196 self._retreat(index) 7197 return None 7198 7199 if not advance: 7200 self._retreat(index) 7201 7202 return True 7203 7204 def _replace_lambda( 7205 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7206 ) -> t.Optional[exp.Expression]: 7207 if not node: 7208 return node 7209 7210 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7211 7212 for column in node.find_all(exp.Column): 7213 typ = lambda_types.get(column.parts[0].name) 7214 if typ is not None: 7215 dot_or_id = column.to_dot() if column.table else column.this 7216 7217 if typ: 7218 dot_or_id = self.expression( 7219 exp.Cast, 7220 this=dot_or_id, 7221 to=typ, 7222 ) 7223 7224 parent = column.parent 7225 7226 while isinstance(parent, exp.Dot): 7227 if not isinstance(parent.parent, exp.Dot): 7228 parent.replace(dot_or_id) 7229 break 7230 parent = parent.parent 7231 else: 7232 if column is node: 7233 node = dot_or_id 7234 else: 7235 column.replace(dot_or_id) 7236 return node 7237 7238 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7239 start = self._prev 7240 7241 # Not to be confused with TRUNCATE(number, decimals) function call 7242 if self._match(TokenType.L_PAREN): 7243 self._retreat(self._index - 2) 7244 return self._parse_function() 7245 7246 # Clickhouse supports TRUNCATE DATABASE as well 7247 is_database = self._match(TokenType.DATABASE) 7248 7249 self._match(TokenType.TABLE) 7250 7251 exists = self._parse_exists(not_=False) 7252 7253 expressions = self._parse_csv( 7254 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7255 ) 7256 7257 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7258 7259 if self._match_text_seq("RESTART", "IDENTITY"): 7260 identity = "RESTART" 7261 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7262 identity = "CONTINUE" 7263 else: 7264 identity = None 7265 7266 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7267 option = self._prev.text 7268 else: 7269 option = None 7270 7271 partition = self._parse_partition() 7272 7273 # Fallback case 7274 if self._curr: 7275 return self._parse_as_command(start) 7276 7277 return self.expression( 7278 exp.TruncateTable, 7279 expressions=expressions, 7280 is_database=is_database, 7281 exists=exists, 7282 cluster=cluster, 7283 identity=identity, 7284 option=option, 7285 partition=partition, 7286 ) 7287 7288 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7289 this = self._parse_ordered(self._parse_opclass) 7290 7291 if not self._match(TokenType.WITH): 7292 return this 7293 7294 op = self._parse_var(any_token=True) 7295 7296 return self.expression(exp.WithOperator, this=this, op=op) 7297 7298 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7299 self._match(TokenType.EQ) 7300 self._match(TokenType.L_PAREN) 7301 7302 opts: t.List[t.Optional[exp.Expression]] = [] 7303 while self._curr and not self._match(TokenType.R_PAREN): 7304 if 
self._match_text_seq("FORMAT_NAME", "="): 7305 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7306 # so we parse it separately to use _parse_field() 7307 prop = self.expression( 7308 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7309 ) 7310 opts.append(prop) 7311 else: 7312 opts.append(self._parse_property()) 7313 7314 self._match(TokenType.COMMA) 7315 7316 return opts 7317 7318 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7319 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7320 7321 options = [] 7322 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7323 option = self._parse_var(any_token=True) 7324 prev = self._prev.text.upper() 7325 7326 # Different dialects might separate options and values by white space, "=" and "AS" 7327 self._match(TokenType.EQ) 7328 self._match(TokenType.ALIAS) 7329 7330 param = self.expression(exp.CopyParameter, this=option) 7331 7332 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7333 TokenType.L_PAREN, advance=False 7334 ): 7335 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7336 param.set("expressions", self._parse_wrapped_options()) 7337 elif prev == "FILE_FORMAT": 7338 # T-SQL's external file format case 7339 param.set("expression", self._parse_field()) 7340 else: 7341 param.set("expression", self._parse_unquoted_field()) 7342 7343 options.append(param) 7344 self._match(sep) 7345 7346 return options 7347 7348 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7349 expr = self.expression(exp.Credentials) 7350 7351 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7352 expr.set("storage", self._parse_field()) 7353 if self._match_text_seq("CREDENTIALS"): 7354 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7355 creds = ( 7356 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7357 ) 7358 expr.set("credentials", creds) 7359 if self._match_text_seq("ENCRYPTION"): 7360 expr.set("encryption", self._parse_wrapped_options()) 7361 if self._match_text_seq("IAM_ROLE"): 7362 expr.set("iam_role", self._parse_field()) 7363 if self._match_text_seq("REGION"): 7364 expr.set("region", self._parse_field()) 7365 7366 return expr 7367 7368 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7369 return self._parse_field() 7370 7371 def _parse_copy(self) -> exp.Copy | exp.Command: 7372 start = self._prev 7373 7374 self._match(TokenType.INTO) 7375 7376 this = ( 7377 self._parse_select(nested=True, parse_subquery_alias=False) 7378 if self._match(TokenType.L_PAREN, advance=False) 7379 else self._parse_table(schema=True) 7380 ) 7381 7382 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7383 7384 files = self._parse_csv(self._parse_file_location) 7385 credentials = self._parse_credentials() 7386 7387 self._match_text_seq("WITH") 7388 7389 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7390 7391 # Fallback case 7392 if self._curr: 7393 return self._parse_as_command(start) 7394 7395 return self.expression( 7396 exp.Copy, 7397 this=this, 7398 kind=kind, 7399 credentials=credentials, 7400 files=files, 7401 params=params, 7402 ) 7403 7404 def _parse_normalize(self) -> exp.Normalize: 7405 return self.expression( 7406 exp.Normalize, 7407 this=self._parse_bitwise(), 7408 form=self._match(TokenType.COMMA) and self._parse_var(), 7409 ) 7410 7411 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7412 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7413 this = self._parse_function() 7414 if isinstance(this, exp.Columns): 7415 this.set("unpack", True) 7416 return this 7417 7418 return self.expression( 7419 exp.Star, 7420 **{ # type: ignore 7421 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7422 "replace": self._parse_star_op("REPLACE"), 7423 "rename": self._parse_star_op("RENAME"), 7424 }, 7425 ) 7426 7427 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7428 privilege_parts = [] 7429 7430 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7431 # (end of privilege list) or L_PAREN (start of column list) are met 7432 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7433 privilege_parts.append(self._curr.text.upper()) 7434 self._advance() 7435 7436 this = exp.var(" ".join(privilege_parts)) 7437 expressions = ( 7438 self._parse_wrapped_csv(self._parse_column) 7439 if self._match(TokenType.L_PAREN, advance=False) 7440 else None 7441 ) 7442 7443 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7444 7445 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7446 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7447 principal = self._parse_id_var() 7448 7449 if not principal: 7450 return None 7451 7452 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7453 7454 def _parse_grant(self) -> exp.Grant | exp.Command: 7455 start = self._prev 7456 7457 privileges = self._parse_csv(self._parse_grant_privilege) 7458 7459 self._match(TokenType.ON) 7460 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7461 7462 # Attempt to parse the securable e.g. MySQL allows names 7463 # such as "foo.*", "*.*" which are not easily parseable yet 7464 securable = self._try_parse(self._parse_table_parts) 7465 7466 if not securable or not self._match_text_seq("TO"): 7467 return self._parse_as_command(start) 7468 7469 principals = self._parse_csv(self._parse_grant_principal) 7470 7471 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7472 7473 if self._curr: 7474 return self._parse_as_command(start) 7475 7476 return self.expression( 7477 exp.Grant, 7478 privileges=privileges, 7479 kind=kind, 7480 securable=securable, 7481 principals=principals, 7482 grant_option=grant_option, 7483 ) 7484 7485 def _parse_overlay(self) -> exp.Overlay: 7486 return self.expression( 7487 exp.Overlay, 7488 **{ # type: ignore 7489 "this": self._parse_bitwise(), 7490 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7491 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7492 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7493 }, 7494 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1360 def __init__( 1361 self, 1362 error_level: t.Optional[ErrorLevel] = None, 1363 error_message_context: int = 100, 1364 max_errors: int = 3, 1365 dialect: DialectType = None, 1366 ): 1367 from sqlglot.dialects import Dialect 1368 1369 self.error_level = error_level or ErrorLevel.IMMEDIATE 1370 self.error_message_context = error_message_context 1371 self.max_errors = max_errors 1372 self.dialect = Dialect.get_or_raise(dialect) 1373 self.reset()
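A minimal construction sketch (assuming the bundled "duckdb" dialect name; any value accepted by Dialect.get_or_raise works):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # Record errors and raise them in bulk (up to max_errors) instead of
    # failing on the first one; the dialect is resolved from its name.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")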
1385 def parse( 1386 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1387 ) -> t.List[t.Optional[exp.Expression]]: 1388 """ 1389 Parses a list of tokens and returns a list of syntax trees, one tree 1390 per parsed SQL statement. 1391 1392 Args: 1393 raw_tokens: The list of tokens. 1394 sql: The original SQL string, used to produce helpful debug messages. 1395 1396 Returns: 1397 The list of the produced syntax trees. 1398 """ 1399 return self._parse( 1400 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1401 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
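A round-trip sketch pairing the Tokenizer with parse():

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    tokens = Tokenizer().tokenize(sql)

    # One syntax tree per statement; passing `sql` lets error messages
    # include the offending query context.
    trees = Parser().parse(tokens, sql=sql)
    assert len(trees) == 2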
1403 def parse_into( 1404 self, 1405 expression_types: exp.IntoType, 1406 raw_tokens: t.List[Token], 1407 sql: t.Optional[str] = None, 1408 ) -> t.List[t.Optional[exp.Expression]]: 1409 """ 1410 Parses a list of tokens into a given Expression type. If a collection of Expression 1411 types is given instead, this method will try to parse the token list into each one 1412 of them, stopping at the first for which the parsing succeeds. 1413 1414 Args: 1415 expression_types: The expression type(s) to try and parse the token list into. 1416 raw_tokens: The list of tokens. 1417 sql: The original SQL string, used to produce helpful debug messages. 1418 1419 Returns: 1420 The target Expression. 1421 """ 1422 errors = [] 1423 for expression_type in ensure_list(expression_types): 1424 parser = self.EXPRESSION_PARSERS.get(expression_type) 1425 if not parser: 1426 raise TypeError(f"No parser registered for {expression_type}") 1427 1428 try: 1429 return self._parse(parser, raw_tokens, sql) 1430 except ParseError as e: 1431 e.errors[0]["into_expression"] = expression_type 1432 errors.append(e) 1433 1434 raise ParseError( 1435 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1436 errors=merge_errors(errors), 1437 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
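For example (a sketch, assuming exp.Select is registered in EXPRESSION_PARSERS, as it is in stock sqlglot):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)

    # Parses the tokens specifically as a SELECT; on failure, the raised
    # ParseError's entries carry the attempted type under `into_expression`.
    select = Parser().parse_into(exp.Select, tokens, sql=sql)[0]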
1477 def check_errors(self) -> None: 1478 """Logs or raises any found errors, depending on the chosen error level setting.""" 1479 if self.error_level == ErrorLevel.WARN: 1480 for error in self.errors: 1481 logger.error(str(error)) 1482 elif self.error_level == ErrorLevel.RAISE and self.errors: 1483 raise ParseError( 1484 concat_messages(self.errors, self.max_errors), 1485 errors=merge_errors(self.errors), 1486 )
Logs or raises any found errors, depending on the chosen error level setting.
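A sketch of the WARN behavior (assuming the input is malformed enough to record an error; "SELECT * FROM" lacks a table name):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize("SELECT * FROM"), sql="SELECT * FROM")

    # With WARN, check_errors logs the recorded errors instead of raising;
    # they remain available for inspection afterwards.
    for error in parser.errors:
        print(error)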
1488 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1489 """ 1490 Appends an error in the list of recorded errors or raises it, depending on the chosen 1491 error level setting. 1492 """ 1493 token = token or self._curr or self._prev or Token.string("") 1494 start = token.start 1495 end = token.end + 1 1496 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1497 highlight = self.sql[start:end] 1498 end_context = self.sql[end : end + self.error_message_context] 1499 1500 error = ParseError.new( 1501 f"{message}. Line {token.line}, Col: {token.col}.\n" 1502 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1503 description=message, 1504 line=token.line, 1505 col=token.col, 1506 start_context=start_context, 1507 highlight=highlight, 1508 end_context=end_context, 1509 ) 1510 1511 if self.error_level == ErrorLevel.IMMEDIATE: 1512 raise error 1513 1514 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
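Typical use is inside custom parsing methods, e.g. in a dialect's Parser subclass (MyParser and _parse_my_clause below are hypothetical names for illustration):

    from sqlglot import exp
    from sqlglot.parser import Parser

    class MyParser(Parser):
        # Hypothetical clause parser: raise_error reports the problem at the
        # current token, so the message carries line/col and SQL context.
        def _parse_my_clause(self):
            if not self._match_text_seq("MY", "CLAUSE"):
                self.raise_error("Expected MY CLAUSE")
            return self.expression(exp.Var, this="MY CLAUSE")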
1516 def expression( 1517 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1518 ) -> E: 1519 """ 1520 Creates a new, validated Expression. 1521 1522 Args: 1523 exp_class: The expression class to instantiate. 1524 comments: An optional list of comments to attach to the expression. 1525 kwargs: The arguments to set for the expression along with their respective values. 1526 1527 Returns: 1528 The target expression. 1529 """ 1530 instance = exp_class(**kwargs) 1531 instance.add_comments(comments) if comments else self._add_comments(instance) 1532 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
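A sketch of direct use (equivalent to the `self.expression(...)` calls throughout the parsing methods above):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()

    # Builds the node, attaches any pending comments, and validates that all
    # mandatory arguments are present before returning it.
    col = parser.expression(exp.Column, this=exp.to_identifier("x"))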
1539 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1540 """ 1541 Validates an Expression, making sure that all its mandatory arguments are set. 1542 1543 Args: 1544 expression: The expression to validate. 1545 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1546 1547 Returns: 1548 The validated expression. 1549 """ 1550 if self.error_level != ErrorLevel.IGNORE: 1551 for error_message in expression.error_messages(args): 1552 self.raise_error(error_message) 1553 1554 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
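A failure-mode sketch (assuming exp.Cast, whose `to` argument is mandatory in sqlglot's expression definitions):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()  # ErrorLevel.IMMEDIATE by default

    bad = exp.Cast(this=exp.column("x"))  # mandatory `to` argument missing
    parser.validate_expression(bad)       # raises ParseError immediately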