iffyio commented on code in PR #1513: URL: https://github.com/apache/datafusion-sqlparser-rs/pull/1513#discussion_r1844267451
########## src/parser/mod.rs: ########## @@ -1013,175 +1189,22 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); let expr = match next_token.token { - Token::Word(w) => match w.keyword { - Keyword::TRUE | Keyword::FALSE if self.dialect.supports_boolean_literals() => { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Keyword::NULL => { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Keyword::CURRENT_CATALOG - | Keyword::CURRENT_USER - | Keyword::SESSION_USER - | Keyword::USER - if dialect_of!(self is PostgreSqlDialect | GenericDialect) => - { - Ok(Expr::Function(Function { - name: ObjectName(vec![w.to_ident()]), - parameters: FunctionArguments::None, - args: FunctionArguments::None, - null_treatment: None, - filter: None, - over: None, - within_group: vec![], - })) - } - Keyword::CURRENT_TIMESTAMP - | Keyword::CURRENT_TIME - | Keyword::CURRENT_DATE - | Keyword::LOCALTIME - | Keyword::LOCALTIMESTAMP => { - self.parse_time_functions(ObjectName(vec![w.to_ident()])) - } - Keyword::CASE => self.parse_case_expr(), - Keyword::CONVERT => self.parse_convert_expr(false), - Keyword::TRY_CONVERT if self.dialect.supports_try_convert() => self.parse_convert_expr(true), - Keyword::CAST => self.parse_cast_expr(CastKind::Cast), - Keyword::TRY_CAST => self.parse_cast_expr(CastKind::TryCast), - Keyword::SAFE_CAST => self.parse_cast_expr(CastKind::SafeCast), - Keyword::EXISTS - // Support parsing Databricks has a function named `exists`. - if !dialect_of!(self is DatabricksDialect) - || matches!( - self.peek_nth_token(1).token, - Token::Word(Word { - keyword: Keyword::SELECT | Keyword::WITH, - .. - }) - ) => - { - self.parse_exists_expr(false) - } - Keyword::EXTRACT => self.parse_extract_expr(), - Keyword::CEIL => self.parse_ceil_floor_expr(true), - Keyword::FLOOR => self.parse_ceil_floor_expr(false), - Keyword::POSITION if self.peek_token().token == Token::LParen => { - self.parse_position_expr(w.to_ident()) - } - Keyword::SUBSTRING => self.parse_substring_expr(), - Keyword::OVERLAY => self.parse_overlay_expr(), - Keyword::TRIM => self.parse_trim_expr(), - Keyword::INTERVAL => self.parse_interval(), - // Treat ARRAY[1,2,3] as an array [1,2,3], otherwise try as subquery or a function call - Keyword::ARRAY if self.peek_token() == Token::LBracket => { - self.expect_token(&Token::LBracket)?; - self.parse_array_expr(true) - } - Keyword::ARRAY - if self.peek_token() == Token::LParen - && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) => - { - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Function(Function { - name: ObjectName(vec![w.to_ident()]), - parameters: FunctionArguments::None, - args: FunctionArguments::Subquery(query), - filter: None, - null_treatment: None, - over: None, - within_group: vec![], - })) - } - Keyword::NOT => self.parse_not(), - Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { - self.parse_match_against() - } - Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { - self.prev_token(); - self.parse_bigquery_struct_literal() - } - Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => { - let expr = self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?; - Ok(Expr::Prior(Box::new(expr))) - } - Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => { - self.parse_duckdb_map_literal() - } - // Here `w` is a word, check if it's a part of a multipart - // identifier, a function call, or a simple identifier: - _ => match self.peek_token().token { - Token::LParen | Token::Period => { - let mut id_parts: Vec<Ident> = vec![w.to_ident()]; - let mut ends_with_wildcard = false; - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident()), - Token::Mul => { - // Postgres explicitly allows funcnm(tablenm.*) and the - // function array_agg traverses this control flow - if dialect_of!(self is PostgreSqlDialect) { - ends_with_wildcard = true; - break; - } else { - return self - .expected("an identifier after '.'", next_token); - } - } - Token::SingleQuotedString(s) => { - id_parts.push(Ident::with_quote('\'', s)) - } - _ => { - return self - .expected("an identifier or a '*' after '.'", next_token); - } - } + // We first try to parse the word as the prefix of an expression. + // For example, the word INTERVAL in: SELECT INTERVAL '7' DAY + Token::Word(w) => match self.try_parse(|parser| parser.parse_expr_by_keyword(&w)) { Review Comment: Ah right, can we use a enum to represent the states returned by `parse_expr_by_keyword` instead? Im thinking that would be more appropriate here, we wouldnt want to flag this as an error since that's a public api, and it'll likely let us use maybe_parse as well so that we avoid the try_parse introduction -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org