iffyio commented on code in PR #1551:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/1551#discussion_r1883938066


##########
src/parser/mod.rs:
##########
@@ -1427,6 +1424,112 @@ impl<'a> Parser<'a> {
         }
     }
 
+    /// Try to parse an [Expr::CompoundFieldAccess] like `a.b.c` or `a.b[1].c`.
+    /// If all the fields are `Expr::Identifier`s, return an 
[Expr::CompoundIdentifier] instead.
+    /// If only the root exists, return the root.
+    /// If self supports [Dialect::supports_partiql], it will fall back when 
occurs [Token::LBracket] for JsonAccess parsing.
+    pub fn parse_compound_field_access(
+        &mut self,
+        root: Expr,
+        mut chain: Vec<AccessExpr>,
+    ) -> Result<Expr, ParserError> {
+        let mut ending_wildcard: Option<TokenWithSpan> = None;
+        let mut ending_lbracket = false;
+        while self.consume_token(&Token::Period) {
+            let next_token = self.next_token();
+            match next_token.token {
+                Token::Word(w) => {
+                    let expr = Expr::Identifier(w.to_ident(next_token.span));
+                    chain.push(AccessExpr::Dot(expr));
+                    if self.consume_token(&Token::LBracket) {
+                        if self.dialect.supports_partiql() {
+                            ending_lbracket = true;
+                            break;
+                        } else {
+                            self.parse_multi_dim_subscript(&mut chain)?
+                        }
+                    }
+                }
+                Token::Mul => {
+                    // Postgres explicitly allows funcnm(tablenm.*) and the
+                    // function array_agg traverses this control flow
+                    if dialect_of!(self is PostgreSqlDialect) {
+                        ending_wildcard = Some(next_token);
+                        break;
+                    } else {
+                        return self.expected("an identifier after '.'", 
next_token);
+                    }
+                }
+                Token::SingleQuotedString(s) => {
+                    let expr = Expr::Identifier(Ident::with_quote('\'', s));
+                    chain.push(AccessExpr::Dot(expr));
+                }
+                _ => {
+                    return self.expected("an identifier or a '*' after '.'", 
next_token);
+                }
+            }
+        }
+
+        // if dialect supports partiql, we need to go back one Token::LBracket 
for the JsonAccess parsing
+        if self.dialect.supports_partiql() && ending_lbracket {
+            self.prev_token();
+        }
+
+        if let Some(wildcard_token) = ending_wildcard {
+            let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else {
+                return self.expected("an identifier or a '*' after '.'", 
self.peek_token());
+            };
+            Ok(Expr::QualifiedWildcard(
+                ObjectName(id_parts),
+                AttachedToken(wildcard_token),
+            ))
+        } else if self.consume_token(&Token::LParen) {
+            let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else {
+                return self.expected("an identifier or a '*' after '.'", 
self.peek_token());
+            };
+            if dialect_of!(self is SnowflakeDialect | MsSqlDialect)
+                && self.consume_tokens(&[Token::Plus, Token::RParen])
+            {
+                Ok(Expr::OuterJoin(Box::new(
+                    match <[Ident; 1]>::try_from(id_parts) {
+                        Ok([ident]) => Expr::Identifier(ident),
+                        Err(parts) => Expr::CompoundIdentifier(parts),
+                    },
+                )))

Review Comment:
   I think it would be nice if we could reuse this logic somehow in a function, 
since it's not trivial and lists dialects. Now that it's duplicated, it could be 
easy to miss if changes are made to one copy but not the other.



##########
src/parser/mod.rs:
##########
@@ -3095,15 +3201,29 @@ impl<'a> Parser<'a> {
         })
     }
 
+    /// Parse an multi-dimension array accessing like `[1:3][1][1]`

Review Comment:
   ```suggestion
       /// Parses a multi-dimension array accessing like `[1:3][1][1]`
   ```



##########
src/parser/mod.rs:
##########
@@ -3167,42 +3287,23 @@ impl<'a> Parser<'a> {
 
     pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, 
ParserError> {
         let key = self.parse_expr()?;
+        let result = match key {

Review Comment:
   hmm I was initially wondering about this match since it looked incorrect to 
restrict the key expression type. e.g. this BigQuery [test 
case](https://github.com/apache/datafusion-sqlparser-rs/blob/e16b246/tests/sqlparser_bigquery.rs#L1969).
 But then I realised that test case is passing because it now takes a different 
codepath via `parse_compound_field_access`. So I wonder whether any scenarios 
still rely on this method; if there aren't any, it seems like we might be able 
to remove it entirely?



##########
src/parser/mod.rs:
##########
@@ -3095,15 +3201,29 @@ impl<'a> Parser<'a> {
         })
     }
 
+    /// Parse an multi-dimension array accessing like `[1:3][1][1]`
+    ///
+    /// Parser is right after the first `[`

Review Comment:
   Given this is a pub function, it would be nice if the API were symmetric, so 
that the function also consumed the leading `[`. I think within the parser 
itself that would mean calling `prev_token()` (or replacing with `peek()`) before 
invoking this function, which would be a fair price to pay?



##########
src/parser/mod.rs:
##########
@@ -1144,53 +1144,52 @@ impl<'a> Parser<'a> {
         w_span: Span,
     ) -> Result<Expr, ParserError> {
         match self.peek_token().token {
-            Token::LParen | Token::Period => {
-                let mut id_parts: Vec<Ident> = vec![w.to_ident(w_span)];
-                let mut ending_wildcard: Option<TokenWithSpan> = None;
-                while self.consume_token(&Token::Period) {
-                    let next_token = self.next_token();
-                    match next_token.token {
-                        Token::Word(w) => 
id_parts.push(w.to_ident(next_token.span)),
-                        Token::Mul => {
-                            // Postgres explicitly allows funcnm(tablenm.*) 
and the
-                            // function array_agg traverses this control flow
-                            if dialect_of!(self is PostgreSqlDialect) {
-                                ending_wildcard = Some(next_token);
-                                break;
-                            } else {
-                                return self.expected("an identifier after 
'.'", next_token);
-                            }
-                        }
-                        Token::SingleQuotedString(s) => 
id_parts.push(Ident::with_quote('\'', s)),
-                        _ => {
-                            return self.expected("an identifier or a '*' after 
'.'", next_token);
-                        }
+            Token::Period => 
self.parse_compound_expr(Expr::Identifier(w.to_ident(w_span)), vec![]),
+            Token::LParen => {
+                let id_parts = vec![w.to_ident(w_span)];
+                // parse_comma_outer_join is used to parse the following 
pattern:
+                if dialect_of!(self is SnowflakeDialect | MsSqlDialect)
+                    && self.consume_tokens(&[Token::LParen, Token::Plus, 
Token::RParen])
+                {
+                    Ok(Expr::OuterJoin(Box::new(
+                        match <[Ident; 1]>::try_from(id_parts) {
+                            Ok([ident]) => Expr::Identifier(ident),
+                            Err(parts) => Expr::CompoundIdentifier(parts),
+                        },
+                    )))
+                } else {
+                    let mut expr = self.parse_function(ObjectName(id_parts))?;
+                    // consume all period if it's a method chain
+                    if self.dialect.supports_methods() {

Review Comment:
   Oh, to clarify what I meant was that `try_parse_method` does this already
   ```rust
   if !self.dialect.supports_methods() {
        return Ok(expr);
   }
   ```
   
   So that this can be simplified as following (i.e without the extra `if 
self.dialect.supports_methods()`)
   ```rust
   expr = self.try_parse_method(expr)?;
   ```
   



##########
src/parser/mod.rs:
##########
@@ -1427,6 +1424,112 @@ impl<'a> Parser<'a> {
         }
     }
 
+    /// Try to parse an [Expr::CompoundFieldAccess] like `a.b.c` or `a.b[1].c`.
+    /// If all the fields are `Expr::Identifier`s, return an 
[Expr::CompoundIdentifier] instead.
+    /// If only the root exists, return the root.
+    /// If self supports [Dialect::supports_partiql], it will fall back when 
occurs [Token::LBracket] for JsonAccess parsing.
+    pub fn parse_compound_field_access(
+        &mut self,
+        root: Expr,
+        mut chain: Vec<AccessExpr>,
+    ) -> Result<Expr, ParserError> {
+        let mut ending_wildcard: Option<TokenWithSpan> = None;
+        let mut ending_lbracket = false;
+        while self.consume_token(&Token::Period) {
+            let next_token = self.next_token();
+            match next_token.token {
+                Token::Word(w) => {
+                    let expr = Expr::Identifier(w.to_ident(next_token.span));
+                    chain.push(AccessExpr::Dot(expr));
+                    if self.consume_token(&Token::LBracket) {
+                        if self.dialect.supports_partiql() {
+                            ending_lbracket = true;
+                            break;
+                        } else {
+                            self.parse_multi_dim_subscript(&mut chain)?
+                        }
+                    }
+                }
+                Token::Mul => {
+                    // Postgres explicitly allows funcnm(tablenm.*) and the
+                    // function array_agg traverses this control flow
+                    if dialect_of!(self is PostgreSqlDialect) {
+                        ending_wildcard = Some(next_token);
+                        break;
+                    } else {
+                        return self.expected("an identifier after '.'", 
next_token);
+                    }
+                }
+                Token::SingleQuotedString(s) => {
+                    let expr = Expr::Identifier(Ident::with_quote('\'', s));
+                    chain.push(AccessExpr::Dot(expr));
+                }
+                _ => {
+                    return self.expected("an identifier or a '*' after '.'", 
next_token);
+                }
+            }
+        }
+
+        // if dialect supports partiql, we need to go back one Token::LBracket 
for the JsonAccess parsing
+        if self.dialect.supports_partiql() && ending_lbracket {
+            self.prev_token();
+        }
+
+        if let Some(wildcard_token) = ending_wildcard {
+            let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else {
+                return self.expected("an identifier or a '*' after '.'", 
self.peek_token());
+            };
+            Ok(Expr::QualifiedWildcard(
+                ObjectName(id_parts),
+                AttachedToken(wildcard_token),
+            ))
+        } else if self.consume_token(&Token::LParen) {
+            let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else {
+                return self.expected("an identifier or a '*' after '.'", 
self.peek_token());
+            };
+            if dialect_of!(self is SnowflakeDialect | MsSqlDialect)
+                && self.consume_tokens(&[Token::Plus, Token::RParen])
+            {
+                Ok(Expr::OuterJoin(Box::new(
+                    match <[Ident; 1]>::try_from(id_parts) {
+                        Ok([ident]) => Expr::Identifier(ident),
+                        Err(parts) => Expr::CompoundIdentifier(parts),
+                    },
+                )))
+            } else {
+                self.prev_token();
+                self.parse_function(ObjectName(id_parts))
+            }
+        } else {
+            if let Some(id_parts) = Self::exprs_to_idents(&root, &chain) {
+                return Ok(Expr::CompoundIdentifier(id_parts));
+            }
+            if chain.is_empty() {
+                return Ok(root);
+            }
+            Ok(Expr::CompoundFieldAccess {
+                root: Box::new(root),
+                access_chain: chain.clone(),
+            })
+        }
+    }
+
+    fn exprs_to_idents(root: &Expr, fields: &[AccessExpr]) -> 
Option<Vec<Ident>> {

Review Comment:
   Could we add a description of what the function does?



##########
src/parser/mod.rs:
##########
@@ -1427,6 +1426,112 @@ impl<'a> Parser<'a> {
         }
     }
 
+    /// Try to parse an [Expr::CompoundExpr] like `a.b.c` or `a.b[1].c`.
+    /// If all the fields are `Expr::Identifier`s, return an 
[Expr::CompoundIdentifier] instead.
+    /// If only the root exists, return the root.
+    /// If self supports [Dialect::supports_partiql], it will fall back when 
occurs [Token::LBracket] for JsonAccess parsing.
+    pub fn parse_compound_expr(
+        &mut self,
+        root: Expr,
+        mut chain: Vec<AccessField>,
+    ) -> Result<Expr, ParserError> {
+        let mut ending_wildcard: Option<TokenWithSpan> = None;
+        let mut ending_lbracket = false;
+        while self.consume_token(&Token::Period) {
+            let next_token = self.next_token();
+            match next_token.token {
+                Token::Word(w) => {
+                    let expr = Expr::Identifier(w.to_ident(next_token.span));
+                    chain.push(AccessField::Expr(expr));
+                    if self.consume_token(&Token::LBracket) {
+                        if self.dialect.supports_partiql() {
+                            ending_lbracket = true;
+                            break;
+                        } else {
+                            self.parse_multi_dim_subscript(&mut chain)?
+                        }
+                    }
+                }
+                Token::Mul => {
+                    // Postgres explicitly allows funcnm(tablenm.*) and the
+                    // function array_agg traverses this control flow
+                    if dialect_of!(self is PostgreSqlDialect) {
+                        ending_wildcard = Some(next_token);
+                        break;
+                    } else {
+                        return self.expected("an identifier after '.'", 
next_token);
+                    }
+                }
+                Token::SingleQuotedString(s) => {
+                    let expr = Expr::Identifier(Ident::with_quote('\'', s));
+                    chain.push(AccessField::Expr(expr));
+                }
+                _ => {
+                    return self.expected("an identifier or a '*' after '.'", 
next_token);
+                }
+            }
+        }
+
+        // if dialect supports partiql, we need to go back one Token::LBracket 
for the JsonAccess parsing
+        if self.dialect.supports_partiql() && ending_lbracket {
+            self.prev_token();
+        }
+
+        if let Some(wildcard_token) = ending_wildcard {

Review Comment:
   Ah yeah I agree the nesting isn't desirable here, and it makes sense to 
consider that behavior separate from this PR!



##########
src/parser/mod.rs:
##########
@@ -1427,6 +1424,112 @@ impl<'a> Parser<'a> {
         }
     }
 
+    /// Try to parse an [Expr::CompoundFieldAccess] like `a.b.c` or `a.b[1].c`.
+    /// If all the fields are `Expr::Identifier`s, return an 
[Expr::CompoundIdentifier] instead.
+    /// If only the root exists, return the root.
+    /// If self supports [Dialect::supports_partiql], it will fall back when 
occurs [Token::LBracket] for JsonAccess parsing.
+    pub fn parse_compound_field_access(
+        &mut self,
+        root: Expr,
+        mut chain: Vec<AccessExpr>,
+    ) -> Result<Expr, ParserError> {
+        let mut ending_wildcard: Option<TokenWithSpan> = None;
+        let mut ending_lbracket = false;
+        while self.consume_token(&Token::Period) {
+            let next_token = self.next_token();
+            match next_token.token {
+                Token::Word(w) => {
+                    let expr = Expr::Identifier(w.to_ident(next_token.span));
+                    chain.push(AccessExpr::Dot(expr));
+                    if self.consume_token(&Token::LBracket) {
+                        if self.dialect.supports_partiql() {
+                            ending_lbracket = true;
+                            break;
+                        } else {
+                            self.parse_multi_dim_subscript(&mut chain)?
+                        }
+                    }
+                }
+                Token::Mul => {
+                    // Postgres explicitly allows funcnm(tablenm.*) and the
+                    // function array_agg traverses this control flow
+                    if dialect_of!(self is PostgreSqlDialect) {
+                        ending_wildcard = Some(next_token);
+                        break;
+                    } else {
+                        return self.expected("an identifier after '.'", 
next_token);
+                    }
+                }
+                Token::SingleQuotedString(s) => {
+                    let expr = Expr::Identifier(Ident::with_quote('\'', s));
+                    chain.push(AccessExpr::Dot(expr));
+                }
+                _ => {
+                    return self.expected("an identifier or a '*' after '.'", 
next_token);
+                }
+            }
+        }
+
+        // if dialect supports partiql, we need to go back one Token::LBracket 
for the JsonAccess parsing
+        if self.dialect.supports_partiql() && ending_lbracket {
+            self.prev_token();
+        }
+
+        if let Some(wildcard_token) = ending_wildcard {
+            let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else {
+                return self.expected("an identifier or a '*' after '.'", 
self.peek_token());
+            };
+            Ok(Expr::QualifiedWildcard(
+                ObjectName(id_parts),
+                AttachedToken(wildcard_token),
+            ))
+        } else if self.consume_token(&Token::LParen) {
+            let Some(id_parts) = Self::exprs_to_idents(&root, &chain) else {
+                return self.expected("an identifier or a '*' after '.'", 
self.peek_token());
+            };
+            if dialect_of!(self is SnowflakeDialect | MsSqlDialect)
+                && self.consume_tokens(&[Token::Plus, Token::RParen])
+            {
+                Ok(Expr::OuterJoin(Box::new(
+                    match <[Ident; 1]>::try_from(id_parts) {
+                        Ok([ident]) => Expr::Identifier(ident),
+                        Err(parts) => Expr::CompoundIdentifier(parts),
+                    },
+                )))
+            } else {
+                self.prev_token();
+                self.parse_function(ObjectName(id_parts))
+            }
+        } else {
+            if let Some(id_parts) = Self::exprs_to_idents(&root, &chain) {
+                return Ok(Expr::CompoundIdentifier(id_parts));
+            }
+            if chain.is_empty() {
+                return Ok(root);
+            }
+            Ok(Expr::CompoundFieldAccess {
+                root: Box::new(root),
+                access_chain: chain.clone(),
+            })
+        }
+    }
+
+    fn exprs_to_idents(root: &Expr, fields: &[AccessExpr]) -> 
Option<Vec<Ident>> {

Review Comment:
   since the caller wouldn't need the copy of `root` and `fields` it would be 
nice to avoid the cloning? maybe something like this could work?
   ```rust
   fn exprs_to_idents(root: Expr, fields: Vec<AccessExpr>) -> Result<Vec<Ident>, (Expr, Vec<AccessExpr>)>
   ```
   so that if no conversion was made the caller gets back the same values they 
provided?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to