goldmedal commented on code in PR #1551:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/1551#discussion_r1882455970


##########
src/parser/mod.rs:
##########
@@ -1427,6 +1426,112 @@ impl<'a> Parser<'a> {
         }
     }
 
+    /// Try to parse an [Expr::CompoundExpr] like `a.b.c` or `a.b[1].c`.
+    /// If all the fields are `Expr::Identifier`s, return an 
[Expr::CompoundIdentifier] instead.
+    /// If only the root exists, return the root.
+    /// If self supports [Dialect::supports_partiql], it will fall back when 
occurs [Token::LBracket] for JsonAccess parsing.
+    pub fn parse_compound_expr(
+        &mut self,
+        root: Expr,
+        mut chain: Vec<AccessField>,
+    ) -> Result<Expr, ParserError> {
+        let mut ending_wildcard: Option<TokenWithSpan> = None;
+        let mut ending_lbracket = false;
+        while self.consume_token(&Token::Period) {
+            let next_token = self.next_token();
+            match next_token.token {
+                Token::Word(w) => {
+                    let expr = Expr::Identifier(w.to_ident(next_token.span));
+                    chain.push(AccessField::Expr(expr));
+                    if self.consume_token(&Token::LBracket) {
+                        if self.dialect.supports_partiql() {
+                            ending_lbracket = true;
+                            break;
+                        } else {
+                            self.parse_multi_dim_subscript(&mut chain)?
+                        }
+                    }
+                }
+                Token::Mul => {
+                    // Postgres explicitly allows funcnm(tablenm.*) and the
+                    // function array_agg traverses this control flow
+                    if dialect_of!(self is PostgreSqlDialect) {
+                        ending_wildcard = Some(next_token);
+                        break;
+                    } else {
+                        return self.expected("an identifier after '.'", 
next_token);
+                    }
+                }
+                Token::SingleQuotedString(s) => {
+                    let expr = Expr::Identifier(Ident::with_quote('\'', s));
+                    chain.push(AccessField::Expr(expr));
+                }
+                _ => {
+                    return self.expected("an identifier or a '*' after '.'", 
next_token);
+                }
+            }
+        }
+
+        // if dialect supports partiql, we need to go back one Token::LBracket 
for the JsonAccess parsing
+        if self.dialect.supports_partiql() && ending_lbracket {
+            self.prev_token();
+        }
+
+        if let Some(wildcard_token) = ending_wildcard {

Review Comment:
   @iffyio 
   I feel the idea of this loop is good but I found a problem 🤔 . If we try to 
invoke `parse_subexpr` recursively, we will still get the nested 
representation. Consider the following cases:
   
   ## The representation of abc[1].f1.f2
   The last field is a `CompoundIdentifier`.
   ```
   &root = Identifier(
       Ident {
           value: "abc",
           quote_style: None,
           span: Span(Location(0,0)..Location(0,0)),
       },
   )
   &chain = [
       Subscript(
           Index {
               index: Value(
                   Number(
                       "1",
                       false,
                   ),
               ),
           },
       ),
       Dot(
           CompoundIdentifier(
               [
                   Ident {
                       value: "f1",
                       quote_style: None,
                       span: Span(Location(0,0)..Location(0,0)),
                   },
                   Ident {
                       value: "f2",
                       quote_style: None,
                       span: Span(Location(0,0)..Location(0,0)),
                   },
               ],
           ),
       ),
   ]
   ```
   ## The representation of abc[1].f1.f2[1]
   The last field is a `CompoundFieldAccess` with another chain.
   ```
   &root = Identifier(
       Ident {
           value: "abc",
           quote_style: None,
           span: Span(Location(0,0)..Location(0,0)),
       },
   )
   &chain = [
       Subscript(
           Index {
               index: Value(
                   Number(
                       "1",
                       false,
                   ),
               ),
           },
       ),
       Dot(
           CompoundFieldAccess {
               root: Identifier(
                   Ident {
                       value: "f1",
                       quote_style: None,
                       span: Span(Location(0,0)..Location(0,0)),
                   },
               ),
               access_chain: [
                   Dot(
                       CompoundFieldAccess {
                           root: Identifier(
                               Ident {
                                   value: "f2",
                                   quote_style: None,
                                   span: Span(Location(0,0)..Location(0,0)),
                               },
                           ),
                           access_chain: [
                               Subscript(
                                   Index {
                                       index: Value(
                                           Number(
                                               "1",
                                               false,
                                           ),
                                       ),
                                   },
                               ),
                           ],
                       },
                   ),
               ],
           },
       ),
   ]
   ```
   It seems we should consider `CompoundIdentifier` and `CompoundFieldAccess` 
as a kind of the nested struct. 
   Ideally, the access_cahin shouldn't contain another nested struct. 
   The `parse_subexpr` shouldn't produce the nested struct 🤔. It makes the 
thing more complicated.
   I don't prefer to do It in this PR. The side effects will be too large. 
Maybe we can have a follow-up PR to refactor here.
   WDYT?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to