iffyio commented on code in PR #1576:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/1576#discussion_r1873272299


##########
src/tokenizer.rs:
##########
@@ -1075,25 +1075,56 @@ impl<'a> Tokenizer<'a> {
                     Ok(Some(Token::DoubleQuotedString(s)))
                 }
                 // delimited (quoted) identifier
+                quote_start if self.dialect.is_delimited_identifier_start(ch) 
=> {
+                    let word = self.tokenize_quoted_identifier(quote_start, 
chars)?;
+                    Ok(Some(Token::make_word(&word, Some(quote_start))))
+                }
+                // special (quoted) identifier
                 quote_start
-                    if self.dialect.is_delimited_identifier_start(ch)
+                    if self
+                        .dialect
+                        .is_nested_delimited_identifier_start(quote_start)
                         && self
                             .dialect
-                            
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
+                            
.nested_delimited_identifier(chars.peekable.clone())
+                            .is_some() =>
                 {
-                    let error_loc = chars.location();
-                    chars.next(); // consume the opening quote
+                    let (quote_start, nested_quote_start) = self
+                        .dialect
+                        .nested_delimited_identifier(chars.peekable.clone())
+                        .unwrap();

Review Comment:
   ```suggestion
   ```
   we can return an error here?



##########
src/dialect/mod.rs:
##########
@@ -128,14 +128,23 @@ pub trait Dialect: Debug + Any {
         ch == '"' || ch == '`'
     }
 
-    /// Return the character used to quote identifiers.
-    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
+    /// Determine if a character starts a potential nested quoted identifier.
+    /// RedShift support old way of quotation with `[` and it can cover even 
nested quoted identifier.

Review Comment:
   ```suggestion
       /// Example: RedShift supports the following quote styles to all mean 
the same thing:
       /// ```sql
       /// SELECT 1 AS foo;
       /// SELECT 1 AS "foo";
       /// SELECT 1 AS [foo];
       /// SELECT 1 AS ["foo"];
       /// ```
   ```
   Thinking something like this to clarify the behavior?



##########
tests/sqlparser_redshift.rs:
##########
@@ -279,6 +279,31 @@ fn test_redshift_json_path() {
         },
         expr_from_projection(only(&select.projection))
     );
+
+    let sql = r#"SELECT db1.sc1.tbl1.col1[0]."id" FROM 
customer_orders_lineitem"#;
+    let select = dialects.verified_only_select(sql);
+    assert_eq!(
+        &Expr::JsonAccess {
+            value: Box::new(Expr::CompoundIdentifier(vec![
+                Ident::new("db1"),
+                Ident::new("sc1"),
+                Ident::new("tbl1"),
+                Ident::new("col1")
+            ])),
+            path: JsonPath {
+                path: vec![
+                    JsonPathElem::Bracket {
+                        key: Expr::Value(Value::Number("0".parse().unwrap(), 
false))

Review Comment:
   ```suggestion
                           key: Expr::Value(number("0"))
   ```



##########
src/tokenizer.rs:
##########
@@ -1075,25 +1075,56 @@ impl<'a> Tokenizer<'a> {
                     Ok(Some(Token::DoubleQuotedString(s)))
                 }
                 // delimited (quoted) identifier
+                quote_start if self.dialect.is_delimited_identifier_start(ch) 
=> {
+                    let word = self.tokenize_quoted_identifier(quote_start, 
chars)?;
+                    Ok(Some(Token::make_word(&word, Some(quote_start))))
+                }
+                // special (quoted) identifier
                 quote_start
-                    if self.dialect.is_delimited_identifier_start(ch)
+                    if self
+                        .dialect
+                        .is_nested_delimited_identifier_start(quote_start)
                         && self
                             .dialect
-                            
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
+                            
.nested_delimited_identifier(chars.peekable.clone())
+                            .is_some() =>
                 {
-                    let error_loc = chars.location();
-                    chars.next(); // consume the opening quote
+                    let (quote_start, nested_quote_start) = self
+                        .dialect
+                        .nested_delimited_identifier(chars.peekable.clone())
+                        .unwrap();
+
+                    let Some(nested_quote_start) = nested_quote_start else {
+                        let word = 
self.tokenize_quoted_identifier(quote_start, chars)?;
+                        return Ok(Some(Token::make_word(&word, 
Some(quote_start))));
+                    };
+
+                    let mut word = vec![];
                     let quote_end = Word::matching_end_quote(quote_start);
-                    let (s, last_char) = self.parse_quoted_ident(chars, 
quote_end);
+                    let nested_quote_end = 
Word::matching_end_quote(nested_quote_start);
+                    let error_loc = chars.location();
 
-                    if last_char == Some(quote_end) {
-                        Ok(Some(Token::make_word(&s, Some(quote_start))))
-                    } else {
-                        self.tokenizer_error(
+                    chars.next(); // skip the first delimiter
+                    word.push(peeking_take_while(chars, |ch| 
ch.is_whitespace()));
+                    if chars.peek() != Some(&nested_quote_start) {
+                        return self.tokenizer_error(
+                            error_loc,
+                            format!("Expected nested delimiter 
'{nested_quote_start}' before EOF."),
+                        );
+                    }
+                    word.push(format!("{nested_quote_start}"));

Review Comment:
   ```suggestion
                       word.push(nested_quote_start.into()));
   ```



##########
src/dialect/mod.rs:
##########
@@ -128,14 +128,23 @@ pub trait Dialect: Debug + Any {
         ch == '"' || ch == '`'
     }
 
-    /// Return the character used to quote identifiers.
-    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
+    /// Determine if a character starts a potential nested quoted identifier.
+    /// RedShift support old way of quotation with `[` and it can cover even 
nested quoted identifier.
+    fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool {
+        false
+    }
+
+    /// Determine if nested quoted characters are presented
+    fn nested_delimited_identifier(

Review Comment:
   ```suggestion
       fn peek_nested_delimited_identifier_quotes(
   ```



##########
src/tokenizer.rs:
##########
@@ -1075,25 +1075,56 @@ impl<'a> Tokenizer<'a> {
                     Ok(Some(Token::DoubleQuotedString(s)))
                 }
                 // delimited (quoted) identifier
+                quote_start if self.dialect.is_delimited_identifier_start(ch) 
=> {
+                    let word = self.tokenize_quoted_identifier(quote_start, 
chars)?;
+                    Ok(Some(Token::make_word(&word, Some(quote_start))))
+                }
+                // special (quoted) identifier

Review Comment:
   ```suggestion
                   // Potentially nested delimited (quoted) identifier
   ```



##########
src/dialect/redshift.rs:
##########
@@ -32,21 +32,36 @@ pub struct RedshiftSqlDialect {}
 // in the Postgres dialect, the query will be parsed as an array, while in the 
Redshift dialect it will
 // be a json path
 impl Dialect for RedshiftSqlDialect {
-    fn is_delimited_identifier_start(&self, ch: char) -> bool {
-        ch == '"' || ch == '['
+    fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
+        ch == '['
     }
 
-    /// Determine if quoted characters are proper for identifier
+    /// Determine if quoted characters are looks like special case of 
quotation begining with `[`.

Review Comment:
   Maybe we can copy over the suggested dialect method comment here since its 
already describing redshift.



##########
tests/sqlparser_redshift.rs:
##########
@@ -353,3 +378,20 @@ fn test_parse_json_path_from() {
         _ => panic!(),
     }
 }
+
+#[test]
+fn test_parse_select_numbered_columns() {

Review Comment:
   thinking a couple cases we can include?
   ```sql
   SELECT 1 AS ["1];
   SELECT 1 AS [ " 1 " ];
   ```



##########
src/dialect/mod.rs:
##########
@@ -128,14 +128,23 @@ pub trait Dialect: Debug + Any {
         ch == '"' || ch == '`'
     }
 
-    /// Return the character used to quote identifiers.
-    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
+    /// Determine if a character starts a potential nested quoted identifier.
+    /// RedShift support old way of quotation with `[` and it can cover even 
nested quoted identifier.
+    fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool {
+        false
+    }
+
+    /// Determine if nested quoted characters are presented

Review Comment:
   ```suggestion
       /// Only applicable whenever 
[`Self::is_nested_delimited_identifier_start`] returns true
       /// If the next sequence of tokens potentially represent a nested 
identifier, then this method
       /// returns a tuple containing the outer quote style, and if present, 
the inner (nested) quote style.
       ///
       /// Example (Redshift):
       /// ```text
       /// `["foo"]` => (Some(`[`), Some(`"`))
       /// `[foo]` => (Some(`[`), None)
       /// `"foo"` => None
       /// ```
   ```
   Similarly, to clarify the behavior with an example since it wouldn't be 
intuitive otherwise



##########
tests/sqlparser_redshift.rs:
##########
@@ -353,3 +378,20 @@ fn test_parse_json_path_from() {
         _ => panic!(),
     }
 }
+
+#[test]
+fn test_parse_select_numbered_columns() {
+    redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
+    // RedShift specific case - quoted identifier inside square bracket

Review Comment:
   ```suggestion
   ```
   this one seems to be already covered by `test_parse_create_numbered_columns`?



##########
src/tokenizer.rs:
##########
@@ -1075,25 +1075,56 @@ impl<'a> Tokenizer<'a> {
                     Ok(Some(Token::DoubleQuotedString(s)))
                 }
                 // delimited (quoted) identifier
+                quote_start if self.dialect.is_delimited_identifier_start(ch) 
=> {
+                    let word = self.tokenize_quoted_identifier(quote_start, 
chars)?;
+                    Ok(Some(Token::make_word(&word, Some(quote_start))))
+                }
+                // special (quoted) identifier
                 quote_start
-                    if self.dialect.is_delimited_identifier_start(ch)
+                    if self
+                        .dialect
+                        .is_nested_delimited_identifier_start(quote_start)
                         && self
                             .dialect
-                            
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
+                            
.nested_delimited_identifier(chars.peekable.clone())
+                            .is_some() =>
                 {
-                    let error_loc = chars.location();
-                    chars.next(); // consume the opening quote
+                    let (quote_start, nested_quote_start) = self
+                        .dialect
+                        .nested_delimited_identifier(chars.peekable.clone())
+                        .unwrap();
+
+                    let Some(nested_quote_start) = nested_quote_start else {
+                        let word = 
self.tokenize_quoted_identifier(quote_start, chars)?;
+                        return Ok(Some(Token::make_word(&word, 
Some(quote_start))));
+                    };
+
+                    let mut word = vec![];
                     let quote_end = Word::matching_end_quote(quote_start);
-                    let (s, last_char) = self.parse_quoted_ident(chars, 
quote_end);
+                    let nested_quote_end = 
Word::matching_end_quote(nested_quote_start);
+                    let error_loc = chars.location();
 
-                    if last_char == Some(quote_end) {
-                        Ok(Some(Token::make_word(&s, Some(quote_start))))
-                    } else {
-                        self.tokenizer_error(
+                    chars.next(); // skip the first delimiter
+                    word.push(peeking_take_while(chars, |ch| 
ch.is_whitespace()));
+                    if chars.peek() != Some(&nested_quote_start) {
+                        return self.tokenizer_error(
+                            error_loc,
+                            format!("Expected nested delimiter 
'{nested_quote_start}' before EOF."),
+                        );
+                    }
+                    word.push(format!("{nested_quote_start}"));
+                    
word.push(self.tokenize_quoted_identifier(nested_quote_end, chars)?);
+                    word.push(format!("{nested_quote_end}"));

Review Comment:
   ```suggestion
                       word.push(nested_quote_end.into());
   ```



##########
tests/sqlparser_redshift.rs:
##########
@@ -353,3 +378,20 @@ fn test_parse_json_path_from() {
         _ => panic!(),
     }
 }
+
+#[test]
+fn test_parse_select_numbered_columns() {
+    redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
+    // RedShift specific case - quoted identifier inside square bracket
+    redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#);
+}
+
+#[test]
+fn test_parse_create_numbered_columns() {
+    redshift_and_generic().verified_stmt(
+        r#"CREATE TABLE test_table_1 ("1" INT, "d" VARCHAR(155), "2" DOUBLE 
PRECISION)"#,
+    );
+}

Review Comment:
   Can we incorporate this as a test case under the 
`parse_delimited_identifiers` test?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to