This is an automated email from the ASF dual-hosted git repository.

iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new e9ab4d6b Fix BigQuery hyphenated ObjectName with numbers (#1598)
e9ab4d6b is described below

commit e9ab4d6b94a81d4ed3e402750a5faf3860892c23
Author: Ayman Elkfrawy <[email protected]>
AuthorDate: Wed Dec 18 12:12:09 2024 -0800

    Fix BigQuery hyphenated ObjectName with numbers (#1598)
---
 src/parser/mod.rs           |  4 +++-
 src/tokenizer.rs            | 45 ++++++++++++++++++++++++++++++++++++++-------
 tests/sqlparser_bigquery.rs | 20 ++++++++++++++++++++
 3 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 94d63cf8..c0aa0acb 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -8755,7 +8755,9 @@ impl<'a> Parser<'a> {
                             }
                             Token::Number(s, false) if s.chars().all(|c| c.is_ascii_digit()) => {
                                 ident.value.push_str(&s);
-                                true
+                                // If next token is period, then it is part of an ObjectName and we don't expect whitespace
+                                // after the number.
+                                !matches!(self.peek_token().token, Token::Period)
                             }
                             _ => {
                                 return self
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9269f4fe..3c2f70ed 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1144,15 +1144,29 @@ impl<'a> Tokenizer<'a> {
 
                     // match one period
                     if let Some('.') = chars.peek() {
-                        s.push('.');
-                        chars.next();
+                        // Check if this actually is a float point number
+                        let mut char_clone = chars.peekable.clone();
+                        char_clone.next();
+                        // Next char should be a digit, otherwise, it is not a float point number
+                        if char_clone
+                            .peek()
+                            .map(|c| c.is_ascii_digit())
+                            .unwrap_or(false)
+                        {
+                            s.push('.');
+                            chars.next();
+                        } else if !s.is_empty() {
+                            // Number might be part of period separated construct. Keep the period for next token
+                            // e.g. a-12.b
+                            return Ok(Some(Token::Number(s, false)));
+                        } else {
+                            // No number -> Token::Period
+                            chars.next();
+                            return Ok(Some(Token::Period));
+                        }
                     }
-                    s += &peeking_take_while(chars, |ch| ch.is_ascii_digit());
 
-                    // No number -> Token::Period
-                    if s == "." {
-                        return Ok(Some(Token::Period));
-                    }
+                    s += &peeking_take_while(chars, |ch| ch.is_ascii_digit());
 
                     let mut exponent_part = String::new();
                     // Parse exponent as number
@@ -2185,6 +2199,23 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_select_float_hyphenated_identifier() {
+        let sql = String::from("SELECT a-12.b");
+        let dialect = GenericDialect {};
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+        let expected = vec![
+            Token::make_keyword("SELECT"),
+            Token::Whitespace(Whitespace::Space),
+            Token::make_word("a", None),
+            Token::Minus,
+            Token::Number(String::from("12"), false),
+            Token::Period,
+            Token::make_word("b", None),
+        ];
+        compare(expected, tokens);
+    }
+
     #[test]
     fn tokenize_clickhouse_double_equal() {
         let sql = String::from("SELECT foo=='1'");
diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs
index 0311eba1..c8173759 100644
--- a/tests/sqlparser_bigquery.rs
+++ b/tests/sqlparser_bigquery.rs
@@ -1504,6 +1504,26 @@ fn parse_hyphenated_table_identifiers() {
         "SELECT * FROM foo-bar AS f JOIN baz-qux AS b ON f.id = b.id",
     );
 
+    assert_eq!(
+        bigquery()
+            .verified_only_select_with_canonical(
+                "select * from foo-123.bar",
+                "SELECT * FROM foo-123.bar"
+            )
+            .from[0]
+            .relation,
+        TableFactor::Table {
+            name: ObjectName(vec![Ident::new("foo-123"), Ident::new("bar")]),
+            alias: None,
+            args: None,
+            with_hints: vec![],
+            version: None,
+            partitions: vec![],
+            with_ordinality: false,
+            json_path: None,
+        }
+    );
+
     assert_eq!(
         bigquery()
             .verified_only_select_with_canonical(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to