This is an automated email from the ASF dual-hosted git repository.
iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git
The following commit(s) were added to refs/heads/main by this push:
new e9ab4d6b Fix BigQuery hyphenated ObjectName with numbers (#1598)
e9ab4d6b is described below
commit e9ab4d6b94a81d4ed3e402750a5faf3860892c23
Author: Ayman Elkfrawy <[email protected]>
AuthorDate: Wed Dec 18 12:12:09 2024 -0800
Fix BigQuery hyphenated ObjectName with numbers (#1598)
---
src/parser/mod.rs | 4 +++-
src/tokenizer.rs | 45 ++++++++++++++++++++++++++++++++++++++-------
tests/sqlparser_bigquery.rs | 20 ++++++++++++++++++++
3 files changed, 61 insertions(+), 8 deletions(-)
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 94d63cf8..c0aa0acb 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -8755,7 +8755,9 @@ impl<'a> Parser<'a> {
}
Token::Number(s, false) if s.chars().all(|c| c.is_ascii_digit()) => {
ident.value.push_str(&s);
- true
+ // If next token is period, then it is part of an ObjectName and we don't expect whitespace
+ // after the number.
+ !matches!(self.peek_token().token, Token::Period)
}
_ => {
return self
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9269f4fe..3c2f70ed 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1144,15 +1144,29 @@ impl<'a> Tokenizer<'a> {
// match one period
if let Some('.') = chars.peek() {
- s.push('.');
- chars.next();
+ // Check if this actually is a float point number
+ let mut char_clone = chars.peekable.clone();
+ char_clone.next();
+ // Next char should be a digit, otherwise, it is not a float point number
+ if char_clone
+ .peek()
+ .map(|c| c.is_ascii_digit())
+ .unwrap_or(false)
+ {
+ s.push('.');
+ chars.next();
+ } else if !s.is_empty() {
+ // Number might be part of period separated construct. Keep the period for next token
+ // e.g. a-12.b
+ return Ok(Some(Token::Number(s, false)));
+ } else {
+ // No number -> Token::Period
+ chars.next();
+ return Ok(Some(Token::Period));
+ }
}
- s += &peeking_take_while(chars, |ch| ch.is_ascii_digit());
- // No number -> Token::Period
- if s == "." {
- return Ok(Some(Token::Period));
- }
+ s += &peeking_take_while(chars, |ch| ch.is_ascii_digit());
let mut exponent_part = String::new();
// Parse exponent as number
@@ -2185,6 +2199,23 @@ mod tests {
compare(expected, tokens);
}
+ #[test]
+ fn tokenize_select_float_hyphenated_identifier() {
+ let sql = String::from("SELECT a-12.b");
+ let dialect = GenericDialect {};
+ let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+ let expected = vec![
+ Token::make_keyword("SELECT"),
+ Token::Whitespace(Whitespace::Space),
+ Token::make_word("a", None),
+ Token::Minus,
+ Token::Number(String::from("12"), false),
+ Token::Period,
+ Token::make_word("b", None),
+ ];
+ compare(expected, tokens);
+ }
+
#[test]
fn tokenize_clickhouse_double_equal() {
let sql = String::from("SELECT foo=='1'");
diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs
index 0311eba1..c8173759 100644
--- a/tests/sqlparser_bigquery.rs
+++ b/tests/sqlparser_bigquery.rs
@@ -1504,6 +1504,26 @@ fn parse_hyphenated_table_identifiers() {
"SELECT * FROM foo-bar AS f JOIN baz-qux AS b ON f.id = b.id",
);
+ assert_eq!(
+ bigquery()
+ .verified_only_select_with_canonical(
+ "select * from foo-123.bar",
+ "SELECT * FROM foo-123.bar"
+ )
+ .from[0]
+ .relation,
+ TableFactor::Table {
+ name: ObjectName(vec![Ident::new("foo-123"), Ident::new("bar")]),
+ alias: None,
+ args: None,
+ with_hints: vec![],
+ version: None,
+ partitions: vec![],
+ with_ordinality: false,
+ json_path: None,
+ }
+ );
+
assert_eq!(
bigquery()
.verified_only_select_with_canonical(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]