This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 7980c866 BigQuery: Fix column identifier reserved keywords list (#1678)
7980c866 is described below

commit 7980c866a3824af7e1937ffda274657b5dbae99d
Author: Ifeanyi Ubah <ifea...@validio.io>
AuthorDate: Wed Jan 29 12:03:55 2025 +0100

    BigQuery: Fix column identifier reserved keywords list (#1678)
---
 src/dialect/bigquery.rs     | 26 ++++++++++++++++++
 src/dialect/mod.rs          | 12 ++++++---
 src/parser/mod.rs           | 65 ++++++++++++++++++++++++++++++++-------------
 tests/sqlparser_bigquery.rs |  9 +++++++
 tests/sqlparser_common.rs   | 42 ++++++++++++++++++++---------
 5 files changed, 120 insertions(+), 34 deletions(-)

diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs
index b4575421..bb1a0d5c 100644
--- a/src/dialect/bigquery.rs
+++ b/src/dialect/bigquery.rs
@@ -16,6 +16,28 @@
 // under the License.
 
 use crate::dialect::Dialect;
+use crate::keywords::Keyword;
+use crate::parser::Parser;
+
+/// These keywords are disallowed as column identifiers. Such that
+/// `SELECT 5 AS <col> FROM T` is rejected by BigQuery.
+const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
+    Keyword::WITH,
+    Keyword::SELECT,
+    Keyword::WHERE,
+    Keyword::GROUP,
+    Keyword::HAVING,
+    Keyword::ORDER,
+    Keyword::LATERAL,
+    Keyword::LIMIT,
+    Keyword::FETCH,
+    Keyword::UNION,
+    Keyword::EXCEPT,
+    Keyword::INTERSECT,
+    Keyword::FROM,
+    Keyword::INTO,
+    Keyword::END,
+];
 
 /// A [`Dialect`] for [Google Bigquery](https://cloud.google.com/bigquery/)
 #[derive(Debug, Default)]
@@ -92,4 +114,8 @@ impl Dialect for BigQueryDialect {
     fn supports_timestamp_versioning(&self) -> bool {
         true
     }
+
+    fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
+        !RESERVED_FOR_COLUMN_ALIAS.contains(kw)
+    }
 }
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 817f5f32..b648869d 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -804,7 +804,7 @@ pub trait Dialect: Debug + Any {
         keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
     }
 
-    // Returns reserved keywords when looking to parse a [TableFactor].
+    /// Returns reserved keywords when looking to parse a `TableFactor`.
     /// See [Self::supports_from_trailing_commas]
     fn get_reserved_keywords_for_table_factor(&self) -> &[Keyword] {
         keywords::RESERVED_FOR_TABLE_FACTOR
@@ -844,11 +844,17 @@ pub trait Dialect: Debug + Any {
         false
     }
 
+    /// Returns true if the specified keyword should be parsed as a column 
identifier.
+    /// See [keywords::RESERVED_FOR_COLUMN_ALIAS]
+    fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
+        !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
+    }
+
     /// Returns true if the specified keyword should be parsed as a select 
item alias.
     /// When explicit is true, the keyword is preceded by an `AS` word. Parser 
is provided
     /// to enable looking ahead if needed.
-    fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut 
Parser) -> bool {
-        explicit || !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
+    fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut 
Parser) -> bool {
+        explicit || self.is_column_alias(kw, parser)
     }
 
     /// Returns true if the specified keyword should be parsed as a table 
factor alias.
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 179c120b..b0936007 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -3951,7 +3951,7 @@ impl<'a> Parser<'a> {
         self.parse_comma_separated_with_trailing_commas(
             |p| p.parse_select_item(),
             trailing_commas,
-            None,
+            Self::is_reserved_for_column_alias,
         )
     }
 
@@ -3985,30 +3985,42 @@ impl<'a> Parser<'a> {
         self.parse_comma_separated_with_trailing_commas(
             Parser::parse_table_and_joins,
             trailing_commas,
-            Some(self.dialect.get_reserved_keywords_for_table_factor()),
+            |kw, _parser| {
+                self.dialect
+                    .get_reserved_keywords_for_table_factor()
+                    .contains(kw)
+            },
         )
     }
 
     /// Parse the comma of a comma-separated syntax element.
+    /// `R` is a predicate that should return true if the next
+    /// keyword is a reserved keyword.
     /// Allows for control over trailing commas
+    ///
     /// Returns true if there is a next element
-    fn is_parse_comma_separated_end_with_trailing_commas(
+    fn is_parse_comma_separated_end_with_trailing_commas<R>(
         &mut self,
         trailing_commas: bool,
-        reserved_keywords: Option<&[Keyword]>,
-    ) -> bool {
-        let reserved_keywords = 
reserved_keywords.unwrap_or(keywords::RESERVED_FOR_COLUMN_ALIAS);
+        is_reserved_keyword: &R,
+    ) -> bool
+    where
+        R: Fn(&Keyword, &mut Parser) -> bool,
+    {
         if !self.consume_token(&Token::Comma) {
             true
         } else if trailing_commas {
-            let token = self.peek_token().token;
-            match token {
-                Token::Word(ref kw) if reserved_keywords.contains(&kw.keyword) 
=> true,
+            let token = self.next_token().token;
+            let is_end = match token {
+                Token::Word(ref kw) if is_reserved_keyword(&kw.keyword, self) 
=> true,
                 Token::RParen | Token::SemiColon | Token::EOF | 
Token::RBracket | Token::RBrace => {
                     true
                 }
                 _ => false,
-            }
+            };
+            self.prev_token();
+
+            is_end
         } else {
             false
         }
@@ -4017,7 +4029,10 @@ impl<'a> Parser<'a> {
     /// Parse the comma of a comma-separated syntax element.
     /// Returns true if there is a next element
     fn is_parse_comma_separated_end(&mut self) -> bool {
-        
self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas,
 None)
+        self.is_parse_comma_separated_end_with_trailing_commas(
+            self.options.trailing_commas,
+            &Self::is_reserved_for_column_alias,
+        )
     }
 
     /// Parse a comma-separated list of 1+ items accepted by `F`
@@ -4025,26 +4040,33 @@ impl<'a> Parser<'a> {
     where
         F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
     {
-        self.parse_comma_separated_with_trailing_commas(f, 
self.options.trailing_commas, None)
+        self.parse_comma_separated_with_trailing_commas(
+            f,
+            self.options.trailing_commas,
+            Self::is_reserved_for_column_alias,
+        )
     }
 
-    /// Parse a comma-separated list of 1+ items accepted by `F`
-    /// Allows for control over trailing commas
-    fn parse_comma_separated_with_trailing_commas<T, F>(
+    /// Parse a comma-separated list of 1+ items accepted by `F`.
+    /// `R` is a predicate that should return true if the next
+    /// keyword is a reserved keyword.
+    /// Allows for control over trailing commas.
+    fn parse_comma_separated_with_trailing_commas<T, F, R>(
         &mut self,
         mut f: F,
         trailing_commas: bool,
-        reserved_keywords: Option<&[Keyword]>,
+        is_reserved_keyword: R,
     ) -> Result<Vec<T>, ParserError>
     where
         F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
+        R: Fn(&Keyword, &mut Parser) -> bool,
     {
         let mut values = vec![];
         loop {
             values.push(f(self)?);
             if self.is_parse_comma_separated_end_with_trailing_commas(
                 trailing_commas,
-                reserved_keywords,
+                &is_reserved_keyword,
             ) {
                 break;
             }
@@ -4118,6 +4140,13 @@ impl<'a> Parser<'a> {
         self.parse_comma_separated(f)
     }
 
+    /// Default implementation of a predicate that returns true if
+    /// the specified keyword is reserved for column alias.
+    /// See [Dialect::is_column_alias]
+    fn is_reserved_for_column_alias(kw: &Keyword, parser: &mut Parser) -> bool 
{
+        !parser.dialect.is_column_alias(kw, parser)
+    }
+
     /// Run a parser method `f`, reverting back to the current position if 
unsuccessful.
     /// Returns `None` if `f` returns an error
     pub fn maybe_parse<T, F>(&mut self, f: F) -> Result<Option<T>, ParserError>
@@ -9394,7 +9423,7 @@ impl<'a> Parser<'a> {
                 let cols = self.parse_comma_separated_with_trailing_commas(
                     Parser::parse_view_column,
                     self.dialect.supports_column_definition_trailing_commas(),
-                    None,
+                    Self::is_reserved_for_column_alias,
                 )?;
                 self.expect_token(&Token::RParen)?;
                 Ok(cols)
diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs
index 921a37a8..853bffee 100644
--- a/tests/sqlparser_bigquery.rs
+++ b/tests/sqlparser_bigquery.rs
@@ -213,6 +213,15 @@ fn parse_raw_literal() {
     );
 }
 
+#[test]
+fn parse_big_query_non_reserved_column_alias() {
+    let sql = r#"SELECT OFFSET, EXPLAIN, ANALYZE, SORT, TOP, VIEW FROM T"#;
+    bigquery().verified_stmt(sql);
+
+    let sql = r#"SELECT 1 AS OFFSET, 2 AS EXPLAIN, 3 AS ANALYZE FROM T"#;
+    bigquery().verified_stmt(sql);
+}
+
 #[test]
 fn parse_delete_statement() {
     let sql = "DELETE \"table\" WHERE 1";
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 5dd74e1f..119adc7e 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -253,8 +253,13 @@ fn parse_insert_default_values() {
 
 #[test]
 fn parse_insert_select_returning() {
-    verified_stmt("INSERT INTO t SELECT 1 RETURNING 2");
-    let stmt = verified_stmt("INSERT INTO t SELECT x RETURNING x AS y");
+    // Dialects that support `RETURNING` as a column identifier do
+    // not support this syntax.
+    let dialects =
+        all_dialects_where(|d| !d.is_column_alias(&Keyword::RETURNING, &mut 
Parser::new(d)));
+
+    dialects.verified_stmt("INSERT INTO t SELECT 1 RETURNING 2");
+    let stmt = dialects.verified_stmt("INSERT INTO t SELECT x RETURNING x AS 
y");
     match stmt {
         Statement::Insert(Insert {
             returning: Some(ret),
@@ -6993,9 +6998,6 @@ fn parse_union_except_intersect_minus() {
     verified_stmt("SELECT 1 EXCEPT SELECT 2");
     verified_stmt("SELECT 1 EXCEPT ALL SELECT 2");
     verified_stmt("SELECT 1 EXCEPT DISTINCT SELECT 1");
-    verified_stmt("SELECT 1 MINUS SELECT 2");
-    verified_stmt("SELECT 1 MINUS ALL SELECT 2");
-    verified_stmt("SELECT 1 MINUS DISTINCT SELECT 1");
     verified_stmt("SELECT 1 INTERSECT SELECT 2");
     verified_stmt("SELECT 1 INTERSECT ALL SELECT 2");
     verified_stmt("SELECT 1 INTERSECT DISTINCT SELECT 1");
@@ -7014,6 +7016,13 @@ fn parse_union_except_intersect_minus() {
     verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT BY NAME SELECT 9 AS y, 8 AS 
x");
     verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT ALL BY NAME SELECT 9 AS y, 
8 AS x");
     verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT DISTINCT BY NAME SELECT 9 
AS y, 8 AS x");
+
+    // Dialects that support `MINUS` as column identifier
+    // do not support `MINUS` as a set operator.
+    let dialects = all_dialects_where(|d| !d.is_column_alias(&Keyword::MINUS, 
&mut Parser::new(d)));
+    dialects.verified_stmt("SELECT 1 MINUS SELECT 2");
+    dialects.verified_stmt("SELECT 1 MINUS ALL SELECT 2");
+    dialects.verified_stmt("SELECT 1 MINUS DISTINCT SELECT 1");
 }
 
 #[test]
@@ -7690,19 +7699,26 @@ fn parse_invalid_subquery_without_parens() {
 
 #[test]
 fn parse_offset() {
+    // Dialects that support `OFFSET` as column identifiers
+    // don't support this syntax.
+    let dialects =
+        all_dialects_where(|d| !d.is_column_alias(&Keyword::OFFSET, &mut 
Parser::new(d)));
+
     let expect = Some(Offset {
         value: Expr::Value(number("2")),
         rows: OffsetRows::Rows,
     });
-    let ast = verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
+    let ast = dialects.verified_query("SELECT foo FROM bar OFFSET 2 ROWS");
     assert_eq!(ast.offset, expect);
-    let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 OFFSET 2 
ROWS");
+    let ast = dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 
OFFSET 2 ROWS");
     assert_eq!(ast.offset, expect);
-    let ast = verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 2 ROWS");
+    let ast = dialects.verified_query("SELECT foo FROM bar ORDER BY baz OFFSET 
2 ROWS");
     assert_eq!(ast.offset, expect);
-    let ast = verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY baz 
OFFSET 2 ROWS");
+    let ast =
+        dialects.verified_query("SELECT foo FROM bar WHERE foo = 4 ORDER BY 
baz OFFSET 2 ROWS");
     assert_eq!(ast.offset, expect);
-    let ast = verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 
ROWS) OFFSET 2 ROWS");
+    let ast =
+        dialects.verified_query("SELECT foo FROM (SELECT * FROM bar OFFSET 2 
ROWS) OFFSET 2 ROWS");
     assert_eq!(ast.offset, expect);
     match *ast.body {
         SetExpr::Select(s) => match only(s.from).relation {
@@ -7713,7 +7729,7 @@ fn parse_offset() {
         },
         _ => panic!("Test broke"),
     }
-    let ast = verified_query("SELECT 'foo' OFFSET 0 ROWS");
+    let ast = dialects.verified_query("SELECT 'foo' OFFSET 0 ROWS");
     assert_eq!(
         ast.offset,
         Some(Offset {
@@ -7721,7 +7737,7 @@ fn parse_offset() {
             rows: OffsetRows::Rows,
         })
     );
-    let ast = verified_query("SELECT 'foo' OFFSET 1 ROW");
+    let ast = dialects.verified_query("SELECT 'foo' OFFSET 1 ROW");
     assert_eq!(
         ast.offset,
         Some(Offset {
@@ -7729,7 +7745,7 @@ fn parse_offset() {
             rows: OffsetRows::Row,
         })
     );
-    let ast = verified_query("SELECT 'foo' OFFSET 1");
+    let ast = dialects.verified_query("SELECT 'foo' OFFSET 1");
     assert_eq!(
         ast.offset,
         Some(Offset {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to