This is an automated email from the ASF dual-hosted git repository.

iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new dd7ba72a Add support of the ENUM8|ENUM16 for ClickHouse dialect (#1574)
dd7ba72a is described below

commit dd7ba72a0b2cd24e352b6078bed8edf1ad1253c4
Author: hulk <[email protected]>
AuthorDate: Thu Dec 5 22:59:07 2024 +0800

    Add support of the ENUM8|ENUM16 for ClickHouse dialect (#1574)
---
 src/ast/data_type.rs      | 32 ++++++++++++++---
 src/ast/mod.rs            |  2 +-
 src/keywords.rs           |  2 ++
 src/parser/mod.rs         | 91 ++++++++++++++++++++++++++++-------------------
 tests/sqlparser_common.rs | 87 ++++++++++++++++++++++++++++++++++++++++++--
 tests/sqlparser_mysql.rs  | 14 +++++---
 6 files changed, 179 insertions(+), 49 deletions(-)

diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs
index ccca7f4c..5b0239e1 100644
--- a/src/ast/data_type.rs
+++ b/src/ast/data_type.rs
@@ -25,10 +25,21 @@ use serde::{Deserialize, Serialize};
 #[cfg(feature = "visitor")]
 use sqlparser_derive::{Visit, VisitMut};
 
-use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField};
+use crate::ast::{display_comma_separated, Expr, ObjectName, StructField, UnionField};
 
 use super::{value::escape_single_quote_string, ColumnDef};
 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum EnumMember {
+    Name(String),
+    /// ClickHouse allows to specify an integer value for each enum value.
+    ///
+    /// [clickhouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum)
+    NamedValue(String, Expr),
+}
+
 /// SQL data types
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -334,7 +345,7 @@ pub enum DataType {
     /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
     Nested(Vec<ColumnDef>),
     /// Enums
-    Enum(Vec<String>),
+    Enum(Vec<EnumMember>, Option<u8>),
     /// Set
     Set(Vec<String>),
     /// Struct
@@ -546,13 +557,24 @@ impl fmt::Display for DataType {
                     write!(f, "{}({})", ty, modifiers.join(", "))
                 }
             }
-            DataType::Enum(vals) => {
-                write!(f, "ENUM(")?;
+            DataType::Enum(vals, bits) => {
+                match bits {
+                    Some(bits) => write!(f, "ENUM{}", bits),
+                    None => write!(f, "ENUM"),
+                }?;
+                write!(f, "(")?;
                 for (i, v) in vals.iter().enumerate() {
                     if i != 0 {
                         write!(f, ", ")?;
                     }
-                    write!(f, "'{}'", escape_single_quote_string(v))?;
+                    match v {
+                        EnumMember::Name(name) => {
+                            write!(f, "'{}'", 
escape_single_quote_string(name))?
+                        }
+                        EnumMember::NamedValue(name, value) => {
+                            write!(f, "'{}' = {}", 
escape_single_quote_string(name), value)?
+                        }
+                    }
                 }
                 write!(f, ")")
             }
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 326375b5..f782b363 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -40,7 +40,7 @@ use sqlparser_derive::{Visit, VisitMut};
 use crate::tokenizer::Span;
 
 pub use self::data_type::{
-    ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, 
ExactNumberInfo,
+    ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, EnumMember, 
ExactNumberInfo,
     StructBracketKind, TimezoneInfo,
 };
 pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, 
SetConfigValue, Use};
diff --git a/src/keywords.rs b/src/keywords.rs
index e00e26a6..be3910f8 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -286,6 +286,8 @@ define_keywords!(
     ENFORCED,
     ENGINE,
     ENUM,
+    ENUM16,
+    ENUM8,
     EPHEMERAL,
     EPOCH,
     EQUALS,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 7b175f1d..04a103c6 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1049,18 +1049,18 @@ impl<'a> Parser<'a> {
             | Keyword::CURRENT_USER
             | Keyword::SESSION_USER
             | Keyword::USER
-                if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
-            {
-                Ok(Some(Expr::Function(Function {
-                    name: ObjectName(vec![w.to_ident(w_span)]),
-                    parameters: FunctionArguments::None,
-                    args: FunctionArguments::None,
-                    null_treatment: None,
-                    filter: None,
-                    over: None,
-                    within_group: vec![],
-                })))
-            }
+            if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
+                {
+                    Ok(Some(Expr::Function(Function {
+                        name: ObjectName(vec![w.to_ident(w_span)]),
+                        parameters: FunctionArguments::None,
+                        args: FunctionArguments::None,
+                        null_treatment: None,
+                        filter: None,
+                        over: None,
+                        within_group: vec![],
+                    })))
+                }
             Keyword::CURRENT_TIMESTAMP
             | Keyword::CURRENT_TIME
             | Keyword::CURRENT_DATE
@@ -1075,18 +1075,18 @@ impl<'a> Parser<'a> {
             Keyword::TRY_CAST => 
Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)),
             Keyword::SAFE_CAST => 
Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)),
             Keyword::EXISTS
-                // Support parsing Databricks has a function named `exists`.
-                if !dialect_of!(self is DatabricksDialect)
-                    || matches!(
+            // Support parsing Databricks has a function named `exists`.
+            if !dialect_of!(self is DatabricksDialect)
+                || matches!(
                         self.peek_nth_token(1).token,
                         Token::Word(Word {
                             keyword: Keyword::SELECT | Keyword::WITH,
                             ..
                         })
                     ) =>
-            {
-                Ok(Some(self.parse_exists_expr(false)?))
-            }
+                {
+                    Ok(Some(self.parse_exists_expr(false)?))
+                }
             Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)),
             Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
             Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
@@ -1103,22 +1103,22 @@ impl<'a> Parser<'a> {
                 Ok(Some(self.parse_array_expr(true)?))
             }
             Keyword::ARRAY
-                if self.peek_token() == Token::LParen
-                    && !dialect_of!(self is ClickHouseDialect | 
DatabricksDialect) =>
-            {
-                self.expect_token(&Token::LParen)?;
-                let query = self.parse_query()?;
-                self.expect_token(&Token::RParen)?;
-                Ok(Some(Expr::Function(Function {
-                    name: ObjectName(vec![w.to_ident(w_span)]),
-                    parameters: FunctionArguments::None,
-                    args: FunctionArguments::Subquery(query),
-                    filter: None,
-                    null_treatment: None,
-                    over: None,
-                    within_group: vec![],
-                })))
-            }
+            if self.peek_token() == Token::LParen
+                && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) 
=>
+                {
+                    self.expect_token(&Token::LParen)?;
+                    let query = self.parse_query()?;
+                    self.expect_token(&Token::RParen)?;
+                    Ok(Some(Expr::Function(Function {
+                        name: ObjectName(vec![w.to_ident(w_span)]),
+                        parameters: FunctionArguments::None,
+                        args: FunctionArguments::Subquery(query),
+                        filter: None,
+                        null_treatment: None,
+                        over: None,
+                        within_group: vec![],
+                    })))
+                }
             Keyword::NOT => Ok(Some(self.parse_not()?)),
             Keyword::MATCH if dialect_of!(self is MySqlDialect | 
GenericDialect) => {
                 Ok(Some(self.parse_match_against()?))
@@ -5023,7 +5023,7 @@ impl<'a> Parser<'a> {
                         return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. {e}")))
                     }
                 }
-            },
+            }
         };
         Ok(owner)
     }
@@ -7997,6 +7997,23 @@ impl<'a> Parser<'a> {
         }
     }
 
+    pub fn parse_enum_values(&mut self) -> Result<Vec<EnumMember>, ParserError> {
+        self.expect_token(&Token::LParen)?;
+        let values = self.parse_comma_separated(|parser| {
+            let name = parser.parse_literal_string()?;
+            let e = if parser.consume_token(&Token::Eq) {
+                let value = parser.parse_number()?;
+                EnumMember::NamedValue(name, value)
+            } else {
+                EnumMember::Name(name)
+            };
+            Ok(e)
+        })?;
+        self.expect_token(&Token::RParen)?;
+
+        Ok(values)
+    }
+
     /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
     pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
         let (ty, trailing_bracket) = self.parse_data_type_helper()?;
@@ -8235,7 +8252,9 @@ impl<'a> Parser<'a> {
                 Keyword::BIGDECIMAL => Ok(DataType::BigDecimal(
                     self.parse_exact_number_optional_precision_scale()?,
                 )),
-                Keyword::ENUM => 
Ok(DataType::Enum(self.parse_string_values()?)),
+                Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, 
None)),
+                Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, 
Some(8))),
+                Keyword::ENUM16 => 
Ok(DataType::Enum(self.parse_enum_values()?, Some(16))),
                 Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
                 Keyword::ARRAY => {
                     if dialect_of!(self is SnowflakeDialect) {
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index e8022380..61c742da 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -51,6 +51,7 @@ mod test_utils;
 use pretty_assertions::assert_eq;
 use sqlparser::ast::ColumnOption::Comment;
 use sqlparser::ast::Expr::{Identifier, UnaryOp};
+use sqlparser::ast::Value::Number;
 use sqlparser::test_utils::all_dialects_except;
 
 #[test]
@@ -9250,7 +9251,7 @@ fn parse_cache_table() {
             format!(
                 "CACHE {table_flag} TABLE '{cache_table_name}' OPTIONS('K1' = 
'V1', 'K2' = 0.88) {sql}",
             )
-            .as_str()
+                .as_str()
         ),
         Statement::Cache {
             table_flag: Some(ObjectName(vec![Ident::new(table_flag)])),
@@ -9275,7 +9276,7 @@ fn parse_cache_table() {
             format!(
                 "CACHE {table_flag} TABLE '{cache_table_name}' OPTIONS('K1' = 
'V1', 'K2' = 0.88) AS {sql}",
             )
-            .as_str()
+                .as_str()
         ),
         Statement::Cache {
             table_flag: Some(ObjectName(vec![Ident::new(table_flag)])),
@@ -11459,7 +11460,7 @@ fn parse_explain_with_option_list() {
             }),
         },
     ];
-    run_explain_analyze (
+    run_explain_analyze(
         all_dialects_where(|d| d.supports_explain_with_utility_options()),
         "EXPLAIN (ANALYZE, VERBOSE true, WAL OFF, FORMAT YAML, USER_DEF_NUM 
-100.1) SELECT sqrt(id) FROM foo",
         false,
@@ -12459,3 +12460,83 @@ fn parse_create_table_with_bit_types() {
         _ => unreachable!(),
     }
 }
+
+#[test]
+fn parse_create_table_with_enum_types() {
+    let sql = "CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = 2), bar ENUM16('a' = 1, 'b' = 2), baz ENUM('a', 'b'))";
+    match all_dialects().verified_stmt(sql) {
+        Statement::CreateTable(CreateTable { name, columns, .. }) => {
+            assert_eq!(name.to_string(), "t0");
+            assert_eq!(
+                vec![
+                    ColumnDef {
+                        name: Ident::new("foo"),
+                        data_type: DataType::Enum(
+                            vec![
+                                EnumMember::NamedValue(
+                                    "a".to_string(),
+                                    Expr::Value(Number("1".parse().unwrap(), 
false))
+                                ),
+                                EnumMember::NamedValue(
+                                    "b".to_string(),
+                                    Expr::Value(Number("2".parse().unwrap(), 
false))
+                                )
+                            ],
+                            Some(8)
+                        ),
+                        collation: None,
+                        options: vec![],
+                    },
+                    ColumnDef {
+                        name: Ident::new("bar"),
+                        data_type: DataType::Enum(
+                            vec![
+                                EnumMember::NamedValue(
+                                    "a".to_string(),
+                                    Expr::Value(Number("1".parse().unwrap(), 
false))
+                                ),
+                                EnumMember::NamedValue(
+                                    "b".to_string(),
+                                    Expr::Value(Number("2".parse().unwrap(), 
false))
+                                )
+                            ],
+                            Some(16)
+                        ),
+                        collation: None,
+                        options: vec![],
+                    },
+                    ColumnDef {
+                        name: Ident::new("baz"),
+                        data_type: DataType::Enum(
+                            vec![
+                                EnumMember::Name("a".to_string()),
+                                EnumMember::Name("b".to_string())
+                            ],
+                            None
+                        ),
+                        collation: None,
+                        options: vec![],
+                    }
+                ],
+                columns
+            );
+        }
+        _ => unreachable!(),
+    }
+
+    // invalid case missing value for enum pair
+    assert_eq!(
+        all_dialects()
+            .parse_sql_statements("CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = ))")
+            .unwrap_err(),
+        ParserError::ParserError("Expected: a value, found: )".to_string())
+    );
+
+    // invalid case that name is not a string
+    assert_eq!(
+        all_dialects()
+            .parse_sql_statements("CREATE TABLE t0 (foo ENUM8('a' = 1, 2))")
+            .unwrap_err(),
+        ParserError::ParserError("Expected: literal string, found: 2".to_string())
+    );
+}
diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs
index f7a21f99..cac1af85 100644
--- a/tests/sqlparser_mysql.rs
+++ b/tests/sqlparser_mysql.rs
@@ -685,7 +685,7 @@ fn table_constraint_unique_primary_ctor(
 #[test]
 fn parse_create_table_primary_and_unique_key() {
     let sqls = ["UNIQUE KEY", "PRIMARY KEY"]
-        .map(|key_ty|format!("CREATE TABLE foo (id INT PRIMARY KEY 
AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
+        .map(|key_ty| format!("CREATE TABLE foo (id INT PRIMARY KEY 
AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
 
     let index_type_display = [Some(KeyOrIndexDisplay::Key), None];
 
@@ -753,7 +753,7 @@ fn parse_create_table_primary_and_unique_key() {
 #[test]
 fn parse_create_table_primary_and_unique_key_with_index_options() {
     let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
-        .map(|key_ty|format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT 
constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE 
COMMENT 'MySQL allows')"));
+        .map(|key_ty| format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT 
constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE 
COMMENT 'MySQL allows')"));
 
     let index_type_display = [Some(KeyOrIndexDisplay::Index), None];
 
@@ -827,7 +827,7 @@ fn 
parse_create_table_primary_and_unique_key_with_index_type() {
 #[test]
 fn parse_create_table_primary_and_unique_key_characteristic_test() {
     let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
-        .map(|key_ty|format!("CREATE TABLE x (y INT, CONSTRAINT constr 
{key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
+        .map(|key_ty| format!("CREATE TABLE x (y INT, CONSTRAINT constr 
{key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
     for sql in &sqls {
         mysql_and_generic().verified_stmt(sql);
     }
@@ -890,7 +890,13 @@ fn parse_create_table_set_enum() {
                     },
                     ColumnDef {
                         name: Ident::new("baz"),
-                        data_type: DataType::Enum(vec!["a".to_string(), 
"b".to_string()]),
+                        data_type: DataType::Enum(
+                            vec![
+                                EnumMember::Name("a".to_string()),
+                                EnumMember::Name("b".to_string())
+                            ],
+                            None
+                        ),
                         collation: None,
                         options: vec![],
                     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to