kosiew commented on code in PR #22840:
URL: https://github.com/apache/datafusion/pull/22840#discussion_r3380381545
##########
datafusion/common/src/config.rs:
##########
@@ -323,87 +323,205 @@ config_namespace! {
}
}
-/// This is the SQL dialect used by DataFusion's parser.
-/// This mirrors
[sqlparser::dialect::Dialect](https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html)
-/// trait in order to offer an easier API and avoid adding the `sqlparser`
dependency
-#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
-pub enum Dialect {
- #[default]
- Generic,
- MySQL,
- PostgreSQL,
- Hive,
- SQLite,
- Snowflake,
- Redshift,
- MsSQL,
- ClickHouse,
- BigQuery,
- Ansi,
- DuckDB,
- Databricks,
- Spark,
+/// Metadata for a SQL dialect supported by DataFusion configuration.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[non_exhaustive]
+pub struct DialectInfo {
+ pub dialect: Dialect,
+ pub canonical_name: &'static str,
+ pub display_name: &'static str,
+ pub aliases: &'static [&'static str],
+}
+
+// Keep this key in sync with the `SqlParserOptions::dialect` config path.
+const SQL_PARSER_DIALECT_CONFIG_KEY: &str = "datafusion.sql_parser.dialect";
+
+macro_rules! dialect_display_list {
+ ($($display_name:literal),+ $(,)?) => {
+ dialect_display_list!(@acc [] $($display_name),+)
+ };
+ (@acc [$($acc:tt)*] $last:literal) => {
+ concat!($($acc)* $last)
+ };
+ (@acc [$($acc:tt)*] $next:literal, $($rest:literal),+) => {
+ dialect_display_list!(@acc [$($acc)* $next, ", ",] $($rest),+)
+ };
+}
+
+macro_rules! dialect_metadata {
+ (
+ default: $default_variant:ident;
+ $(
+ $variant:ident {
+ canonical_name: $canonical_name:literal,
+ display_name: $display_name:literal,
+ aliases: [$($alias:literal),* $(,)?],
+ }
+ ),+ $(,)?
+ ) => {
+ /// This is the SQL dialect used by DataFusion's parser.
+ /// This mirrors
[sqlparser::dialect::Dialect](https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html)
+ /// trait in order to offer an easier API and avoid adding the
`sqlparser` dependency
+ #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+ pub enum Dialect {
+ $($variant,)+
+ }
+
+ impl Default for Dialect {
+ fn default() -> Self {
+ Self::$default_variant
+ }
+ }
+
+ const DIALECT_INFOS: &[DialectInfo] = &[
+ $(
+ DialectInfo {
+ dialect: Dialect::$variant,
+ canonical_name: $canonical_name,
+ display_name: $display_name,
+ aliases: &[$($alias),*],
+ },
+ )+
+ ];
+
+ const AVAILABLE_DIALECTS: &str =
dialect_display_list!($($display_name),+);
+ const DIALECT_CONFIG_DESCRIPTION: &str = concat!(
+ "Configure the SQL dialect used by DataFusion's parser; supported
values include: ",
+ dialect_display_list!($($display_name),+),
+ "."
+ );
+ };
+}
+
+dialect_metadata! {
+ default: Generic;
+ Generic {
+ canonical_name: "generic",
+ display_name: "Generic",
+ aliases: [],
+ },
+ MySQL {
+ canonical_name: "mysql",
+ display_name: "MySQL",
+ aliases: [],
+ },
+ PostgreSQL {
+ canonical_name: "postgresql",
+ display_name: "PostgreSQL",
+ aliases: ["postgres"],
+ },
+ Hive {
+ canonical_name: "hive",
+ display_name: "Hive",
+ aliases: [],
+ },
+ SQLite {
+ canonical_name: "sqlite",
+ display_name: "SQLite",
+ aliases: [],
+ },
+ Snowflake {
+ canonical_name: "snowflake",
+ display_name: "Snowflake",
+ aliases: [],
+ },
+ Redshift {
+ canonical_name: "redshift",
+ display_name: "Redshift",
+ aliases: [],
+ },
+ MsSQL {
+ canonical_name: "mssql",
+ display_name: "MsSQL",
+ aliases: [],
+ },
+ ClickHouse {
+ canonical_name: "clickhouse",
+ display_name: "ClickHouse",
+ aliases: [],
+ },
+ BigQuery {
+ canonical_name: "bigquery",
+ display_name: "BigQuery",
+ aliases: [],
+ },
+ Ansi {
+ canonical_name: "ansi",
+ display_name: "Ansi",
+ aliases: [],
+ },
+ DuckDB {
+ canonical_name: "duckdb",
+ display_name: "DuckDB",
+ aliases: [],
+ },
+ Databricks {
+ canonical_name: "databricks",
+ display_name: "Databricks",
+ aliases: [],
+ },
+ Spark {
+ canonical_name: "spark",
+ display_name: "Spark",
+ aliases: ["sparksql"],
+ },
}
impl Dialect {
- /// List of all supported dialect names, for use in error messages.
- pub const AVAILABLE: &'static str = "Generic, MySQL, PostgreSQL, Hive,
SQLite, Snowflake, Redshift, \
- MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks, Spark";
+ /// Return metadata for all supported dialects.
+ pub fn metadata() -> &'static [DialectInfo] {
+ DIALECT_INFOS
+ }
+
+ /// Return all supported dialect names, for use in error messages.
+ pub fn available() -> &'static str {
+ AVAILABLE_DIALECTS
+ }
+
+ fn info(&self) -> &'static DialectInfo {
+ DIALECT_INFOS
+ .iter()
+ .find(|info| info.dialect == *self)
+ .expect("all Dialect variants are listed in DIALECT_INFOS")
+ }
}
impl AsRef<str> for Dialect {
fn as_ref(&self) -> &str {
- match self {
- Self::Generic => "generic",
- Self::MySQL => "mysql",
- Self::PostgreSQL => "postgresql",
- Self::Hive => "hive",
- Self::SQLite => "sqlite",
- Self::Snowflake => "snowflake",
- Self::Redshift => "redshift",
- Self::MsSQL => "mssql",
- Self::ClickHouse => "clickhouse",
- Self::BigQuery => "bigquery",
- Self::Ansi => "ansi",
- Self::DuckDB => "duckdb",
- Self::Databricks => "databricks",
- Self::Spark => "spark",
- }
+ self.info().canonical_name
}
}
impl FromStr for Dialect {
type Err = DataFusionError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
- let value = match s.to_ascii_lowercase().as_str() {
- "generic" => Self::Generic,
- "mysql" => Self::MySQL,
- "postgresql" | "postgres" => Self::PostgreSQL,
- "hive" => Self::Hive,
- "sqlite" => Self::SQLite,
- "snowflake" => Self::Snowflake,
- "redshift" => Self::Redshift,
- "mssql" => Self::MsSQL,
- "clickhouse" => Self::ClickHouse,
- "bigquery" => Self::BigQuery,
- "ansi" => Self::Ansi,
- "duckdb" => Self::DuckDB,
- "databricks" => Self::Databricks,
- "spark" | "sparksql" => Self::Spark,
- other => {
- return Err(DataFusionError::Configuration(format!(
- "Invalid Dialect: {other}. Expected one of: {}",
- Self::AVAILABLE
- )));
- }
- };
- Ok(value)
+ DIALECT_INFOS
Review Comment:
Nice improvement. One small readability suggestion: this could be written
with an early-return loop instead of `find(...).map(...).ok_or_else(...)`. The
behavior stays the same, but it keeps the happy path front and center and
avoids some of the nested closure flow.
```rust
for info in DIALECT_INFOS {
if info.canonical_name.eq_ignore_ascii_case(s)
|| info.aliases.iter().any(|alias| alias.eq_ignore_ascii_case(s))
{
return Ok(info.dialect);
}
}
Err(DataFusionError::Configuration(format!(
"Invalid Dialect: {s}. Expected one of: {}",
Self::available()
)))
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]