This is an automated email from the ASF dual-hosted git repository.

ozankabak pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 5b74c2d1f8 Improved ergonomy for `CREATE EXTERNAL TABLE OPTIONS`: 
Don't require quotations for simple namespaced keys like `foo.bar` (#10483)
5b74c2d1f8 is described below

commit 5b74c2d1f8923b8f4f7cf7a660459a80bd947790
Author: Mehmet Ozan Kabak <[email protected]>
AuthorDate: Mon May 13 21:18:29 2024 +0300

    Improved ergonomy for `CREATE EXTERNAL TABLE OPTIONS`: Don't require 
quotations for simple namespaced keys like `foo.bar` (#10483)
    
    * Don't require quotations for simple namespaced keys like foo.bar
    
    * Add comments clarifying parse error cases for unquoted namespaced keys
---
 datafusion/common/src/config.rs                    | 65 +++++++++-------------
 datafusion/core/src/execution/context/mod.rs       | 24 ++++----
 .../proto/tests/cases/roundtrip_logical_plan.rs    | 18 +++---
 datafusion/sql/src/parser.rs                       | 24 ++++++--
 .../test_files/create_external_table.slt           | 21 +++++--
 .../test_files/tpch/create_tables.slt.part         |  2 +-
 6 files changed, 84 insertions(+), 70 deletions(-)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 0f1d9b8f02..a4f937b6e2 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -130,9 +130,9 @@ macro_rules! config_namespace {
                     $(
                        stringify!($field_name) => self.$field_name.set(rem, 
value),
                     )*
-                    _ => return Err(DataFusionError::Configuration(format!(
+                    _ => return _config_err!(
                         "Config value \"{}\" not found on {}", key, 
stringify!($struct_name)
-                    )))
+                    )
                 }
             }
 
@@ -676,22 +676,17 @@ impl ConfigOptions {
 
     /// Set a configuration option
     pub fn set(&mut self, key: &str, value: &str) -> Result<()> {
-        let (prefix, key) = key.split_once('.').ok_or_else(|| {
-            DataFusionError::Configuration(format!(
-                "could not find config namespace for key \"{key}\"",
-            ))
-        })?;
+        let Some((prefix, key)) = key.split_once('.') else {
+            return _config_err!("could not find config namespace for key 
\"{key}\"");
+        };
 
         if prefix == "datafusion" {
             return ConfigField::set(self, key, value);
         }
 
-        let e = self.extensions.0.get_mut(prefix);
-        let e = e.ok_or_else(|| {
-            DataFusionError::Configuration(format!(
-                "Could not find config namespace \"{prefix}\""
-            ))
-        })?;
+        let Some(e) = self.extensions.0.get_mut(prefix) else {
+            return _config_err!("Could not find config namespace 
\"{prefix}\"");
+        };
         e.0.set(key, value)
     }
 
@@ -1279,22 +1274,17 @@ impl TableOptions {
     ///
     /// A result indicating success or failure in setting the configuration 
option.
     pub fn set(&mut self, key: &str, value: &str) -> Result<()> {
-        let (prefix, _) = key.split_once('.').ok_or_else(|| {
-            DataFusionError::Configuration(format!(
-                "could not find config namespace for key \"{key}\""
-            ))
-        })?;
+        let Some((prefix, _)) = key.split_once('.') else {
+            return _config_err!("could not find config namespace for key 
\"{key}\"");
+        };
 
         if prefix == "format" {
             return ConfigField::set(self, key, value);
         }
 
-        let e = self.extensions.0.get_mut(prefix);
-        let e = e.ok_or_else(|| {
-            DataFusionError::Configuration(format!(
-                "Could not find config namespace \"{prefix}\""
-            ))
-        })?;
+        let Some(e) = self.extensions.0.get_mut(prefix) else {
+            return _config_err!("Could not find config namespace 
\"{prefix}\"");
+        };
         e.0.set(key, value)
     }
 
@@ -1413,19 +1403,19 @@ impl ConfigField for TableParquetOptions {
     fn set(&mut self, key: &str, value: &str) -> Result<()> {
         // Determine if the key is a global, metadata, or column-specific 
setting
         if key.starts_with("metadata::") {
-            let k =
-                match key.split("::").collect::<Vec<_>>()[..] {
-                    [_meta] | [_meta, ""] => return 
Err(DataFusionError::Configuration(
+            let k = match key.split("::").collect::<Vec<_>>()[..] {
+                [_meta] | [_meta, ""] => {
+                    return _config_err!(
                         "Invalid metadata key provided, missing key in 
metadata::<key>"
-                            .to_string(),
-                    )),
-                    [_meta, k] => k.into(),
-                    _ => {
-                        return Err(DataFusionError::Configuration(format!(
+                    )
+                }
+                [_meta, k] => k.into(),
+                _ => {
+                    return _config_err!(
                         "Invalid metadata key provided, found too many '::' in 
\"{key}\""
-                    )))
-                    }
-                };
+                    )
+                }
+            };
             self.key_value_metadata.insert(k, Some(value.into()));
             Ok(())
         } else if key.contains("::") {
@@ -1498,10 +1488,7 @@ macro_rules! config_namespace_with_hashmap {
 
                         inner_value.set(inner_key, value)
                     }
-                    _ => Err(DataFusionError::Configuration(format!(
-                        "Unrecognized key '{}'.",
-                        key
-                    ))),
+                    _ => _config_err!("Unrecognized key '{key}'."),
                 }
             }
 
diff --git a/datafusion/core/src/execution/context/mod.rs 
b/datafusion/core/src/execution/context/mod.rs
index e69a249410..2fc1a19c33 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -23,6 +23,8 @@ use std::ops::ControlFlow;
 use std::sync::{Arc, Weak};
 
 use super::options::ReadOptions;
+#[cfg(feature = "array_expressions")]
+use crate::functions_array;
 use crate::{
     catalog::information_schema::{InformationSchemaProvider, 
INFORMATION_SCHEMA},
     catalog::listing_schema::ListingSchemaProvider,
@@ -53,53 +55,49 @@ use crate::{
     },
     optimizer::analyzer::{Analyzer, AnalyzerRule},
     optimizer::optimizer::{Optimizer, OptimizerConfig, OptimizerRule},
+    physical_expr::{create_physical_expr, PhysicalExpr},
     physical_optimizer::optimizer::{PhysicalOptimizer, PhysicalOptimizerRule},
     physical_plan::ExecutionPlan,
     physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner},
     variable::{VarProvider, VarType},
 };
-
-#[cfg(feature = "array_expressions")]
-use crate::functions_array;
 use crate::{functions, functions_aggregate};
 
 use arrow::datatypes::{DataType, SchemaRef};
 use arrow::record_batch::RecordBatch;
 use arrow_schema::Schema;
-use async_trait::async_trait;
-use chrono::{DateTime, Utc};
-use datafusion_common::tree_node::TreeNode;
 use datafusion_common::{
     alias::AliasGenerator,
     config::{ConfigExtension, TableOptions},
     exec_err, not_impl_err, plan_datafusion_err, plan_err,
-    tree_node::{TreeNodeRecursion, TreeNodeVisitor},
+    tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor},
     DFSchema, SchemaReference, TableReference,
 };
 use datafusion_execution::registry::SerializerRegistry;
 use datafusion_expr::{
+    expr_rewriter::FunctionRewrite,
     logical_plan::{DdlStatement, Statement},
+    simplify::SimplifyInfo,
     var_provider::is_system_variables,
     Expr, ExprSchemable, StringifiedPlan, UserDefinedLogicalNode, WindowUDF,
 };
+use datafusion_optimizer::simplify_expressions::ExprSimplifier;
 use datafusion_sql::{
     parser::{CopyToSource, CopyToStatement, DFParser},
     planner::{object_name_to_table_reference, ContextProvider, ParserOptions, 
SqlToRel},
     ResolvedTableReference,
 };
-use parking_lot::RwLock;
 use sqlparser::dialect::dialect_from_str;
+
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use parking_lot::RwLock;
 use url::Url;
 use uuid::Uuid;
 
-use crate::physical_expr::PhysicalExpr;
 pub use datafusion_execution::config::SessionConfig;
 pub use datafusion_execution::TaskContext;
 pub use datafusion_expr::execution_props::ExecutionProps;
-use datafusion_expr::expr_rewriter::FunctionRewrite;
-use datafusion_expr::simplify::SimplifyInfo;
-use datafusion_optimizer::simplify_expressions::ExprSimplifier;
-use datafusion_physical_expr::create_physical_expr;
 
 mod avro;
 mod csv;
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs 
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index e5e57c0bc8..2927fd01d1 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -15,6 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::any::Any;
+use std::collections::HashMap;
+use std::fmt::{self, Debug, Formatter};
+use std::sync::Arc;
+use std::vec;
+
 use arrow::array::{ArrayRef, FixedSizeListArray};
 use arrow::datatypes::{
     DataType, Field, Fields, Int32Type, IntervalDayTimeType, 
IntervalMonthDayNanoType,
@@ -24,6 +30,7 @@ use datafusion::datasource::provider::TableProviderFactory;
 use datafusion::datasource::TableProvider;
 use datafusion::execution::context::SessionState;
 use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
+use datafusion::execution::FunctionRegistry;
 use datafusion::functions_aggregate::covariance::{covar_pop, covar_samp};
 use datafusion::functions_aggregate::expr_fn::first_value;
 use datafusion::prelude::*;
@@ -51,16 +58,11 @@ use datafusion_proto::bytes::{
     logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec,
 };
 use datafusion_proto::logical_plan::to_proto::serialize_expr;
-use datafusion_proto::logical_plan::LogicalExtensionCodec;
-use datafusion_proto::logical_plan::{from_proto, DefaultLogicalExtensionCodec};
+use datafusion_proto::logical_plan::{
+    from_proto, DefaultLogicalExtensionCodec, LogicalExtensionCodec,
+};
 use datafusion_proto::protobuf;
-use std::any::Any;
-use std::collections::HashMap;
-use std::fmt::{self, Debug, Formatter};
-use std::sync::Arc;
-use std::vec;
 
-use datafusion::execution::FunctionRegistry;
 use prost::Message;
 
 #[cfg(feature = "json")]
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index f61c9cda63..d09317271d 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -462,7 +462,21 @@ impl<'a> DFParser<'a> {
     pub fn parse_option_key(&mut self) -> Result<String, ParserError> {
         let next_token = self.parser.next_token();
         match next_token.token {
-            Token::Word(Word { value, .. }) => Ok(value),
+            Token::Word(Word { value, .. }) => {
+                let mut parts = vec![value];
+                while self.parser.consume_token(&Token::Period) {
+                    let next_token = self.parser.next_token();
+                    if let Token::Word(Word { value, .. }) = next_token.token {
+                        parts.push(value);
+                    } else {
+                        // Unquoted namespaced keys have to conform to the 
syntax
+                        // "<WORD>[\.<WORD>]*". If we have a key that breaks 
this
+                        // pattern, error out:
+                        return self.parser.expected("key name", next_token);
+                    }
+                }
+                Ok(parts.join("."))
+            }
             Token::SingleQuotedString(s) => Ok(s),
             Token::DoubleQuotedString(s) => Ok(s),
             Token::EscapedStringLiteral(s) => Ok(s),
@@ -712,15 +726,15 @@ impl<'a> DFParser<'a> {
                         } else {
                             self.parser.expect_keyword(Keyword::HEADER)?;
                             self.parser.expect_keyword(Keyword::ROW)?;
-                            return parser_err!("WITH HEADER ROW clause is no 
longer in use. Please use the OPTIONS clause with 'format.has_header' set 
appropriately, e.g., OPTIONS ('format.has_header' 'true')");
+                            return parser_err!("WITH HEADER ROW clause is no 
longer in use. Please use the OPTIONS clause with 'format.has_header' set 
appropriately, e.g., OPTIONS (format.has_header true)");
                         }
                     }
                     Keyword::DELIMITER => {
-                        return parser_err!("DELIMITER clause is no longer in 
use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, 
e.g., OPTIONS ('format.delimiter' ',')");
+                        return parser_err!("DELIMITER clause is no longer in 
use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, 
e.g., OPTIONS (format.delimiter ',')");
                     }
                     Keyword::COMPRESSION => {
                         self.parser.expect_keyword(Keyword::TYPE)?;
-                        return parser_err!("COMPRESSION TYPE clause is no 
longer in use. Please use the OPTIONS clause with 'format.compression' set 
appropriately, e.g., OPTIONS ('format.compression' 'gzip')");
+                        return parser_err!("COMPRESSION TYPE clause is no 
longer in use. Please use the OPTIONS clause with 'format.compression' set 
appropriately, e.g., OPTIONS (format.compression gzip)");
                     }
                     Keyword::PARTITIONED => {
                         self.parser.expect_keyword(Keyword::BY)?;
@@ -933,7 +947,7 @@ mod tests {
         expect_parse_ok(sql, expected)?;
 
         // positive case with delimiter
-        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 
'foo.csv' OPTIONS ('format.delimiter' '|')";
+        let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 
'foo.csv' OPTIONS (format.delimiter '|')";
         let display = None;
         let expected = Statement::CreateExternalTable(CreateExternalTable {
             name: "t".into(),
diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt 
b/datafusion/sqllogictest/test_files/create_external_table.slt
index fca177bb61..607c909fd6 100644
--- a/datafusion/sqllogictest/test_files/create_external_table.slt
+++ b/datafusion/sqllogictest/test_files/create_external_table.slt
@@ -190,8 +190,8 @@ LOCATION 
'test_files/scratch/create_external_table/manual_partitioning/';
 statement error DataFusion error: Error during planning: Option 
format.delimiter is specified multiple times
 CREATE EXTERNAL TABLE t STORED AS CSV OPTIONS (
     'format.delimiter' '*',
-    'format.has_header' 'true', 
-    'format.delimiter' '|') 
+    'format.has_header' 'true',
+    'format.delimiter' '|')
 LOCATION 'foo.csv';
 
 # If a config does not belong to any namespace, we assume it is a 'format' 
option and apply the 'format' prefix for backwards compatibility.
@@ -201,7 +201,20 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region (
         r_name VARCHAR,
         r_comment VARCHAR,
         r_rev VARCHAR,
-) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' 
+) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
 OPTIONS (
         'format.delimiter' '|',
-        'has_header' 'false');
\ No newline at end of file
+        'has_header' 'false');
+
+# Verify that we do not need quotations for simple namespaced keys.
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS region (
+        r_regionkey BIGINT,
+        r_name VARCHAR,
+        r_comment VARCHAR,
+        r_rev VARCHAR,
+) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
+OPTIONS (
+        format.delimiter '|',
+        has_header false,
+        compression gzip);
diff --git a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part 
b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
index 111d247730..75bcbc198b 100644
--- a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
@@ -121,4 +121,4 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region (
         r_name VARCHAR,
         r_comment VARCHAR,
         r_rev VARCHAR,
-) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS 
('format.delimiter' '|');
\ No newline at end of file
+) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS 
('format.delimiter' '|');


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to