This is an automated email from the ASF dual-hosted git repository.
ozankabak pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 5b74c2d1f8 Improved ergonomy for `CREATE EXTERNAL TABLE OPTIONS`:
Don't require quotations for simple namespaced keys like `foo.bar` (#10483)
5b74c2d1f8 is described below
commit 5b74c2d1f8923b8f4f7cf7a660459a80bd947790
Author: Mehmet Ozan Kabak <[email protected]>
AuthorDate: Mon May 13 21:18:29 2024 +0300
Improved ergonomy for `CREATE EXTERNAL TABLE OPTIONS`: Don't require
quotations for simple namespaced keys like `foo.bar` (#10483)
* Don't require quotations for simple namespaced keys like foo.bar
* Add comments clarifying parse error cases for unquoted namespaced keys
---
datafusion/common/src/config.rs | 65 +++++++++-------------
datafusion/core/src/execution/context/mod.rs | 24 ++++----
.../proto/tests/cases/roundtrip_logical_plan.rs | 18 +++---
datafusion/sql/src/parser.rs | 24 ++++++--
.../test_files/create_external_table.slt | 21 +++++--
.../test_files/tpch/create_tables.slt.part | 2 +-
6 files changed, 84 insertions(+), 70 deletions(-)
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 0f1d9b8f02..a4f937b6e2 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -130,9 +130,9 @@ macro_rules! config_namespace {
$(
stringify!($field_name) => self.$field_name.set(rem,
value),
)*
- _ => return Err(DataFusionError::Configuration(format!(
+ _ => return _config_err!(
"Config value \"{}\" not found on {}", key,
stringify!($struct_name)
- )))
+ )
}
}
@@ -676,22 +676,17 @@ impl ConfigOptions {
/// Set a configuration option
pub fn set(&mut self, key: &str, value: &str) -> Result<()> {
- let (prefix, key) = key.split_once('.').ok_or_else(|| {
- DataFusionError::Configuration(format!(
- "could not find config namespace for key \"{key}\"",
- ))
- })?;
+ let Some((prefix, key)) = key.split_once('.') else {
+ return _config_err!("could not find config namespace for key
\"{key}\"");
+ };
if prefix == "datafusion" {
return ConfigField::set(self, key, value);
}
- let e = self.extensions.0.get_mut(prefix);
- let e = e.ok_or_else(|| {
- DataFusionError::Configuration(format!(
- "Could not find config namespace \"{prefix}\""
- ))
- })?;
+ let Some(e) = self.extensions.0.get_mut(prefix) else {
+ return _config_err!("Could not find config namespace
\"{prefix}\"");
+ };
e.0.set(key, value)
}
@@ -1279,22 +1274,17 @@ impl TableOptions {
///
/// A result indicating success or failure in setting the configuration
option.
pub fn set(&mut self, key: &str, value: &str) -> Result<()> {
- let (prefix, _) = key.split_once('.').ok_or_else(|| {
- DataFusionError::Configuration(format!(
- "could not find config namespace for key \"{key}\""
- ))
- })?;
+ let Some((prefix, _)) = key.split_once('.') else {
+ return _config_err!("could not find config namespace for key
\"{key}\"");
+ };
if prefix == "format" {
return ConfigField::set(self, key, value);
}
- let e = self.extensions.0.get_mut(prefix);
- let e = e.ok_or_else(|| {
- DataFusionError::Configuration(format!(
- "Could not find config namespace \"{prefix}\""
- ))
- })?;
+ let Some(e) = self.extensions.0.get_mut(prefix) else {
+ return _config_err!("Could not find config namespace
\"{prefix}\"");
+ };
e.0.set(key, value)
}
@@ -1413,19 +1403,19 @@ impl ConfigField for TableParquetOptions {
fn set(&mut self, key: &str, value: &str) -> Result<()> {
// Determine if the key is a global, metadata, or column-specific
setting
if key.starts_with("metadata::") {
- let k =
- match key.split("::").collect::<Vec<_>>()[..] {
- [_meta] | [_meta, ""] => return
Err(DataFusionError::Configuration(
+ let k = match key.split("::").collect::<Vec<_>>()[..] {
+ [_meta] | [_meta, ""] => {
+ return _config_err!(
"Invalid metadata key provided, missing key in
metadata::<key>"
- .to_string(),
- )),
- [_meta, k] => k.into(),
- _ => {
- return Err(DataFusionError::Configuration(format!(
+ )
+ }
+ [_meta, k] => k.into(),
+ _ => {
+ return _config_err!(
"Invalid metadata key provided, found too many '::' in
\"{key}\""
- )))
- }
- };
+ )
+ }
+ };
self.key_value_metadata.insert(k, Some(value.into()));
Ok(())
} else if key.contains("::") {
@@ -1498,10 +1488,7 @@ macro_rules! config_namespace_with_hashmap {
inner_value.set(inner_key, value)
}
- _ => Err(DataFusionError::Configuration(format!(
- "Unrecognized key '{}'.",
- key
- ))),
+ _ => _config_err!("Unrecognized key '{key}'."),
}
}
diff --git a/datafusion/core/src/execution/context/mod.rs
b/datafusion/core/src/execution/context/mod.rs
index e69a249410..2fc1a19c33 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -23,6 +23,8 @@ use std::ops::ControlFlow;
use std::sync::{Arc, Weak};
use super::options::ReadOptions;
+#[cfg(feature = "array_expressions")]
+use crate::functions_array;
use crate::{
catalog::information_schema::{InformationSchemaProvider,
INFORMATION_SCHEMA},
catalog::listing_schema::ListingSchemaProvider,
@@ -53,53 +55,49 @@ use crate::{
},
optimizer::analyzer::{Analyzer, AnalyzerRule},
optimizer::optimizer::{Optimizer, OptimizerConfig, OptimizerRule},
+ physical_expr::{create_physical_expr, PhysicalExpr},
physical_optimizer::optimizer::{PhysicalOptimizer, PhysicalOptimizerRule},
physical_plan::ExecutionPlan,
physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner},
variable::{VarProvider, VarType},
};
-
-#[cfg(feature = "array_expressions")]
-use crate::functions_array;
use crate::{functions, functions_aggregate};
use arrow::datatypes::{DataType, SchemaRef};
use arrow::record_batch::RecordBatch;
use arrow_schema::Schema;
-use async_trait::async_trait;
-use chrono::{DateTime, Utc};
-use datafusion_common::tree_node::TreeNode;
use datafusion_common::{
alias::AliasGenerator,
config::{ConfigExtension, TableOptions},
exec_err, not_impl_err, plan_datafusion_err, plan_err,
- tree_node::{TreeNodeRecursion, TreeNodeVisitor},
+ tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor},
DFSchema, SchemaReference, TableReference,
};
use datafusion_execution::registry::SerializerRegistry;
use datafusion_expr::{
+ expr_rewriter::FunctionRewrite,
logical_plan::{DdlStatement, Statement},
+ simplify::SimplifyInfo,
var_provider::is_system_variables,
Expr, ExprSchemable, StringifiedPlan, UserDefinedLogicalNode, WindowUDF,
};
+use datafusion_optimizer::simplify_expressions::ExprSimplifier;
use datafusion_sql::{
parser::{CopyToSource, CopyToStatement, DFParser},
planner::{object_name_to_table_reference, ContextProvider, ParserOptions,
SqlToRel},
ResolvedTableReference,
};
-use parking_lot::RwLock;
use sqlparser::dialect::dialect_from_str;
+
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use parking_lot::RwLock;
use url::Url;
use uuid::Uuid;
-use crate::physical_expr::PhysicalExpr;
pub use datafusion_execution::config::SessionConfig;
pub use datafusion_execution::TaskContext;
pub use datafusion_expr::execution_props::ExecutionProps;
-use datafusion_expr::expr_rewriter::FunctionRewrite;
-use datafusion_expr::simplify::SimplifyInfo;
-use datafusion_optimizer::simplify_expressions::ExprSimplifier;
-use datafusion_physical_expr::create_physical_expr;
mod avro;
mod csv;
diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
index e5e57c0bc8..2927fd01d1 100644
--- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs
@@ -15,6 +15,12 @@
// specific language governing permissions and limitations
// under the License.
+use std::any::Any;
+use std::collections::HashMap;
+use std::fmt::{self, Debug, Formatter};
+use std::sync::Arc;
+use std::vec;
+
use arrow::array::{ArrayRef, FixedSizeListArray};
use arrow::datatypes::{
DataType, Field, Fields, Int32Type, IntervalDayTimeType,
IntervalMonthDayNanoType,
@@ -24,6 +30,7 @@ use datafusion::datasource::provider::TableProviderFactory;
use datafusion::datasource::TableProvider;
use datafusion::execution::context::SessionState;
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
+use datafusion::execution::FunctionRegistry;
use datafusion::functions_aggregate::covariance::{covar_pop, covar_samp};
use datafusion::functions_aggregate::expr_fn::first_value;
use datafusion::prelude::*;
@@ -51,16 +58,11 @@ use datafusion_proto::bytes::{
logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec,
};
use datafusion_proto::logical_plan::to_proto::serialize_expr;
-use datafusion_proto::logical_plan::LogicalExtensionCodec;
-use datafusion_proto::logical_plan::{from_proto, DefaultLogicalExtensionCodec};
+use datafusion_proto::logical_plan::{
+ from_proto, DefaultLogicalExtensionCodec, LogicalExtensionCodec,
+};
use datafusion_proto::protobuf;
-use std::any::Any;
-use std::collections::HashMap;
-use std::fmt::{self, Debug, Formatter};
-use std::sync::Arc;
-use std::vec;
-use datafusion::execution::FunctionRegistry;
use prost::Message;
#[cfg(feature = "json")]
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index f61c9cda63..d09317271d 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -462,7 +462,21 @@ impl<'a> DFParser<'a> {
pub fn parse_option_key(&mut self) -> Result<String, ParserError> {
let next_token = self.parser.next_token();
match next_token.token {
- Token::Word(Word { value, .. }) => Ok(value),
+ Token::Word(Word { value, .. }) => {
+ let mut parts = vec![value];
+ while self.parser.consume_token(&Token::Period) {
+ let next_token = self.parser.next_token();
+ if let Token::Word(Word { value, .. }) = next_token.token {
+ parts.push(value);
+ } else {
+ // Unquoted namespaced keys have to conform to the
syntax
+ // "<WORD>[\.<WORD>]*". If we have a key that breaks
this
+ // pattern, error out:
+ return self.parser.expected("key name", next_token);
+ }
+ }
+ Ok(parts.join("."))
+ }
Token::SingleQuotedString(s) => Ok(s),
Token::DoubleQuotedString(s) => Ok(s),
Token::EscapedStringLiteral(s) => Ok(s),
@@ -712,15 +726,15 @@ impl<'a> DFParser<'a> {
} else {
self.parser.expect_keyword(Keyword::HEADER)?;
self.parser.expect_keyword(Keyword::ROW)?;
- return parser_err!("WITH HEADER ROW clause is no
longer in use. Please use the OPTIONS clause with 'format.has_header' set
appropriately, e.g., OPTIONS ('format.has_header' 'true')");
+ return parser_err!("WITH HEADER ROW clause is no
longer in use. Please use the OPTIONS clause with 'format.has_header' set
appropriately, e.g., OPTIONS (format.has_header true)");
}
}
Keyword::DELIMITER => {
- return parser_err!("DELIMITER clause is no longer in
use. Please use the OPTIONS clause with 'format.delimiter' set appropriately,
e.g., OPTIONS ('format.delimiter' ',')");
+ return parser_err!("DELIMITER clause is no longer in
use. Please use the OPTIONS clause with 'format.delimiter' set appropriately,
e.g., OPTIONS (format.delimiter ',')");
}
Keyword::COMPRESSION => {
self.parser.expect_keyword(Keyword::TYPE)?;
- return parser_err!("COMPRESSION TYPE clause is no
longer in use. Please use the OPTIONS clause with 'format.compression' set
appropriately, e.g., OPTIONS ('format.compression' 'gzip')");
+ return parser_err!("COMPRESSION TYPE clause is no
longer in use. Please use the OPTIONS clause with 'format.compression' set
appropriately, e.g., OPTIONS (format.compression gzip)");
}
Keyword::PARTITIONED => {
self.parser.expect_keyword(Keyword::BY)?;
@@ -933,7 +947,7 @@ mod tests {
expect_parse_ok(sql, expected)?;
// positive case with delimiter
- let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION
'foo.csv' OPTIONS ('format.delimiter' '|')";
+ let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION
'foo.csv' OPTIONS (format.delimiter '|')";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt
b/datafusion/sqllogictest/test_files/create_external_table.slt
index fca177bb61..607c909fd6 100644
--- a/datafusion/sqllogictest/test_files/create_external_table.slt
+++ b/datafusion/sqllogictest/test_files/create_external_table.slt
@@ -190,8 +190,8 @@ LOCATION
'test_files/scratch/create_external_table/manual_partitioning/';
statement error DataFusion error: Error during planning: Option
format.delimiter is specified multiple times
CREATE EXTERNAL TABLE t STORED AS CSV OPTIONS (
'format.delimiter' '*',
- 'format.has_header' 'true',
- 'format.delimiter' '|')
+ 'format.has_header' 'true',
+ 'format.delimiter' '|')
LOCATION 'foo.csv';
# If a config does not belong to any namespace, we assume it is a 'format'
option and apply the 'format' prefix for backwards compatibility.
@@ -201,7 +201,20 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region (
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
-) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
+) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
OPTIONS (
'format.delimiter' '|',
- 'has_header' 'false');
\ No newline at end of file
+ 'has_header' 'false');
+
+# Verify that we do not need quotations for simple namespaced keys.
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS region (
+ r_regionkey BIGINT,
+ r_name VARCHAR,
+ r_comment VARCHAR,
+ r_rev VARCHAR,
+) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
+OPTIONS (
+ format.delimiter '|',
+ has_header false,
+ compression gzip);
diff --git a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
index 111d247730..75bcbc198b 100644
--- a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
@@ -121,4 +121,4 @@ CREATE EXTERNAL TABLE IF NOT EXISTS region (
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
-) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS
('format.delimiter' '|');
\ No newline at end of file
+) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS
('format.delimiter' '|');
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]