This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 14972e6ae4 Fix COPY TO failing on passing format options through CLI (#9709)
14972e6ae4 is described below
commit 14972e6ae4be799450d1fbb81073fa0e1cbe57bc
Author: Kunal Kundu <[email protected]>
AuthorDate: Thu Mar 21 05:24:04 2024 +0530
Fix COPY TO failing on passing format options through CLI (#9709)
* Fix COPY TO failing on passing format options through CLI
* fix clippy lint error
---
datafusion-cli/src/exec.rs | 20 ++++++++++++++++++--
datafusion/common/src/file_options/file_type.rs | 14 ++++++++++++++
2 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs
index ea765ee8ec..4e374a4c00 100644
--- a/datafusion-cli/src/exec.rs
+++ b/datafusion-cli/src/exec.rs
@@ -40,6 +40,7 @@ use datafusion::prelude::SessionContext;
use datafusion::sql::parser::{DFParser, Statement};
use datafusion::sql::sqlparser::dialect::dialect_from_str;
+use datafusion_common::FileType;
use rustyline::error::ReadlineError;
use rustyline::Editor;
use tokio::signal;
@@ -257,15 +258,23 @@ async fn create_plan(
// datafusion-cli specific options before passing through to datafusion. Otherwise, datafusion
// will raise Configuration errors.
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan {
- register_object_store_and_config_extensions(ctx, &cmd.location, &cmd.options)
- .await?;
+ register_object_store_and_config_extensions(
+ ctx,
+ &cmd.location,
+ &cmd.options,
+ None,
+ )
+ .await?;
}
if let LogicalPlan::Copy(copy_to) = &mut plan {
+ let format: FileType = (&copy_to.format_options).into();
+
register_object_store_and_config_extensions(
ctx,
&copy_to.output_url,
&copy_to.options,
+ Some(format),
)
.await?;
}
@@ -303,6 +312,7 @@ pub(crate) async fn register_object_store_and_config_extensions(
ctx: &SessionContext,
location: &String,
options: &HashMap<String, String>,
+ format: Option<FileType>,
) -> Result<()> {
// Parse the location URL to extract the scheme and other components
let table_path = ListingTableUrl::parse(location)?;
@@ -318,6 +328,9 @@ pub(crate) async fn register_object_store_and_config_extensions(
// Clone and modify the default table options based on the provided options
let mut table_options = ctx.state().default_table_options().clone();
+ if let Some(format) = format {
+ table_options.set_file_format(format);
+ }
table_options.alter_with_string_hash_map(options)?;
// Retrieve the appropriate object store based on the scheme, URL, and modified table options
@@ -347,6 +360,7 @@ mod tests {
&ctx,
&cmd.location,
&cmd.options,
+ None,
)
.await?;
} else {
@@ -367,10 +381,12 @@ mod tests {
let plan = ctx.state().create_logical_plan(sql).await?;
if let LogicalPlan::Copy(cmd) = &plan {
+ let format: FileType = (&cmd.format_options).into();
register_object_store_and_config_extensions(
&ctx,
&cmd.output_url,
&cmd.options,
+ Some(format),
)
.await?;
} else {
diff --git a/datafusion/common/src/file_options/file_type.rs b/datafusion/common/src/file_options/file_type.rs
index 812cb02a5f..fc0bb74456 100644
--- a/datafusion/common/src/file_options/file_type.rs
+++ b/datafusion/common/src/file_options/file_type.rs
@@ -20,6 +20,7 @@
use std::fmt::{self, Display};
use std::str::FromStr;
+use crate::config::FormatOptions;
use crate::error::{DataFusionError, Result};
/// The default file extension of arrow files
@@ -55,6 +56,19 @@ pub enum FileType {
JSON,
}
+impl From<&FormatOptions> for FileType {
+ fn from(value: &FormatOptions) -> Self {
+ match value {
+ FormatOptions::CSV(_) => FileType::CSV,
+ FormatOptions::JSON(_) => FileType::JSON,
+ #[cfg(feature = "parquet")]
+ FormatOptions::PARQUET(_) => FileType::PARQUET,
+ FormatOptions::AVRO => FileType::AVRO,
+ FormatOptions::ARROW => FileType::ARROW,
+ }
+ }
+}
+
impl GetExt for FileType {
fn get_ext(&self) -> String {
match self {