This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 0088c28254 Remove FORMAT <..> backwards compatibility options from COPY (#9985)
0088c28254 is described below
commit 0088c28254ea7ab1fae66b5f1bfcc66e0c9aa7a7
Author: Kunal Kundu <[email protected]>
AuthorDate: Tue Apr 9 02:01:41 2024 +0530
Remove FORMAT <..> backwards compatibility options from COPY (#9985)
* Revert "Add test for reading back file created with FORMAT options (#9753)"
This reverts commit b50f3aad043da9de613f422f20f7aa916ce55776.
* Revert "support format in options of COPY command (#9744)"
This reverts commit 40fb1b859be4dd399922c498d49b9b847874af2b.
* update docs and example to remove old syntax
---
datafusion/sql/src/parser.rs | 4 +-
datafusion/sql/src/statement.rs | 12 ++----
datafusion/sql/tests/sql_integration.rs | 12 ------
datafusion/sqllogictest/test_files/copy.slt | 59 -----------------------------
docs/source/user-guide/sql/dml.md | 3 +-
5 files changed, 7 insertions(+), 83 deletions(-)
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 67fa1325ee..5a999ab21d 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -87,11 +87,11 @@ impl fmt::Display for ExplainStatement {
///
/// ```sql
/// COPY lineitem TO 'lineitem'
-/// (format parquet,
+/// STORED AS PARQUET (
/// partitions 16,
/// row_group_limit_rows 100000,
/// row_group_limit_bytes 200000
-/// )
+/// )
///
/// COPY (SELECT l_orderkey from lineitem) to 'lineitem.parquet';
/// ```
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 6b89f89aac..1bb024733c 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -850,7 +850,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
return plan_err!("Unsupported Value in COPY statement {}",
value);
}
};
- if !(key.contains('.') || key == "format") {
+ if !(&key.contains('.')) {
// If config does not belong to any namespace, assume it is
// a format option and apply the format prefix for backwards
// compatibility.
@@ -866,16 +866,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
FileType::from_str(&file_type).map_err(|_| {
DataFusionError::Configuration(format!("Unknown FileType {}",
file_type))
})?
- } else if let Some(format) = options.remove("format") {
- // try to infer file format from the "format" key in options
- FileType::from_str(&format)
- .map_err(|e| DataFusionError::Configuration(format!("{}", e)))?
} else {
let e = || {
DataFusionError::Configuration(
- "Format not explicitly set and unable to get file extension! Use STORED AS to define file format."
- .to_string(),
- )
+ "Format not explicitly set and unable to get file extension! Use STORED AS to define file format."
+ .to_string(),
+ )
};
// try to infer file format from file extension
let extension: &str = &Path::new(&statement.target)
diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs
index f2f188105f..e923a15372 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -444,18 +444,6 @@ CopyTo: format=csv output_url=output.csv options: ()
quick_test(sql, plan);
}
-#[test]
-fn plan_copy_stored_as_priority() {
- let sql = "COPY (select * from (values (1))) to 'output/' STORED AS CSV OPTIONS (format json)";
- let plan = r#"
-CopyTo: format=csv output_url=output/ options: (format json)
- Projection: column1
- Values: (Int64(1))
- "#
- .trim();
- quick_test(sql, plan);
-}
-
#[test]
fn plan_insert() {
let sql =
diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt
index 95b6d29db4..75f1ccb07a 100644
--- a/datafusion/sqllogictest/test_files/copy.slt
+++ b/datafusion/sqllogictest/test_files/copy.slt
@@ -514,65 +514,6 @@ OPTIONS (
);
-# Format Options Support with format in OPTIONS
-#
-# i.e. COPY { table_name | query } TO 'file_name' OPTIONS (format <format-name>, ...)
-
-# Ensure that the format is set in the OPTIONS, not extension
-query I
-COPY (select * from (values (1))) to 'test_files/scratch/copy/foo.dat'
-OPTIONS (format parquet);
-----
-1
-
-statement ok
-CREATE EXTERNAL TABLE foo_dat STORED AS PARQUET LOCATION 'test_files/scratch/copy/foo.dat';
-
-query I
-select * from foo_dat;
-----
-1
-
-statement ok
-DROP TABLE foo_dat;
-
-
-query I
-COPY (select * from (values (1))) to 'test_files/scratch/copy'
-OPTIONS (format parquet);
-----
-1
-
-query I
-COPY (select * from (values (1))) to 'test_files/scratch/copy/'
-OPTIONS (format parquet, compression 'zstd(10)');
-----
-1
-
-query I
-COPY (select * from (values (1))) to 'test_files/scratch/copy/'
-OPTIONS (format json, compression gzip);
-----
-1
-
-query I
-COPY (select * from (values (1))) to 'test_files/scratch/copy/'
-OPTIONS (
- format csv,
- has_header false,
- compression xz,
- datetime_format '%FT%H:%M:%S.%9f',
- delimiter ';',
- null_value 'NULLVAL'
-);
-----
-1
-
-query error DataFusion error: Invalid or Unsupported Configuration: This feature is not implemented: Unknown FileType: NOTVALIDFORMAT
-COPY (select * from (values (1))) to 'test_files/scratch/copy/'
-OPTIONS (format notvalidformat, compression 'zstd(5)');
-
-
# Error cases:
# Copy from table with options
diff --git a/docs/source/user-guide/sql/dml.md b/docs/source/user-guide/sql/dml.md
index 666e86b460..42e0c8054c 100644
--- a/docs/source/user-guide/sql/dml.md
+++ b/docs/source/user-guide/sql/dml.md
@@ -44,8 +44,7 @@ separate hive-style directories.
The output format is determined by the first match of the following rules:
1. Value of `STORED AS`
-2. Value of the `OPTION (FORMAT ..)`
-3. Filename extension (e.g. `foo.parquet` implies `PARQUET` format)
+2. Filename extension (e.g. `foo.parquet` implies `PARQUET` format)
For a detailed list of valid OPTIONS, see [Write Options](write_options).