This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new c095fee4a1 fix: pass `quote` parameter to CSV writer (#10671)
c095fee4a1 is described below
commit c095fee4a1675f37385656c110ae7aa553fb1a55
Author: Artem Medvedev <[email protected]>
AuthorDate: Sun May 26 12:17:13 2024 +0200
fix: pass `quote` parameter to CSV writer (#10671)
Closes #10670
---
datafusion/common/src/file_options/csv_writer.rs | 1 +
datafusion/sqllogictest/test_files/csv_files.slt | 41 ++++++++++++++++++++++++
2 files changed, 42 insertions(+)
diff --git a/datafusion/common/src/file_options/csv_writer.rs b/datafusion/common/src/file_options/csv_writer.rs
index 2904ea0f8f..4f948a29ad 100644
--- a/datafusion/common/src/file_options/csv_writer.rs
+++ b/datafusion/common/src/file_options/csv_writer.rs
@@ -51,6 +51,7 @@ impl TryFrom<&CsvOptions> for CsvWriterOptions {
fn try_from(value: &CsvOptions) -> Result<Self> {
let mut builder = WriterBuilder::default()
.with_header(value.has_header.unwrap_or(false))
+ .with_quote(value.quote)
.with_delimiter(value.delimiter);
if let Some(v) = &value.date_format {
diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt
index 50477e1dab..f581fa9abc 100644
--- a/datafusion/sqllogictest/test_files/csv_files.slt
+++ b/datafusion/sqllogictest/test_files/csv_files.slt
@@ -161,3 +161,44 @@ physical_plan
01)SortPreservingMergeExec: [int_col@0 ASC NULLS LAST]
02)--SortExec: expr=[int_col@0 ASC NULLS LAST], preserve_partitioning=[true]
03)----CsvExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/1.csv], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/csv_files/csv_partitions/2.csv]]}, projection=[int_col, string_col, bigint_col, partition_col], has_header=false
+
+
+# ensure that correct quote character is used when writing to csv
+statement ok
+CREATE TABLE table_with_necessary_quoting (
+ int_col INT,
+ string_col TEXT,
+) AS VALUES
+(1, 'e|e|e'),
+(2, 'f|f|f'),
+(3, 'g|g|g'),
+(4, 'h|h|h');
+
+# quote is required because `|` is delimiter and part of the data
+query IT
+COPY table_with_necessary_quoting TO 'test_files/scratch/csv_files/table_with_necessary_quoting.csv'
+STORED AS csv
+OPTIONS ('format.quote' '~',
+ 'format.delimiter' '|',
+ 'format.has_header' 'true');
+----
+4
+
+# read the stored csv file with quote character
+statement ok
+CREATE EXTERNAL TABLE stored_table_with_necessary_quoting (
+c1 VARCHAR,
+c2 VARCHAR
+) STORED AS CSV
+LOCATION 'test_files/scratch/csv_files/table_with_necessary_quoting.csv'
+OPTIONS ('format.quote' '~',
+ 'format.delimiter' '|',
+ 'format.has_header' 'true');
+
+query TT
+select * from stored_table_with_necessary_quoting;
+----
+1 e|e|e
+2 f|f|f
+3 g|g|g
+4 h|h|h
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]