This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 692ea24357 Update Default Parquet Write Compression (#7692)
692ea24357 is described below
commit 692ea24357d32b1242c476f0ed33498c815ac921
Author: Devin D'Angelo <[email protected]>
AuthorDate: Sat Sep 30 01:22:52 2023 -0400
Update Default Parquet Write Compression (#7692)
* update compression default
* fix tests
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/common/src/config.rs | 2 +-
datafusion/sqllogictest/test_files/information_schema.slt | 2 +-
docs/source/user-guide/configs.md | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index b34c64ff88..261c2bf435 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -307,7 +307,7 @@ config_namespace! {
/// lzo, brotli(level), lz4, zstd(level), and lz4_raw.
/// These values are not case sensitive. If NULL, uses
/// default parquet writer setting
- pub compression: Option<String>, default = None
+ pub compression: Option<String>, default = Some("zstd(3)".into())
/// Sets if dictionary encoding is enabled. If NULL, uses
/// default parquet writer setting
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
index f909010216..12aa9089a0 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -156,7 +156,7 @@ datafusion.execution.parquet.bloom_filter_enabled false
datafusion.execution.parquet.bloom_filter_fpp NULL
datafusion.execution.parquet.bloom_filter_ndv NULL
datafusion.execution.parquet.column_index_truncate_length NULL
-datafusion.execution.parquet.compression NULL
+datafusion.execution.parquet.compression zstd(3)
datafusion.execution.parquet.created_by datafusion
datafusion.execution.parquet.data_page_row_count_limit 18446744073709551615
datafusion.execution.parquet.data_pagesize_limit 1048576
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
index 7fe229b4d3..638ac5a36b 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -58,7 +58,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.execution.parquet.data_pagesize_limit | 1048576
| Sets best effort maximum size of data page in bytes
[...]
| datafusion.execution.parquet.write_batch_size | 1024
| Sets write_batch_size in bytes
[...]
| datafusion.execution.parquet.writer_version | 1.0
| Sets parquet writer version valid values are "1.0" and "2.0"
[...]
-| datafusion.execution.parquet.compression | NULL
| Sets default parquet compression codec Valid values are:
uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and
lz4_raw. These values are not case sensitive. If NULL, uses default parquet
writer setting
[...]
+| datafusion.execution.parquet.compression | zstd(3)
| Sets default parquet compression codec Valid values are:
uncompressed, snappy, gzip(level), lzo, brotli(level), lz4, zstd(level), and
lz4_raw. These values are not case sensitive. If NULL, uses default parquet
writer setting
[...]
| datafusion.execution.parquet.dictionary_enabled | NULL
| Sets if dictionary encoding is enabled. If NULL, uses default
parquet writer setting
[...]
| datafusion.execution.parquet.dictionary_page_size_limit | 1048576
| Sets best effort maximum dictionary page size, in bytes
[...]
| datafusion.execution.parquet.statistics_enabled | NULL
| Sets if statistics are enabled for any column Valid values are:
"none", "chunk", and "page" These values are not case sensitive. If NULL, uses
default parquet writer setting
[...]