This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new e8ab555b76 Minor: Add ParquetExec::table_parquet_options accessor (#9909)
e8ab555b76 is described below
commit e8ab555b76368ce61a9720d79fdf2cd2615de5e3
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Apr 2 11:48:10 2024 -0400
Minor: Add ParquetExec::table_parquet_options accessor (#9909)
---
datafusion/common/src/config.rs | 2 ++
.../src/datasource/physical_plan/parquet/mod.rs | 32 +++++++++++++---------
2 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 968d8215ca..30ab9a339b 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -1362,6 +1362,8 @@ impl TableOptions {
}
}
+/// Options that control how Parquet files are read, including global options
+/// that apply to all columns and optional column-specific overrides
#[derive(Clone, Default, Debug, PartialEq)]
pub struct TableParquetOptions {
/// Global Parquet options that propagates to all columns.
diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs
index 377dad5cee..c4a888f546 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs
@@ -89,9 +89,10 @@ pub struct ParquetExec {
metadata_size_hint: Option<usize>,
/// Optional user defined parquet file reader factory
parquet_file_reader_factory: Option<Arc<dyn ParquetFileReaderFactory>>,
+ /// Cached plan properties such as equivalence properties, ordering, partitioning, etc.
cache: PlanProperties,
- /// Parquet Options
- parquet_options: TableParquetOptions,
+ /// Options for reading Parquet files
+ table_parquet_options: TableParquetOptions,
}
impl ParquetExec {
@@ -100,7 +101,7 @@ impl ParquetExec {
base_config: FileScanConfig,
predicate: Option<Arc<dyn PhysicalExpr>>,
metadata_size_hint: Option<usize>,
- parquet_options: TableParquetOptions,
+ table_parquet_options: TableParquetOptions,
) -> Self {
debug!("Creating ParquetExec, files: {:?}, projection {:?}, predicate: {:?}, limit: {:?}",
base_config.file_groups, base_config.projection, predicate, base_config.limit);
@@ -155,15 +156,20 @@ impl ParquetExec {
metadata_size_hint,
parquet_file_reader_factory: None,
cache,
- parquet_options,
+ table_parquet_options,
}
}
- /// Ref to the base configs
+ /// [`FileScanConfig`] that controls this scan (such as which files to read)
pub fn base_config(&self) -> &FileScanConfig {
&self.base_config
}
+ /// Options passed to the parquet reader for this scan
+ pub fn table_parquet_options(&self) -> &TableParquetOptions {
+ &self.table_parquet_options
+ }
+
/// Optional predicate.
pub fn predicate(&self) -> Option<&Arc<dyn PhysicalExpr>> {
self.predicate.as_ref()
@@ -197,13 +203,13 @@ impl ParquetExec {
///
/// [`Expr`]: datafusion_expr::Expr
pub fn with_pushdown_filters(mut self, pushdown_filters: bool) -> Self {
- self.parquet_options.global.pushdown_filters = pushdown_filters;
+ self.table_parquet_options.global.pushdown_filters = pushdown_filters;
self
}
/// Return the value described in [`Self::with_pushdown_filters`]
fn pushdown_filters(&self) -> bool {
- self.parquet_options.global.pushdown_filters
+ self.table_parquet_options.global.pushdown_filters
}
/// If true, the `RowFilter` made by `pushdown_filters` may try to
@@ -213,13 +219,13 @@ impl ParquetExec {
///
/// [`Expr`]: datafusion_expr::Expr
pub fn with_reorder_filters(mut self, reorder_filters: bool) -> Self {
- self.parquet_options.global.reorder_filters = reorder_filters;
+ self.table_parquet_options.global.reorder_filters = reorder_filters;
self
}
/// Return the value described in [`Self::with_reorder_filters`]
fn reorder_filters(&self) -> bool {
- self.parquet_options.global.reorder_filters
+ self.table_parquet_options.global.reorder_filters
}
/// If enabled, the reader will read the page index
@@ -227,24 +233,24 @@ impl ParquetExec {
/// via `RowSelector` and `RowFilter` by
/// eliminating unnecessary IO and decoding
pub fn with_enable_page_index(mut self, enable_page_index: bool) -> Self {
- self.parquet_options.global.enable_page_index = enable_page_index;
+ self.table_parquet_options.global.enable_page_index = enable_page_index;
self
}
/// Return the value described in [`Self::with_enable_page_index`]
fn enable_page_index(&self) -> bool {
- self.parquet_options.global.enable_page_index
+ self.table_parquet_options.global.enable_page_index
}
/// If enabled, the reader will read by the bloom filter
pub fn with_enable_bloom_filter(mut self, enable_bloom_filter: bool) -> Self {
- self.parquet_options.global.bloom_filter_enabled = enable_bloom_filter;
+ self.table_parquet_options.global.bloom_filter_enabled = enable_bloom_filter;
self
}
/// Return the value described in [`Self::with_enable_bloom_filter`]
fn enable_bloom_filter(&self) -> bool {
- self.parquet_options.global.bloom_filter_enabled
+ self.table_parquet_options.global.bloom_filter_enabled
}
fn output_partitioning_helper(file_config: &FileScanConfig) -> Partitioning {