This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new e8ab555b76 Minor: Add ParquetExec::table_parquet_options accessor (#9909)
e8ab555b76 is described below

commit e8ab555b76368ce61a9720d79fdf2cd2615de5e3
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Apr 2 11:48:10 2024 -0400

    Minor: Add ParquetExec::table_parquet_options accessor (#9909)
---
 datafusion/common/src/config.rs                    |  2 ++
 .../src/datasource/physical_plan/parquet/mod.rs    | 32 +++++++++++++---------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 968d8215ca..30ab9a339b 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -1362,6 +1362,8 @@ impl TableOptions {
     }
 }
 
+/// Options that control how Parquet files are read, including global options
+/// that apply to all columns and optional column-specific overrides
 #[derive(Clone, Default, Debug, PartialEq)]
 pub struct TableParquetOptions {
     /// Global Parquet options that propagates to all columns.
diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs
index 377dad5cee..c4a888f546 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs
@@ -89,9 +89,10 @@ pub struct ParquetExec {
     metadata_size_hint: Option<usize>,
     /// Optional user defined parquet file reader factory
     parquet_file_reader_factory: Option<Arc<dyn ParquetFileReaderFactory>>,
+    /// Cached plan properties such as equivalence properties, ordering, partitioning, etc.
     cache: PlanProperties,
-    /// Parquet Options
-    parquet_options: TableParquetOptions,
+    /// Options for reading Parquet files
+    table_parquet_options: TableParquetOptions,
 }
 
 impl ParquetExec {
@@ -100,7 +101,7 @@ impl ParquetExec {
         base_config: FileScanConfig,
         predicate: Option<Arc<dyn PhysicalExpr>>,
         metadata_size_hint: Option<usize>,
-        parquet_options: TableParquetOptions,
+        table_parquet_options: TableParquetOptions,
     ) -> Self {
         debug!("Creating ParquetExec, files: {:?}, projection {:?}, predicate: {:?}, limit: {:?}",
         base_config.file_groups, base_config.projection, predicate, base_config.limit);
@@ -155,15 +156,20 @@ impl ParquetExec {
             metadata_size_hint,
             parquet_file_reader_factory: None,
             cache,
-            parquet_options,
+            table_parquet_options,
         }
     }
 
-    /// Ref to the base configs
+    /// [`FileScanConfig`] that controls this scan (such as which files to read)
     pub fn base_config(&self) -> &FileScanConfig {
         &self.base_config
     }
 
+    /// Options passed to the parquet reader for this scan
+    pub fn table_parquet_options(&self) -> &TableParquetOptions {
+        &self.table_parquet_options
+    }
+
     /// Optional predicate.
     pub fn predicate(&self) -> Option<&Arc<dyn PhysicalExpr>> {
         self.predicate.as_ref()
@@ -197,13 +203,13 @@ impl ParquetExec {
     ///
     /// [`Expr`]: datafusion_expr::Expr
     pub fn with_pushdown_filters(mut self, pushdown_filters: bool) -> Self {
-        self.parquet_options.global.pushdown_filters = pushdown_filters;
+        self.table_parquet_options.global.pushdown_filters = pushdown_filters;
         self
     }
 
     /// Return the value described in [`Self::with_pushdown_filters`]
     fn pushdown_filters(&self) -> bool {
-        self.parquet_options.global.pushdown_filters
+        self.table_parquet_options.global.pushdown_filters
     }
 
     /// If true, the `RowFilter` made by `pushdown_filters` may try to
@@ -213,13 +219,13 @@ impl ParquetExec {
     ///
     /// [`Expr`]: datafusion_expr::Expr
     pub fn with_reorder_filters(mut self, reorder_filters: bool) -> Self {
-        self.parquet_options.global.reorder_filters = reorder_filters;
+        self.table_parquet_options.global.reorder_filters = reorder_filters;
         self
     }
 
     /// Return the value described in [`Self::with_reorder_filters`]
     fn reorder_filters(&self) -> bool {
-        self.parquet_options.global.reorder_filters
+        self.table_parquet_options.global.reorder_filters
     }
 
     /// If enabled, the reader will read the page index
@@ -227,24 +233,24 @@ impl ParquetExec {
     /// via `RowSelector` and `RowFilter` by
     /// eliminating unnecessary IO and decoding
     pub fn with_enable_page_index(mut self, enable_page_index: bool) -> Self {
-        self.parquet_options.global.enable_page_index = enable_page_index;
+        self.table_parquet_options.global.enable_page_index = enable_page_index;
         self
     }
 
     /// Return the value described in [`Self::with_enable_page_index`]
     fn enable_page_index(&self) -> bool {
-        self.parquet_options.global.enable_page_index
+        self.table_parquet_options.global.enable_page_index
     }
 
     /// If enabled, the reader will read by the bloom filter
     pub fn with_enable_bloom_filter(mut self, enable_bloom_filter: bool) -> Self {
-        self.parquet_options.global.bloom_filter_enabled = enable_bloom_filter;
+        self.table_parquet_options.global.bloom_filter_enabled = enable_bloom_filter;
         self
     }
 
     /// Return the value described in [`Self::with_enable_bloom_filter`]
     fn enable_bloom_filter(&self) -> bool {
-        self.parquet_options.global.bloom_filter_enabled
+        self.table_parquet_options.global.bloom_filter_enabled
     }
 
     fn output_partitioning_helper(file_config: &FileScanConfig) -> Partitioning {

Reply via email to