adriangb commented on code in PR #15769:
URL: https://github.com/apache/datafusion/pull/15769#discussion_r2051060245


##########
datafusion/datasource-parquet/src/file_format.rs:
##########
@@ -453,29 +451,8 @@ impl FileFormat for ParquetFormat {
         Ok(Arc::new(DataSinkExec::new(input, sink, order_requirements)) as _)
     }
 
-    fn supports_filters_pushdown(
-        &self,
-        file_schema: &Schema,
-        table_schema: &Schema,
-        filters: &[&Expr],
-    ) -> Result<FilePushdownSupport> {
-        if !self.options().global.pushdown_filters {
-            return Ok(FilePushdownSupport::NoSupport);
-        }
-
-        let all_supported = filters.iter().all(|filter| {
-            can_expr_be_pushed_down_with_schemas(filter, file_schema, 
table_schema)
-        });
-
-        Ok(if all_supported {
-            FilePushdownSupport::Supported
-        } else {
-            FilePushdownSupport::NotSupportedForFilter
-        })
-    }
-
     fn file_source(&self) -> Arc<dyn FileSource> {
-        Arc::new(ParquetSource::default())
+        todo!() // need access of file schema?

Review Comment:
   This poses an issue.
   
   TL;DR: in order to know whether it can absorb a filter as exact, 
`ParquetSource` needs to know not only the filter but also the file schema the 
filter is applied to (in particular, to get the types of the columns, since it 
can't handle structs).



##########
datafusion/datasource-parquet/src/source.rs:
##########
@@ -587,4 +604,40 @@ impl FileSource for ParquetSource {
             }
         }
     }
+
+    fn try_pushdown_filters(
+        &self,
+        fd: FilterDescription,
+        config: &datafusion_common::config::ConfigOptions,
+    ) -> datafusion_common::Result<FilterPushdownResult<Arc<dyn FileSource>>> {
+        let mut conf = self.clone();
+        let filters = fd.filters.clone();
+        let predicate = match conf.predicate {
+            Some(predicate) => 
conjunction(std::iter::once(predicate).chain(filters.into_iter())),
+            None => conjunction(filters.into_iter()),
+        };
+        conf.predicate = Some(predicate);
+        let remaining_description = if 
config.execution.parquet.pushdown_filters {
+            let mut remaining_filters = fd.filters.clone();
+            for filter in &remaining_filters {
+                if can_expr_be_pushed_down_with_schemas(filter, 
&conf.file_schema) {

Review Comment:
   This is where we need the file schema: it is passed to 
`can_expr_be_pushed_down_with_schemas`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to