alamb commented on code in PR #17703:
URL: https://github.com/apache/datafusion/pull/17703#discussion_r2368556894


##########
datafusion/physical-plan/src/filter.rs:
##########
@@ -716,7 +716,7 @@ impl RecordBatchStream for FilterExecStream {
 }
 
 /// Return the equals Column-Pairs and Non-equals Column-Pairs
-pub fn collect_columns_from_predicate(
+fn collect_columns_from_predicate(

Review Comment:
   maybe you can leave a `#deprecated` version of the old name (that just 
calles the new name) as described in 
https://datafusion.apache.org/contributor-guide/api-health.html#deprecation-guidelines



##########
datafusion/datasource/src/file_scan_config.rs:
##########
@@ -764,24 +751,24 @@ impl FileScanConfig {
         eq_properties: &mut EquivalenceProperties,
         schema: &Schema,
     ) -> Result<()> {
-        macro_rules! ignore_dangling_col {
-            ($col:expr) => {
-                if let Some(col) = $col.as_any().downcast_ref::<Column>() {
-                    if schema.index_of(col.name()).is_err() {
-                        continue;
+        // Gather valid equality pairs from the filter expression
+        let equal_pairs = 
split_conjunction(&filter).into_iter().filter_map(|expr| {
+            // Ignore any binary expressions that reference non-existent 
columns in the current schema
+            // (e.g. due to unnecessary projections being removed)
+            reassign_expr_columns(Arc::clone(expr), schema)
+                .ok()

Review Comment:
   is this `ok` actually required? Does anything fail if we remove it?
   
   It seems suspicious to me (though I see it is how the old code works too)



##########
datafusion/physical-expr/src/utils/mod.rs:
##########
@@ -238,22 +238,26 @@ pub fn collect_columns(expr: &Arc<dyn PhysicalExpr>) -> 
HashSet<Column> {
     columns
 }
 
-/// Re-assign column indices referenced in predicate according to given schema.
-/// This may be helpful when dealing with projections.
-pub fn reassign_predicate_columns(
-    pred: Arc<dyn PhysicalExpr>,
+/// Re-assign indices of [`Column`]s within the given [`PhysicalExpr`] 
according to
+/// the provided [`Schema`].
+///
+/// This can be useful when attempting to map an expression onto a different 
schema.
+///
+/// # Errors
+///
+/// This function will return an error if any column in the expression cannot 
be found
+/// in the provided schema.
+pub fn reassign_expr_columns(
+    expr: Arc<dyn PhysicalExpr>,
     schema: &Schema,
-    ignore_not_found: bool,
 ) -> Result<Arc<dyn PhysicalExpr>> {
-    pred.transform_down(|expr| {
-        let expr_any = expr.as_any();
-
-        if let Some(column) = expr_any.downcast_ref::<Column>() {
+    expr.transform_down(|expr| {
+        if let Some(column) = expr.as_any().downcast_ref::<Column>() {
             let index = match schema.index_of(column.name()) {
                 Ok(idx) => idx,
-                Err(_) if ignore_not_found => usize::MAX,
                 Err(e) => return Err(e.into()),
             };

Review Comment:
   You could further simplify this to 
   
   ```suggestion
   let index = match schema.index_of(column.name())?;
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to