alamb commented on code in PR #12943:
URL: https://github.com/apache/datafusion/pull/12943#discussion_r1817818906


##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -628,6 +627,103 @@ fn infer_join_predicates(
         .collect::<Result<Vec<_>>>()
 }
 
+/// Check whether the given expression depends only on the columns in the 
given schema.
+/// Meaning for a certain

Review Comment:
   this sentence appears to end abruptly



##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -628,6 +627,103 @@ fn infer_join_predicates(
         .collect::<Result<Vec<_>>>()
 }
 
+/// Check whether the given expression depends only on the columns in the 
given schema.
+/// Meaning for a certain
+fn expr_depends_only_on_schema(schema: &DFSchema, expr: &Expr) -> bool {
+    let mut is_applicable = true;
+    expr.apply(|expr| match expr {
+        Expr::Column(column) => {
+            is_applicable &= schema.has_column(column);
+            if is_applicable {
+                Ok(TreeNodeRecursion::Jump)
+            } else {
+                Ok(TreeNodeRecursion::Stop)
+            }
+        }
+        Expr::Literal(_)
+        | Expr::Alias(_)
+        | Expr::OuterReferenceColumn(_, _)
+        | Expr::ScalarVariable(_, _)
+        | Expr::Not(_)
+        | Expr::IsNotNull(_)
+        | Expr::IsNull(_)
+        | Expr::IsTrue(_)
+        | Expr::IsFalse(_)
+        | Expr::IsUnknown(_)
+        | Expr::IsNotTrue(_)
+        | Expr::IsNotFalse(_)
+        | Expr::IsNotUnknown(_)
+        | Expr::Negative(_)
+        | Expr::Cast(_)
+        | Expr::TryCast(_)
+        | Expr::BinaryExpr(_)
+        | Expr::Between(_)
+        | Expr::Like(_)
+        | Expr::SimilarTo(_)
+        | Expr::InList(_)
+        | Expr::Exists(_)
+        | Expr::InSubquery(_)
+        | Expr::ScalarSubquery(_)
+        | Expr::GroupingSet(_)
+        | Expr::Case(_) => Ok(TreeNodeRecursion::Continue),
+
+        Expr::ScalarFunction(scalar_function) => {
+            match scalar_function.func.signature().volatility {
+                Volatility::Immutable => Ok(TreeNodeRecursion::Continue),
+                // TODO: Stable functions could be `applicable`, but that 
would require access to the context
+                Volatility::Stable | Volatility::Volatile => {
+                    is_applicable = false;
+                    Ok(TreeNodeRecursion::Stop)
+                }
+            }
+        }
+
+        // TODO other expressions are not handled yet:
+        // - AGGREGATE and WINDOW should not end up in filter conditions, 
except maybe in some edge cases
+        // - Can `Wildcard` be considered as a `Literal`?
+        // - ScalarVariable could be `applicable`, but that would require 
access to the context
+        Expr::AggregateFunction { .. }
+        | Expr::WindowFunction { .. }
+        | Expr::Wildcard { .. }
+        | Expr::Unnest { .. }
+        | Expr::Placeholder(_) => {
+            is_applicable = false;
+            Ok(TreeNodeRecursion::Stop)
+        }
+    })
+    .unwrap();
+    is_applicable
+}
+
+fn check_if_expr_depends_only_on_distinct_on_columns(
+    distinct_on: &DistinctOn,
+    expr: &Expr,
+) -> Result<bool> {
+    let distinct_on_cols: HashSet<&Column> = distinct_on
+        .on_expr
+        .iter()
+        .flat_map(|e| e.column_refs())
+        .collect();
+    let distinct_on_input_schema = distinct_on.input.schema();
+    let distinct_on_qualified_fields: Vec<_> = distinct_on_cols

Review Comment:
   there is a lot of copying going on in this code 
(`qualified_field_from_column` etc)
   
   Is there any way to avoid this? I wonder if we could check each reference in 
the filter expression and see if it had any column that wasn't in the distinct 
on clause



##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -628,6 +627,103 @@ fn infer_join_predicates(
         .collect::<Result<Vec<_>>>()
 }
 
+/// Check whether the given expression depends only on the columns in the 
given schema.
+/// Meaning for a certain

Review Comment:
   Also this appears to be a copy of 
https://github.com/apache/datafusion/blob/b098893a34f83f1a1df290168377d7622938b3f5/datafusion/core/src/datasource/listing/helpers.rs#L55-L54
   
   Perhaps instead of copy/pasting the code, you could move it to a common 
location (e.g. datafusion-expr)?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to