This is an automated email from the ASF dual-hosted git repository.

findepi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new fdc54b7a98 Fix predicate simplification for incompatible types in 
push_down_filter (#17521)
fdc54b7a98 is described below

commit fdc54b7a9891c4ebe9722778569354fb4ac34b00
Author: Adrian Garcia Badaracco <1755071+adria...@users.noreply.github.com>
AuthorDate: Fri Sep 12 08:14:59 2025 -0500

    Fix predicate simplification for incompatible types in push_down_filter 
(#17521)
    
    * Fix predicate simplification for incompatible types in push_down_filter
    
    Handle the case where scalar values with incompatible types (e.g.,
    TimestampMicrosecond vs Time64Nanosecond) cannot be compared during
    predicate simplification. The fix catches the comparison error in
    find_most_restrictive_predicate and returns None to indicate we can't
    simplify when types are incompatible, preventing the "Uncomparable values"
    error.
    
    Fixes #17512
    
    🤖 Generated with [Claude Code](https://claude.ai/code)
    
    Co-Authored-By: Claude <nore...@anthropic.com>
    
    * minimize diff
    
    * use proposed fix, add test
    
    * revert other changes
    
    ---------
    
    Co-authored-by: Claude <nore...@anthropic.com>
---
 .../simplify_expressions/simplify_predicates.rs    | 97 +++++++++++++++++++++-
 .../sqllogictest/test_files/push_down_filter.slt   | 25 ++++++
 2 files changed, 119 insertions(+), 3 deletions(-)

diff --git 
a/datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs 
b/datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs
index 9d9e840636..131404e607 100644
--- a/datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs
+++ b/datafusion/optimizer/src/simplify_expressions/simplify_predicates.rs
@@ -26,7 +26,7 @@
 //! encompasses the former, resulting in fewer checks during query execution.
 
 use datafusion_common::{Column, Result, ScalarValue};
-use datafusion_expr::{BinaryExpr, Cast, Expr, Operator};
+use datafusion_expr::{BinaryExpr, Expr, Operator};
 use std::collections::BTreeMap;
 
 /// Simplifies a list of predicates by removing redundancies.
@@ -239,8 +239,99 @@ fn find_most_restrictive_predicate(
 fn extract_column_from_expr(expr: &Expr) -> Option<Column> {
     match expr {
         Expr::Column(col) => Some(col.clone()),
-        // Handle cases where the column might be wrapped in a cast or other 
operation
-        Expr::Cast(Cast { expr, .. }) => extract_column_from_expr(expr),
         _ => None,
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow::datatypes::DataType;
+    use datafusion_expr::{cast, col, lit};
+
+    #[test]
+    fn test_simplify_predicates_with_cast() {
+        // Test that predicates on cast expressions are not grouped with 
predicates on the raw column
+        // a < 5 AND CAST(a AS varchar) < 'abc' AND a < 6
+        // Should simplify to:
+        // a < 5 AND CAST(a AS varchar) < 'abc'
+
+        let predicates = vec![
+            col("a").lt(lit(5i32)),
+            cast(col("a"), DataType::Utf8).lt(lit("abc")),
+            col("a").lt(lit(6i32)),
+        ];
+
+        let result = simplify_predicates(predicates).unwrap();
+
+        // Should have 2 predicates: a < 5 and CAST(a AS varchar) < 'abc'
+        assert_eq!(result.len(), 2);
+
+        // Check that the cast predicate is preserved
+        let has_cast_predicate = result.iter().any(|p| {
+            matches!(p, Expr::BinaryExpr(BinaryExpr { 
+                left, 
+                op: Operator::Lt, 
+                right 
+            }) if matches!(left.as_ref(), Expr::Cast(_)) && right == 
&Box::new(lit("abc")))
+        });
+        assert!(has_cast_predicate, "Cast predicate should be preserved");
+
+        // Check that we have the more restrictive column predicate (a < 5)
+        let has_column_predicate = result.iter().any(|p| {
+            matches!(p, Expr::BinaryExpr(BinaryExpr { 
+                left, 
+                op: Operator::Lt, 
+                right 
+            }) if left == &Box::new(col("a")) && right == &Box::new(lit(5i32)))
+        });
+        assert!(has_column_predicate, "Should have a < 5 predicate");
+    }
+
+    #[test]
+    fn test_extract_column_ignores_cast() {
+        // Test that extract_column_from_expr does not extract columns from 
cast expressions
+        let cast_expr = cast(col("a"), DataType::Utf8);
+        assert_eq!(extract_column_from_expr(&cast_expr), None);
+
+        // Test that it still extracts from direct column references
+        let col_expr = col("a");
+        assert_eq!(extract_column_from_expr(&col_expr), 
Some(Column::from("a")));
+    }
+
+    #[test]
+    fn test_simplify_predicates_direct_columns_only() {
+        // Test that only predicates on direct columns are simplified together
+        let predicates = vec![
+            col("a").lt(lit(5i32)),
+            col("a").lt(lit(3i32)),
+            col("b").gt(lit(10i32)),
+            col("b").gt(lit(20i32)),
+        ];
+
+        let result = simplify_predicates(predicates).unwrap();
+
+        // Should have 2 predicates: a < 3 and b > 20 (most restrictive for 
each column)
+        assert_eq!(result.len(), 2);
+
+        // Check for a < 3
+        let has_a_predicate = result.iter().any(|p| {
+            matches!(p, Expr::BinaryExpr(BinaryExpr { 
+                left, 
+                op: Operator::Lt, 
+                right 
+            }) if left == &Box::new(col("a")) && right == &Box::new(lit(3i32)))
+        });
+        assert!(has_a_predicate, "Should have a < 3 predicate");
+
+        // Check for b > 20
+        let has_b_predicate = result.iter().any(|p| {
+            matches!(p, Expr::BinaryExpr(BinaryExpr { 
+                left, 
+                op: Operator::Gt, 
+                right 
+            }) if left == &Box::new(col("b")) && right == 
&Box::new(lit(20i32)))
+        });
+        assert!(has_b_predicate, "Should have b > 20 predicate");
+    }
+}
diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt 
b/datafusion/sqllogictest/test_files/push_down_filter.slt
index a7e3338df3..3a6faf4654 100644
--- a/datafusion/sqllogictest/test_files/push_down_filter.slt
+++ b/datafusion/sqllogictest/test_files/push_down_filter.slt
@@ -395,3 +395,28 @@ order by t1.k, t2.v;
 ----
 1 1 1
 10000000 10000000 10000000
+
+# Regression test for https://github.com/apache/datafusion/issues/17512
+
+query I
+COPY (
+    SELECT arrow_cast('2025-01-01T00:00:00Z'::timestamptz, 
'Timestamp(Microsecond, Some("UTC"))') AS start_timestamp
+)
+TO 'test_files/scratch/push_down_filter/17512.parquet'
+STORED AS PARQUET;
+----
+1
+
+statement ok
+CREATE EXTERNAL TABLE records STORED AS PARQUET LOCATION 
'test_files/scratch/push_down_filter/17512.parquet';
+
+query I
+SELECT 1
+FROM (
+    SELECT start_timestamp
+    FROM records
+    WHERE start_timestamp <= '2025-01-01T00:00:00Z'::timestamptz
+) AS t
+WHERE t.start_timestamp::time < '00:00:01'::time;
+----
+1


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to