Dandandan commented on a change in pull request #561:
URL: https://github.com/apache/arrow-datafusion/pull/561#discussion_r651243733



##########
File path: datafusion/src/physical_optimizer/pruning.rs
##########
@@ -553,12 +553,14 @@ fn build_predicate_expression(
     let corrected_op = expr_builder.correct_operator(op);
     let statistics_expr = match corrected_op {
         Operator::NotEq => {
-            // column != literal => (min, max) = literal => min > literal || literal > max
+            // column != literal => (min, max) = literal =>
+            // !(min != literal && max != literal) ==>
+            // min != literal || literal != max
             let min_column_expr = expr_builder.min_column_expr()?;
             let max_column_expr = expr_builder.max_column_expr()?;
             min_column_expr
-                .gt(expr_builder.scalar_expr().clone())
-                .or(expr_builder.scalar_expr().clone().gt(max_column_expr))
+                .not_eq(expr_builder.scalar_expr().clone())
+                .or(expr_builder.scalar_expr().clone().not_eq(max_column_expr))

Review comment:
       No - I did some extra checks, and this seems to be the right thing.

##########
File path: datafusion/src/physical_optimizer/pruning.rs
##########
@@ -1190,6 +1192,34 @@ mod tests {
         assert_eq!(result, expected);
     }
 
+    #[test]
+    fn prune_not_eq_data() {
+        let schema = Arc::new(Schema::new(vec![Field::new("s1", DataType::Utf8, true)]));
+
+        // Prune using s2 != 'M'
+        let expr = col("s1").not_eq(lit("M"));
+
+        let statistics = TestStatistics::new().with(
+            "s1",
+            ContainerStats::new_utf8(
+                vec![Some("A"), Some("A"), Some("N"), Some("M"), None, Some("A")], // min

Review comment:
       This will also do the right thing 👍 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to