alamb commented on a change in pull request #1163:
URL: https://github.com/apache/arrow-datafusion/pull/1163#discussion_r749708025



##########
File path: datafusion/src/physical_optimizer/pruning.rs
##########
@@ -1374,36 +1374,24 @@ mod tests {
 
     #[test]
     fn prune_bool_column_eq_true() {
-        let (schema, statistics, _, _) = bool_setup();
+        let (schema, statistics, expected_true, _) = bool_setup();
 
         // b1 = true
         let expr = col("b1").eq(lit(true));
         let p = PruningPredicate::try_new(&expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap_err();
-        assert!(
-            result.to_string().contains(
-                "Data type Boolean not supported for scalar operation 'lt_eq' 
on dyn array"
-            ),
-            "{}",
-            result
-        )
+        let result = p.prune(&statistics).unwrap();
+        assert_eq!(result, expected_true);

Review comment:
       pruning works for boolean columns now

##########
File path: datafusion/src/physical_plan/expressions/binary.rs
##########
@@ -49,6 +53,62 @@ use super::coercion::{
     eq_coercion, like_coercion, numerical_coercion, order_coercion, 
string_coercion,
 };
 
+// Simple (low performance) kernels until optimized kernels are added to arrow
+// TODO: file arrow-rs ticket to track this feature
+
+fn is_distinct_from_bool(
+    left: &BooleanArray,
+    right: &BooleanArray,
+) -> Result<BooleanArray> {
+    // Different from `eq` beacause not eq_bool because null == null
+    Ok(left
+        .iter()
+        .zip(right.iter())
+        .map(|(left, right)| Some(left != right))
+        .collect())
+}
+
+fn is_not_distinct_from_bool(
+    left: &BooleanArray,
+    right: &BooleanArray,
+) -> Result<BooleanArray> {
+    Ok(left
+        .iter()
+        .zip(right.iter())
+        .map(|(left, right)| Some(left == right))
+        .collect())
+}
+
+#[allow(clippy::bool_comparison)]

Review comment:
       Clippy said `left < right` was clearer if expressed as `!left && right` 
which I happen to disagree with in this case, so I ignored the clippy lint

##########
File path: datafusion/src/physical_plan/expressions/binary.rs
##########
@@ -49,6 +53,62 @@ use super::coercion::{
     eq_coercion, like_coercion, numerical_coercion, order_coercion, 
string_coercion,
 };
 
+// Simple (low performance) kernels until optimized kernels are added to arrow
+// TODO: file arrow-rs ticket to track this feature

Review comment:
       Question to reviewers: Are these useful kernels to have in arrow? Or are 
they so special purpose we should leave them just in DataFusion?

##########
File path: datafusion/src/physical_plan/expressions/binary.rs
##########
@@ -276,6 +377,7 @@ macro_rules! binary_array_op_scalar {
             DataType::Date64 => {
                 compute_op_scalar!($LEFT, $RIGHT, $OP, Date64Array)
             }
+            DataType::Boolean => compute_bool_op_scalar!($LEFT, $RIGHT, $OP, 
BooleanArray),

Review comment:
       adding this line and the one below it adds all the new support, which is 
kind of cool! It is terrifying how many functions end up being called :)

##########
File path: datafusion/src/physical_plan/file_format/parquet.rs
##########
@@ -714,12 +714,14 @@ mod tests {
     }
 
     #[test]
-    fn row_group_predicate_builder_unsupported_type() -> Result<()> {
+    fn row_group_predicate_builder_null_expr() -> Result<()> {
         use crate::logical_plan::{col, lit};
-        // test row group predicate with unsupported statistics type (boolean)
-        // where a null array is generated for some statistics columns
-        // int > 1 and bool = true => c1_max > 1 and null
-        let expr = col("c1").gt(lit(15)).and(col("c2").eq(lit(true)));
+        // test row group predicate with an unknown (Null) expr

Review comment:
       now bool stats don't result in null columns, so I needed to use a 
constant to get the same effect




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to