(arrow-datafusion) branch main updated: Minor: reduce code duplication in PruningPredicate test (#8441)

alamb Thu, 04 Jan 2024 14:21:00 -0800

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new e5036d0e76 Minor: reduce code duplication in PruningPredicate test (#8441)
e5036d0e76 is described below

commit e5036d0e760b637724e8ac59c32924f126311d39
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Jan 4 17:20:46 2024 -0500

    Minor: reduce code duplication in PruningPredicate test (#8441)
---
 datafusion/core/src/physical_optimizer/pruning.rs | 612 +++++++++++-----------
 1 file changed, 310 insertions(+), 302 deletions(-)

diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs
index 06cfc72824..1b68553a89 100644
--- a/datafusion/core/src/physical_optimizer/pruning.rs
+++ b/datafusion/core/src/physical_optimizer/pruning.rs
@@ -2017,54 +2017,52 @@ mod tests {
             DataType::Decimal128(9, 2),
             true,
         )]));
-        // s1 > 5
-        let expr = col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 9, 2)));
-        let expr = logical2physical(&expr, &schema);
-        // If the data is written by spark, the physical data type is INT32 in the parquet
-        // So we use the INT32 type of statistic.
-        let statistics = TestStatistics::new().with(
-            "s1",
-            ContainerStats::new_i32(
-                vec![Some(0), Some(4), None, Some(3)], // min
-                vec![Some(5), Some(6), Some(4), None], // max
+
+        prune_with_expr(
+            // s1 > 5
+            col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 9, 2))),
+            &schema,
+            // If the data is written by spark, the physical data type is INT32 in the parquet
+            // So we use the INT32 type of statistic.
+            &TestStatistics::new().with(
+                "s1",
+                ContainerStats::new_i32(
+                    vec![Some(0), Some(4), None, Some(3)], // min
+                    vec![Some(5), Some(6), Some(4), None], // max
+                ),
             ),
+            &[false, true, false, true],
         );
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        let expected = vec![false, true, false, true];
-        assert_eq!(result, expected);
 
-        // with cast column to other type
-        let expr = cast(col("s1"), DataType::Decimal128(14, 3))
-            .gt(lit(ScalarValue::Decimal128(Some(5000), 14, 3)));
-        let expr = logical2physical(&expr, &schema);
-        let statistics = TestStatistics::new().with(
-            "s1",
-            ContainerStats::new_i32(
-                vec![Some(0), Some(4), None, Some(3)], // min
-                vec![Some(5), Some(6), Some(4), None], // max
+        prune_with_expr(
+            // with cast column to other type
+            cast(col("s1"), DataType::Decimal128(14, 3))
+                .gt(lit(ScalarValue::Decimal128(Some(5000), 14, 3))),
+            &schema,
+            &TestStatistics::new().with(
+                "s1",
+                ContainerStats::new_i32(
+                    vec![Some(0), Some(4), None, Some(3)], // min
+                    vec![Some(5), Some(6), Some(4), None], // max
+                ),
             ),
+            &[false, true, false, true],
         );
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        let expected = vec![false, true, false, true];
-        assert_eq!(result, expected);
 
-        // with try cast column to other type
-        let expr = try_cast(col("s1"), DataType::Decimal128(14, 3))
-            .gt(lit(ScalarValue::Decimal128(Some(5000), 14, 3)));
-        let expr = logical2physical(&expr, &schema);
-        let statistics = TestStatistics::new().with(
-            "s1",
-            ContainerStats::new_i32(
-                vec![Some(0), Some(4), None, Some(3)], // min
-                vec![Some(5), Some(6), Some(4), None], // max
+        prune_with_expr(
+            // with try cast column to other type
+            try_cast(col("s1"), DataType::Decimal128(14, 3))
+                .gt(lit(ScalarValue::Decimal128(Some(5000), 14, 3))),
+            &schema,
+            &TestStatistics::new().with(
+                "s1",
+                ContainerStats::new_i32(
+                    vec![Some(0), Some(4), None, Some(3)], // min
+                    vec![Some(5), Some(6), Some(4), None], // max
+                ),
             ),
+            &[false, true, false, true],
         );
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        let expected = vec![false, true, false, true];
-        assert_eq!(result, expected);
 
         // decimal(18,2)
         let schema = Arc::new(Schema::new(vec![Field::new(
@@ -2072,22 +2070,21 @@ mod tests {
             DataType::Decimal128(18, 2),
             true,
         )]));
-        // s1 > 5
-        let expr = col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 18, 2)));
-        let expr = logical2physical(&expr, &schema);
-        // If the data is written by spark, the physical data type is INT64 in the parquet
-        // So we use the INT32 type of statistic.
-        let statistics = TestStatistics::new().with(
-            "s1",
-            ContainerStats::new_i64(
-                vec![Some(0), Some(4), None, Some(3)], // min
-                vec![Some(5), Some(6), Some(4), None], // max
+        prune_with_expr(
+            // s1 > 5
+            col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 18, 2))),
+            &schema,
+            // If the data is written by spark, the physical data type is INT64 in the parquet
+            // So we use the INT32 type of statistic.
+            &TestStatistics::new().with(
+                "s1",
+                ContainerStats::new_i64(
+                    vec![Some(0), Some(4), None, Some(3)], // min
+                    vec![Some(5), Some(6), Some(4), None], // max
+                ),
             ),
+            &[false, true, false, true],
         );
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        let expected = vec![false, true, false, true];
-        assert_eq!(result, expected);
 
         // decimal(23,2)
         let schema = Arc::new(Schema::new(vec![Field::new(
@@ -2095,22 +2092,22 @@ mod tests {
             DataType::Decimal128(23, 2),
             true,
         )]));
-        // s1 > 5
-        let expr = col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 23, 2)));
-        let expr = logical2physical(&expr, &schema);
-        let statistics = TestStatistics::new().with(
-            "s1",
-            ContainerStats::new_decimal128(
-                vec![Some(0), Some(400), None, Some(300)], // min
-                vec![Some(500), Some(600), Some(400), None], // max
-                23,
-                2,
+
+        prune_with_expr(
+            // s1 > 5
+            col("s1").gt(lit(ScalarValue::Decimal128(Some(500), 23, 2))),
+            &schema,
+            &TestStatistics::new().with(
+                "s1",
+                ContainerStats::new_decimal128(
+                    vec![Some(0), Some(400), None, Some(300)], // min
+                    vec![Some(500), Some(600), Some(400), None], // max
+                    23,
+                    2,
+                ),
             ),
+            &[false, true, false, true],
         );
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        let expected = vec![false, true, false, true];
-        assert_eq!(result, expected);
     }
 
     #[test]
@@ -2120,10 +2117,6 @@ mod tests {
             Field::new("s2", DataType::Int32, true),
         ]));
 
-        // Prune using s2 > 5
-        let expr = col("s2").gt(lit(5));
-        let expr = logical2physical(&expr, &schema);
-
         let statistics = TestStatistics::new().with(
             "s2",
             ContainerStats::new_i32(
@@ -2131,53 +2124,50 @@ mod tests {
                 vec![Some(5), Some(6), None, None],    // max
             ),
         );
+        prune_with_expr(
+            // Prune using s2 > 5
+            col("s2").gt(lit(5)),
+            &schema,
+            &statistics,
+            // s2 [0, 5] ==> no rows should pass
+            // s2 [4, 6] ==> some rows could pass
+            // No stats for s2 ==> some rows could pass
+            // s2 [3, None] (null max) ==> some rows could pass
+            &[false, true, true, true],
+        );
 
-        // s2 [0, 5] ==> no rows should pass
-        // s2 [4, 6] ==> some rows could pass
-        // No stats for s2 ==> some rows could pass
-        // s2 [3, None] (null max) ==> some rows could pass
-
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        let expected = vec![false, true, true, true];
-        assert_eq!(result, expected);
-
-        // filter with cast
-        let expr = cast(col("s2"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(5))));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        let expected = vec![false, true, true, true];
-        assert_eq!(result, expected);
+        prune_with_expr(
+            // filter with cast
+            cast(col("s2"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(5)))),
+            &schema,
+            &statistics,
+            &[false, true, true, true],
+        );
     }
 
     #[test]
     fn prune_not_eq_data() {
         let schema = Arc::new(Schema::new(vec![Field::new("s1", DataType::Utf8, true)]));
 
-        // Prune using s2 != 'M'
-        let expr = col("s1").not_eq(lit("M"));
-        let expr = logical2physical(&expr, &schema);
-
-        let statistics = TestStatistics::new().with(
-            "s1",
-            ContainerStats::new_utf8(
-                vec![Some("A"), Some("A"), Some("N"), Some("M"), None, Some("A")], // min
-                vec![Some("Z"), Some("L"), Some("Z"), Some("M"), None, None],      // max
+        prune_with_expr(
+            // Prune using s2 != 'M'
+            col("s1").not_eq(lit("M")),
+            &schema,
+            &TestStatistics::new().with(
+                "s1",
+                ContainerStats::new_utf8(
+                    vec![Some("A"), Some("A"), Some("N"), Some("M"), None, Some("A")], // min
+                    vec![Some("Z"), Some("L"), Some("Z"), Some("M"), None, None], // max
+                ),
             ),
+            // s1 [A, Z] ==> might have values that pass predicate
+            // s1 [A, L] ==> all rows pass the predicate
+            // s1 [N, Z] ==> all rows pass the predicate
+            // s1 [M, M] ==> all rows do not pass the predicate
+            // No stats for s2 ==> some rows could pass
+            // s2 [3, None] (null max) ==> some rows could pass
+            &[true, true, true, false, true, true],
         );
-
-        // s1 [A, Z] ==> might have values that pass predicate
-        // s1 [A, L] ==> all rows pass the predicate
-        // s1 [N, Z] ==> all rows pass the predicate
-        // s1 [M, M] ==> all rows do not pass the predicate
-        // No stats for s2 ==> some rows could pass
-        // s2 [3, None] (null max) ==> some rows could pass
-
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        let expected = vec![true, true, true, false, true, true];
-        assert_eq!(result, expected);
     }
 
     /// Creates setup for boolean chunk pruning
@@ -2216,69 +2206,75 @@ mod tests {
     fn prune_bool_const_expr() {
         let (schema, statistics, _, _) = bool_setup();
 
-        // true
-        let expr = lit(true);
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, vec![true, true, true, true, true]);
+        prune_with_expr(
+            // true
+            lit(true),
+            &schema,
+            &statistics,
+            &[true, true, true, true, true],
+        );
 
-        // false
-        // constant literals that do NOT refer to any columns are currently not evaluated at all, hence the result is
-        // "all true"
-        let expr = lit(false);
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, vec![true, true, true, true, true]);
+        prune_with_expr(
+            // false
+            // constant literals that do NOT refer to any columns are currently not evaluated at all, hence the result is
+            // "all true"
+            lit(false),
+            &schema,
+            &statistics,
+            &[true, true, true, true, true],
+        );
     }
 
     #[test]
     fn prune_bool_column() {
         let (schema, statistics, expected_true, _) = bool_setup();
 
-        // b1
-        let expr = col("b1");
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_true);
+        prune_with_expr(
+            // b1
+            col("b1"),
+            &schema,
+            &statistics,
+            &expected_true,
+        );
     }
 
     #[test]
     fn prune_bool_not_column() {
         let (schema, statistics, _, expected_false) = bool_setup();
 
-        // !b1
-        let expr = col("b1").not();
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_false);
+        prune_with_expr(
+            // !b1
+            col("b1").not(),
+            &schema,
+            &statistics,
+            &expected_false,
+        );
     }
 
     #[test]
     fn prune_bool_column_eq_true() {
         let (schema, statistics, expected_true, _) = bool_setup();
 
-        // b1 = true
-        let expr = col("b1").eq(lit(true));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_true);
+        prune_with_expr(
+            // b1 = true
+            col("b1").eq(lit(true)),
+            &schema,
+            &statistics,
+            &expected_true,
+        );
     }
 
     #[test]
     fn prune_bool_not_column_eq_true() {
         let (schema, statistics, _, expected_false) = bool_setup();
 
-        // !b1 = true
-        let expr = col("b1").not().eq(lit(true));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_false);
+        prune_with_expr(
+            // !b1 = true
+            col("b1").not().eq(lit(true)),
+            &schema,
+            &statistics,
+            &expected_false,
+        );
     }
 
     /// Creates a setup for chunk pruning, modeling a int32 column "i"
@@ -2313,21 +2309,18 @@ mod tests {
         // i [-11, -1] ==>  no rows can pass (not keep)
         // i [NULL, NULL]  ==> unknown (must keep)
         // i [1, NULL]  ==> unknown (must keep)
-        let expected_ret = vec![true, true, false, true, true];
+        let expected_ret = &[true, true, false, true, true];
 
         // i > 0
-        let expr = col("i").gt(lit(0));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(col("i").gt(lit(0)), &schema, &statistics, expected_ret);
 
         // -i < 0
-        let expr = Expr::Negative(Box::new(col("i"))).lt(lit(0));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            Expr::Negative(Box::new(col("i"))).lt(lit(0)),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
     }
 
     #[test]
@@ -2340,21 +2333,23 @@ mod tests {
         // i [-11, -1] ==>  all rows must pass (must keep)
         // i [NULL, NULL]  ==> unknown (must keep)
         // i [1, NULL]  ==> no rows can pass (not keep)
-        let expected_ret = vec![true, false, true, true, false];
+        let expected_ret = &[true, false, true, true, false];
 
-        // i <= 0
-        let expr = col("i").lt_eq(lit(0));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // i <= 0
+            col("i").lt_eq(lit(0)),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
 
-        // -i >= 0
-        let expr = Expr::Negative(Box::new(col("i"))).gt_eq(lit(0));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // -i >= 0
+            Expr::Negative(Box::new(col("i"))).gt_eq(lit(0)),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
     }
 
     #[test]
@@ -2367,37 +2362,39 @@ mod tests {
         // i [-11, -1] ==>  no rows could pass in theory (conservatively keep)
         // i [NULL, NULL]  ==> unknown (must keep)
         // i [1, NULL]  ==> no rows can pass (conservatively keep)
-        let expected_ret = vec![true, true, true, true, true];
+        let expected_ret = &[true, true, true, true, true];
 
-        // cast(i as utf8) <= 0
-        let expr = cast(col("i"), DataType::Utf8).lt_eq(lit("0"));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // cast(i as utf8) <= 0
+            cast(col("i"), DataType::Utf8).lt_eq(lit("0")),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
 
-        // try_cast(i as utf8) <= 0
-        let expr = try_cast(col("i"), DataType::Utf8).lt_eq(lit("0"));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // try_cast(i as utf8) <= 0
+            try_cast(col("i"), DataType::Utf8).lt_eq(lit("0")),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
 
-        // cast(-i as utf8) >= 0
-        let expr =
-            cast(Expr::Negative(Box::new(col("i"))), DataType::Utf8).gt_eq(lit("0"));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // cast(-i as utf8) >= 0
+            cast(Expr::Negative(Box::new(col("i"))), DataType::Utf8).gt_eq(lit("0")),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
 
-        // try_cast(-i as utf8) >= 0
-        let expr =
-            try_cast(Expr::Negative(Box::new(col("i"))), DataType::Utf8).gt_eq(lit("0"));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // try_cast(-i as utf8) >= 0
+            try_cast(Expr::Negative(Box::new(col("i"))), DataType::Utf8).gt_eq(lit("0")),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
     }
 
     #[test]
@@ -2410,14 +2407,15 @@ mod tests {
         // i [-11, -1] ==>  no rows can pass (not keep)
         // i [NULL, NULL]  ==> unknown (must keep)
         // i [1, NULL]  ==> no rows can pass (not keep)
-        let expected_ret = vec![true, false, false, true, false];
+        let expected_ret = &[true, false, false, true, false];
 
-        // i = 0
-        let expr = col("i").eq(lit(0));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // i = 0
+            col("i").eq(lit(0)),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
     }
 
     #[test]
@@ -2430,19 +2428,21 @@ mod tests {
         // i [-11, -1] ==>  no rows can pass (not keep)
         // i [NULL, NULL]  ==> unknown (must keep)
         // i [1, NULL]  ==> no rows can pass (not keep)
-        let expected_ret = vec![true, false, false, true, false];
+        let expected_ret = &[true, false, false, true, false];
 
-        let expr = cast(col("i"), DataType::Int64).eq(lit(0i64));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            cast(col("i"), DataType::Int64).eq(lit(0i64)),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
 
-        let expr = try_cast(col("i"), DataType::Int64).eq(lit(0i64));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            try_cast(col("i"), DataType::Int64).eq(lit(0i64)),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
     }
 
     #[test]
@@ -2458,13 +2458,14 @@ mod tests {
         // i [-11, -1] ==>  no rows can pass (could keep)
         // i [NULL, NULL]  ==> unknown (keep)
         // i [1, NULL]  ==> no rows can pass (could keep)
-        let expected_ret = vec![true, true, true, true, true];
+        let expected_ret = &[true, true, true, true, true];
 
-        let expr = cast(col("i"), DataType::Utf8).eq(lit("0"));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            cast(col("i"), DataType::Utf8).eq(lit("0")),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
     }
 
     #[test]
@@ -2477,21 +2478,23 @@ mod tests {
         // i [-11, -1] ==>  no rows can pass (not keep)
         // i [NULL, NULL]  ==> unknown (must keep)
         // i [1, NULL]  ==> all rows must pass (must keep)
-        let expected_ret = vec![true, true, false, true, true];
+        let expected_ret = &[true, true, false, true, true];
 
-        // i > -1
-        let expr = col("i").gt(lit(-1));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // i > -1
+            col("i").gt(lit(-1)),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
 
-        // -i < 1
-        let expr = Expr::Negative(Box::new(col("i"))).lt(lit(1));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // -i < 1
+            Expr::Negative(Box::new(col("i"))).lt(lit(1)),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
     }
 
     #[test]
@@ -2500,14 +2503,15 @@ mod tests {
 
         // Expression "i IS NULL" when there are no null statistics,
         // should all be kept
-        let expected_ret = vec![true, true, true, true, true];
+        let expected_ret = &[true, true, true, true, true];
 
-        // i IS NULL, no null statistics
-        let expr = col("i").is_null();
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // i IS NULL, no null statistics
+            col("i").is_null(),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
 
         // provide null counts for each column
         let statistics = statistics.with_null_counts(
@@ -2521,51 +2525,55 @@ mod tests {
             ],
         );
 
-        let expected_ret = vec![false, true, true, true, false];
+        let expected_ret = &[false, true, true, true, false];
 
-        // i IS NULL, with actual null statistcs
-        let expr = col("i").is_null();
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // i IS NULL, with actual null statistcs
+            col("i").is_null(),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
     }
 
     #[test]
     fn prune_cast_column_scalar() {
         // The data type of column i is INT32
         let (schema, statistics) = int32_setup();
-        let expected_ret = vec![true, true, false, true, true];
+        let expected_ret = &[true, true, false, true, true];
 
-        // i > int64(0)
-        let expr = col("i").gt(cast(lit(ScalarValue::Int64(Some(0))), DataType::Int32));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // i > int64(0)
+            col("i").gt(cast(lit(ScalarValue::Int64(Some(0))), DataType::Int32)),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
 
-        // cast(i as int64) > int64(0)
-        let expr = cast(col("i"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(0))));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // cast(i as int64) > int64(0)
+            cast(col("i"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(0)))),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
 
-        // try_cast(i as int64) > int64(0)
-        let expr =
-            try_cast(col("i"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(0))));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema.clone()).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // try_cast(i as int64) > int64(0)
+            try_cast(col("i"), DataType::Int64).gt(lit(ScalarValue::Int64(Some(0)))),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
 
-        // `-cast(i as int64) < 0` convert to `cast(i as int64) > -0`
-        let expr = Expr::Negative(Box::new(cast(col("i"), DataType::Int64)))
-            .lt(lit(ScalarValue::Int64(Some(0))));
-        let expr = logical2physical(&expr, &schema);
-        let p = PruningPredicate::try_new(expr, schema).unwrap();
-        let result = p.prune(&statistics).unwrap();
-        assert_eq!(result, expected_ret);
+        prune_with_expr(
+            // `-cast(i as int64) < 0` convert to `cast(i as int64) > -0`
+            Expr::Negative(Box::new(cast(col("i"), DataType::Int64)))
+                .lt(lit(ScalarValue::Int64(Some(0)))),
+            &schema,
+            &statistics,
+            expected_ret,
+        );
     }
 
     #[test]
@@ -2721,7 +2729,7 @@ mod tests {
             &schema,
             &statistics,
             // rule out containers ('false) where we know foo is not present
-            vec![true, false, true, true, false, true, true, false, true],
+            &[true, false, true, true, false, true, true, false, true],
         );
 
         // s1 = 'bar'
@@ -2730,7 +2738,7 @@ mod tests {
             &schema,
             &statistics,
             // rule out containers where we know bar is not present
-            vec![true, true, true, false, false, false, true, true, true],
+            &[true, true, true, false, false, false, true, true, true],
         );
 
         // s1 = 'baz' (unknown value)
@@ -2739,7 +2747,7 @@ mod tests {
             &schema,
             &statistics,
             // can't rule out anything
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
 
         // s1 = 'foo' AND s1 = 'bar'
@@ -2750,7 +2758,7 @@ mod tests {
             // logically this predicate can't possibly be true (the column can't
             // take on both values) but we could rule it out if the stats tell
             // us that both values are not present
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
 
         // s1 = 'foo' OR s1 = 'bar'
@@ -2759,7 +2767,7 @@ mod tests {
             &schema,
             &statistics,
             // can rule out containers that we know contain neither foo nor bar
-            vec![true, true, true, true, true, true, false, false, false],
+            &[true, true, true, true, true, true, false, false, false],
         );
 
         // s1 = 'foo' OR s1 = 'baz'
@@ -2768,7 +2776,7 @@ mod tests {
             &schema,
             &statistics,
             // can't rule out anything container
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
 
         // s1 = 'foo' OR s1 = 'bar' OR s1 = 'baz'
@@ -2781,7 +2789,7 @@ mod tests {
             &statistics,
             // can rule out any containers based on knowledge of s1 and `foo`,
             // `bar` and (`foo`, `bar`)
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
 
         // s1 != foo
@@ -2790,7 +2798,7 @@ mod tests {
             &schema,
             &statistics,
             // rule out containers we know for sure only contain foo
-            vec![false, true, true, false, true, true, false, true, true],
+            &[false, true, true, false, true, true, false, true, true],
         );
 
         // s1 != bar
@@ -2799,7 +2807,7 @@ mod tests {
             &schema,
             &statistics,
             // rule out when we know for sure s1 has the value bar
-            vec![false, false, false, true, true, true, true, true, true],
+            &[false, false, false, true, true, true, true, true, true],
         );
 
         // s1 != foo AND s1 != bar
@@ -2810,7 +2818,7 @@ mod tests {
             &schema,
             &statistics,
             // can rule out any container where we know s1 does not have either 'foo' or 'bar'
-            vec![true, true, true, false, false, false, true, true, true],
+            &[true, true, true, false, false, false, true, true, true],
         );
 
         // s1 != foo AND s1 != bar AND s1 != baz
@@ -2822,7 +2830,7 @@ mod tests {
             &schema,
             &statistics,
             // can't rule out any container based on  knowledge of s1,s2
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
 
         // s1 != foo OR s1 != bar
@@ -2833,7 +2841,7 @@ mod tests {
             &schema,
             &statistics,
             // cant' rule out anything based on contains information
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
 
         // s1 != foo OR s1 != bar OR s1 != baz
@@ -2845,7 +2853,7 @@ mod tests {
             &schema,
             &statistics,
             // cant' rule out anything based on contains information
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
     }
 
@@ -2907,7 +2915,7 @@ mod tests {
             &schema,
             &statistics,
             // rule out containers where we know s1 is not present
-            vec![true, false, true, true, false, true, true, false, true],
+            &[true, false, true, true, false, true, true, false, true],
         );
 
         // s1 = 'foo' OR s2 = 'bar'
@@ -2917,7 +2925,7 @@ mod tests {
             &schema,
             &statistics,
             //  can't rule out any container (would need to prove that s1 != foo AND s2 != bar)
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
 
         // s1 = 'foo' AND s2 != 'bar'
@@ -2928,7 +2936,7 @@ mod tests {
             // can only rule out container where we know either:
             // 1. s1 doesn't have the value 'foo` or
             // 2. s2 has only the value of 'bar'
-            vec![false, false, false, true, false, true, true, false, true],
+            &[false, false, false, true, false, true, true, false, true],
         );
 
         // s1 != 'foo' AND s2 != 'bar'
@@ -2941,7 +2949,7 @@ mod tests {
             // Can  rule out any container where we know either
             // 1. s1 has only the value 'foo'
             // 2. s2 has only the value 'bar'
-            vec![false, false, false, false, true, true, false, true, true],
+            &[false, false, false, false, true, true, false, true, true],
         );
 
         // s1 != 'foo' AND (s2 = 'bar' OR s2 = 'baz')
@@ -2953,7 +2961,7 @@ mod tests {
             &statistics,
             // Can rule out any container where we know s1 has only the value
             // 'foo'. Can't use knowledge of s2 and bar to rule out anything
-            vec![false, true, true, false, true, true, false, true, true],
+            &[false, true, true, false, true, true, false, true, true],
         );
 
         // s1 like '%foo%bar%'
@@ -2962,7 +2970,7 @@ mod tests {
             &schema,
             &statistics,
             // cant rule out anything with information we know
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
 
         // s1 like '%foo%bar%' AND s2 = 'bar'
@@ -2973,7 +2981,7 @@ mod tests {
             &schema,
             &statistics,
             // can rule out any container where we know s2 does not have the value 'bar'
-            vec![true, true, true, false, false, false, true, true, true],
+            &[true, true, true, false, false, false, true, true, true],
         );
 
         // s1 like '%foo%bar%' OR s2 = 'bar'
@@ -2983,7 +2991,7 @@ mod tests {
             &statistics,
             // can't rule out anything (we would have to prove that both the
             // like and the equality must be false)
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
     }
 
@@ -3055,7 +3063,7 @@ mod tests {
             // 1. 0 is outside the min/max range of i
             // 1. s does not contain foo
             // (range is false, and contained  is false)
-            vec![true, false, true, false, false, false, true, false, true],
+            &[true, false, true, false, false, false, true, false, true],
         );
 
         // i = 0 and s != 'foo'
@@ -3066,7 +3074,7 @@ mod tests {
             // Can rule out containers where either:
             // 1. 0 is outside the min/max range of i
             // 2. s only contains foo
-            vec![false, false, false, true, false, true, true, false, true],
+            &[false, false, false, true, false, true, true, false, true],
         );
 
         // i = 0 OR s = 'foo'
@@ -3076,7 +3084,7 @@ mod tests {
             &statistics,
             // in theory could rule out containers if we had min/max values for
             // s as well. But in this case we don't so we can't rule out anything
-            vec![true, true, true, true, true, true, true, true, true],
+            &[true, true, true, true, true, true, true, true, true],
         );
     }
 
@@ -3091,7 +3099,7 @@ mod tests {
         expr: Expr,
         schema: &SchemaRef,
         statistics: &TestStatistics,
-        expected: Vec<bool>,
+        expected: &[bool],
     ) {
         println!("Pruning with expr: {}", expr);
         let expr = logical2physical(&expr, schema);

(arrow-datafusion) branch main updated: Minor: reduce code duplication in PruningPredicate test (#8441)

Reply via email to