(datafusion-sandbox) 04/44: Adjust `case_when DivideByZeroProtection` benchmark so that "percentage of zeroes" corresponds to "number of times protection is needed" (#20105)

blaginin Fri, 06 Feb 2026 06:59:01 -0800

This is an automated email from the ASF dual-hosted git repository.

blaginin pushed a commit to branch annarose/dict-coercion
in repository https://gitbox.apache.org/repos/asf/datafusion-sandbox.git


commit d28a03c5833f3ef65448e09314583a3f13dc0133
Author: Pepijn Van Eeckhoudt <[email protected]>
AuthorDate: Tue Feb 3 00:33:58 2026 +0100

    Adjust `case_when DivideByZeroProtection` benchmark so that "percentage of 
zeroes" corresponds to "number of times protection is needed" (#20105)
    
    ## Which issue does this PR close?
    
    - Related to #11570.
    
    ## Rationale for this change
    
    The `case_when` microbenchmark that covers the `CASE WHEN d != 0
    THEN n / d ELSE NULL END` pattern is parameterised over the percentage
    of zeroes in the `d` column. The benchmark uses the condition `d > 0`
    rather than `d != 0`, though, which is a bit misleading. In the '0%
    zeroes' run one would expect the else branch to never be taken, but
    because slightly less than 50% of the `d` values are negative, it's
    still taken roughly 50% of the time.
    
    This PR adjusts the benchmark to use `d != 0` instead.
    
    ## What changes are included in this PR?
    
    - Adjust the divide by zero benchmark to use `d != 0` as condition
    - Remove the duplicate benchmark; the div-by-zero variant is sufficient
    to compare changes across branches
    - Add a couple of SLTs to cover the `CASE` pattern
    
    ## Are these changes tested?
    
    Manual testing
    
    ## Are there any user-facing changes?
    
    No
---
 datafusion/physical-expr/benches/case_when.rs | 31 +--------------------------
 datafusion/sqllogictest/test_files/case.slt   | 21 ++++++++++++++++++
 2 files changed, 22 insertions(+), 30 deletions(-)

diff --git a/datafusion/physical-expr/benches/case_when.rs 
b/datafusion/physical-expr/benches/case_when.rs
index d9b1b5657..33931a2ba 100644
--- a/datafusion/physical-expr/benches/case_when.rs
+++ b/datafusion/physical-expr/benches/case_when.rs
@@ -564,7 +564,6 @@ fn benchmark_divide_by_zero_protection(c: &mut Criterion, 
batch_size: usize) {
 
         let numerator_col = col("numerator", &batch.schema()).unwrap();
         let divisor_col = col("divisor", &batch.schema()).unwrap();
-        let divisor_copy_col = col("divisor_copy", &batch.schema()).unwrap();
 
         // DivideByZeroProtection: WHEN condition checks `divisor_col > 0` and 
division
         // uses `divisor_col` as divisor. Since the checked column matches the 
divisor,
@@ -578,35 +577,7 @@ fn benchmark_divide_by_zero_protection(c: &mut Criterion, 
batch_size: usize) {
             |b| {
                 let when = Arc::new(BinaryExpr::new(
                     Arc::clone(&divisor_col),
-                    Operator::Gt,
-                    lit(0i32),
-                ));
-                let then = Arc::new(BinaryExpr::new(
-                    Arc::clone(&numerator_col),
-                    Operator::Divide,
-                    Arc::clone(&divisor_col),
-                ));
-                let else_null: Arc<dyn PhysicalExpr> = 
lit(ScalarValue::Int32(None));
-                let expr =
-                    Arc::new(case(None, vec![(when, then)], 
Some(else_null)).unwrap());
-
-                b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap()))
-            },
-        );
-
-        // ExpressionOrExpression: WHEN condition checks `divisor_copy_col > 
0` but
-        // division uses `divisor_col` as divisor. Since the checked column 
does NOT
-        // match the divisor, this falls back to ExpressionOrExpression 
evaluation.
-        group.bench_function(
-            format!(
-                "{} rows, {}% zeros: ExpressionOrExpression",
-                batch_size,
-                (zero_percentage * 100.0) as i32
-            ),
-            |b| {
-                let when = Arc::new(BinaryExpr::new(
-                    Arc::clone(&divisor_copy_col),
-                    Operator::Gt,
+                    Operator::NotEq,
                     lit(0i32),
                 ));
                 let then = Arc::new(BinaryExpr::new(
diff --git a/datafusion/sqllogictest/test_files/case.slt 
b/datafusion/sqllogictest/test_files/case.slt
index 8e0ee08d9..8bb17b57f 100644
--- a/datafusion/sqllogictest/test_files/case.slt
+++ b/datafusion/sqllogictest/test_files/case.slt
@@ -621,6 +621,27 @@ a
 b
 c
 
+query I
+SELECT CASE WHEN d != 0 THEN n / d ELSE NULL END FROM (VALUES (1, 1), (1, 0), 
(1, -1)) t(n,d)
+----
+1
+NULL
+-1
+
+query I
+SELECT CASE WHEN d > 0 THEN n / d ELSE NULL END FROM (VALUES (1, 1), (1, 0), 
(1, -1)) t(n,d)
+----
+1
+NULL
+NULL
+
+query I
+SELECT CASE WHEN d < 0 THEN n / d ELSE NULL END FROM (VALUES (1, 1), (1, 0), 
(1, -1)) t(n,d)
+----
+NULL
+NULL
+-1
+
 # EvalMethod::WithExpression using subset of all selected columns in case 
expression
 query III
 SELECT CASE a1 WHEN 1 THEN a1 WHEN 2 THEN a2 WHEN 3 THEN b END, b, c


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion-sandbox) 04/44: Adjust `case_when DivideByZeroProtection` benchmark so that "percentage of zeroes" corresponds to "number of times protection is needed" (#20105)

Reply via email to