This is an automated email from the ASF dual-hosted git repository. blaginin pushed a commit to branch annarose/dict-coercion in repository https://gitbox.apache.org/repos/asf/datafusion-sandbox.git
commit d28a03c5833f3ef65448e09314583a3f13dc0133 Author: Pepijn Van Eeckhoudt <[email protected]> AuthorDate: Tue Feb 3 00:33:58 2026 +0100 Adjust `case_when DivideByZeroProtection` benchmark so that "percentage of zeroes" corresponds to "number of times protection is needed" (#20105) ## Which issue does this PR close? - Related to #11570. ## Rationale for this change The `case_when` microbenchmark that covers the pattern `CASE WHEN d != 0 THEN n / d ELSE NULL END` pattern is parameterised over the percentage of zeroes in the `d` column. The benchmark uses the condition `d > 0` rather than `d != 0` though which is a bit misleading. In the '0% zeroes' run one would expect the else branch to never be taken, but because slightly less than 50% of the `d` values is negative, it's still taken 50% of the time. This PR adjust the benchmark to use `d != 0` instead. ## What changes are included in this PR? - Adjust the divide by zero benchmark to use `d != 0` as condition - Remove the duplicate benchmark, the div-by-zero variant is sufficient to compare changes across branches - Add a couple of SLTs to cover the `CASE` pattern ## Are these changes tested? Manual testing ## Are there any user-facing changes? No --- datafusion/physical-expr/benches/case_when.rs | 31 +-------------------------- datafusion/sqllogictest/test_files/case.slt | 21 ++++++++++++++++++ 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/datafusion/physical-expr/benches/case_when.rs b/datafusion/physical-expr/benches/case_when.rs index d9b1b5657..33931a2ba 100644 --- a/datafusion/physical-expr/benches/case_when.rs +++ b/datafusion/physical-expr/benches/case_when.rs @@ -564,7 +564,6 @@ fn benchmark_divide_by_zero_protection(c: &mut Criterion, batch_size: usize) { let numerator_col = col("numerator", &batch.schema()).unwrap(); let divisor_col = col("divisor", &batch.schema()).unwrap(); - let divisor_copy_col = col("divisor_copy", &batch.schema()).unwrap(); // DivideByZeroProtection: WHEN condition checks `divisor_col > 0` and division // uses `divisor_col` as divisor. Since the checked column matches the divisor, @@ -578,35 +577,7 @@ fn benchmark_divide_by_zero_protection(c: &mut Criterion, batch_size: usize) { |b| { let when = Arc::new(BinaryExpr::new( Arc::clone(&divisor_col), - Operator::Gt, - lit(0i32), - )); - let then = Arc::new(BinaryExpr::new( - Arc::clone(&numerator_col), - Operator::Divide, - Arc::clone(&divisor_col), - )); - let else_null: Arc<dyn PhysicalExpr> = lit(ScalarValue::Int32(None)); - let expr = - Arc::new(case(None, vec![(when, then)], Some(else_null)).unwrap()); - - b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap())) - }, - ); - - // ExpressionOrExpression: WHEN condition checks `divisor_copy_col > 0` but - // division uses `divisor_col` as divisor. Since the checked column does NOT - // match the divisor, this falls back to ExpressionOrExpression evaluation. - group.bench_function( - format!( - "{} rows, {}% zeros: ExpressionOrExpression", - batch_size, - (zero_percentage * 100.0) as i32 - ), - |b| { - let when = Arc::new(BinaryExpr::new( - Arc::clone(&divisor_copy_col), - Operator::Gt, + Operator::NotEq, lit(0i32), )); let then = Arc::new(BinaryExpr::new( diff --git a/datafusion/sqllogictest/test_files/case.slt b/datafusion/sqllogictest/test_files/case.slt index 8e0ee08d9..8bb17b57f 100644 --- a/datafusion/sqllogictest/test_files/case.slt +++ b/datafusion/sqllogictest/test_files/case.slt @@ -621,6 +621,27 @@ a b c +query I +SELECT CASE WHEN d != 0 THEN n / d ELSE NULL END FROM (VALUES (1, 1), (1, 0), (1, -1)) t(n,d) +---- +1 +NULL +-1 + +query I +SELECT CASE WHEN d > 0 THEN n / d ELSE NULL END FROM (VALUES (1, 1), (1, 0), (1, -1)) t(n,d) +---- +1 +NULL +NULL + +query I +SELECT CASE WHEN d < 0 THEN n / d ELSE NULL END FROM (VALUES (1, 1), (1, 0), (1, -1)) t(n,d) +---- +NULL +NULL +-1 + # EvalMethod::WithExpression using subset of all selected columns in case expression query III SELECT CASE a1 WHEN 1 THEN a1 WHEN 2 THEN a2 WHEN 3 THEN b END, b, c --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
