This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new b6e55d7e9c feat: Optimize CASE expression for usage where then and
else values are literals (#11553)
b6e55d7e9c is described below
commit b6e55d7e9cf17cfd1dcf633350cc6d205608ecd0
Author: Andy Grove <[email protected]>
AuthorDate: Mon Jul 22 09:51:40 2024 -0600
feat: Optimize CASE expression for usage where then and else values are
literals (#11553)
* Optimize CASE expression for usage where then and else values are literals
* add slt test
* add more test cases
---
datafusion/physical-expr/src/expressions/case.rs | 44 +++++++++++++++++
datafusion/sqllogictest/test_files/case.slt | 60 +++++++++++++++++++++++-
2 files changed, 103 insertions(+), 1 deletion(-)
diff --git a/datafusion/physical-expr/src/expressions/case.rs
b/datafusion/physical-expr/src/expressions/case.rs
index 521a7ed9ac..b428d562bd 100644
--- a/datafusion/physical-expr/src/expressions/case.rs
+++ b/datafusion/physical-expr/src/expressions/case.rs
@@ -57,6 +57,11 @@ enum EvalMethod {
///
/// CASE WHEN condition THEN column [ELSE NULL] END
InfallibleExprOrNull,
+ /// This is a specialization for a specific use case where we can take a
fast path
+ /// if there is just one when/then pair and both the `then` and `else`
expressions
+ /// are literal values
+ /// CASE WHEN condition THEN literal ELSE literal END
+ ScalarOrScalar,
}
/// The CASE expression is similar to a series of nested if/else and there are
two forms that
@@ -140,6 +145,12 @@ impl CaseExpr {
&& else_expr.is_none()
{
EvalMethod::InfallibleExprOrNull
+ } else if when_then_expr.len() == 1
+ && when_then_expr[0].1.as_any().is::<Literal>()
+ && else_expr.is_some()
+ && else_expr.as_ref().unwrap().as_any().is::<Literal>()
+ {
+ EvalMethod::ScalarOrScalar
} else {
EvalMethod::NoExpression
};
@@ -344,6 +355,38 @@ impl CaseExpr {
internal_err!("predicate did not evaluate to an array")
}
}
+
+ fn scalar_or_scalar(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
+ let return_type = self.data_type(&batch.schema())?;
+
+ // evaluate when expression
+ let when_value = self.when_then_expr[0].0.evaluate(batch)?;
+ let when_value = when_value.into_array(batch.num_rows())?;
+ let when_value = as_boolean_array(&when_value).map_err(|e| {
+ DataFusionError::Context(
+ "WHEN expression did not return a BooleanArray".to_string(),
+ Box::new(e),
+ )
+ })?;
+
+ // Treat 'NULL' as false value
+ let when_value = match when_value.null_count() {
+ 0 => Cow::Borrowed(when_value),
+ _ => Cow::Owned(prep_null_mask_filter(when_value)),
+ };
+
+ // evaluate then_value
+ let then_value = self.when_then_expr[0].1.evaluate(batch)?;
+ let then_value = Scalar::new(then_value.into_array(1)?);
+
+ // keep `else_expr`'s data type and return type consistent
+ let e = self.else_expr.as_ref().unwrap();
+ let expr = try_cast(Arc::clone(e), &batch.schema(),
return_type.clone())
+ .unwrap_or_else(|_| Arc::clone(e));
+ let else_ = Scalar::new(expr.evaluate(batch)?.into_array(1)?);
+
+ Ok(ColumnarValue::Array(zip(&when_value, &then_value, &else_)?))
+ }
}
impl PhysicalExpr for CaseExpr {
@@ -406,6 +449,7 @@ impl PhysicalExpr for CaseExpr {
// Specialization for CASE WHEN expr THEN column [ELSE NULL]
END
self.case_column_or_null(batch)
}
+ EvalMethod::ScalarOrScalar => self.scalar_or_scalar(batch),
}
}
diff --git a/datafusion/sqllogictest/test_files/case.slt
b/datafusion/sqllogictest/test_files/case.slt
index fac1042bb6..70063b88fb 100644
--- a/datafusion/sqllogictest/test_files/case.slt
+++ b/datafusion/sqllogictest/test_files/case.slt
@@ -17,7 +17,7 @@
# create test data
statement ok
-create table foo (a int, b int) as values (1, 2), (3, 4), (5, 6);
+create table foo (a int, b int) as values (1, 2), (3, 4), (5, 6), (null,
null), (6, null), (null, 7);
# CASE WHEN with condition
query T
@@ -26,6 +26,9 @@ SELECT CASE a WHEN 1 THEN 'one' WHEN 3 THEN 'three' ELSE '?'
END FROM foo
one
three
?
+?
+?
+?
# CASE WHEN with no condition
query I
@@ -34,6 +37,9 @@ SELECT CASE WHEN a > 2 THEN a ELSE b END FROM foo
2
3
5
+NULL
+6
+7
# column or explicit null
query I
@@ -42,6 +48,9 @@ SELECT CASE WHEN a > 2 THEN b ELSE null END FROM foo
NULL
4
6
+NULL
+NULL
+7
# column or implicit null
query I
@@ -50,3 +59,52 @@ SELECT CASE WHEN a > 2 THEN b END FROM foo
NULL
4
6
+NULL
+NULL
+7
+
+# scalar or scalar (string)
+query T
+SELECT CASE WHEN a > 2 THEN 'even' ELSE 'odd' END FROM foo
+----
+odd
+even
+even
+odd
+even
+odd
+
+# scalar or scalar (int)
+query I
+SELECT CASE WHEN a > 2 THEN 1 ELSE 0 END FROM foo
+----
+0
+1
+1
+0
+1
+0
+
+# predicate binary expression with scalars (does not make much sense because
the expression in
+# this case is always false, so this expression could be rewritten as a
literal 0 during planning
+query I
+SELECT CASE WHEN 1 > 2 THEN 1 ELSE 0 END FROM foo
+----
+0
+0
+0
+0
+0
+0
+
+# predicate using boolean literal (does not make much sense because the
expression in
+# this case is always false, so this expression could be rewritten as a
literal 0 during planning
+query I
+SELECT CASE WHEN false THEN 1 ELSE 0 END FROM foo
+----
+0
+0
+0
+0
+0
+0
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]