pepijnve commented on code in PR #18329:
URL: https://github.com/apache/datafusion/pull/18329#discussion_r2477597522
##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -556,63 +651,61 @@ impl CaseExpr {
};
if when_then_expr.is_empty() {
- exec_err!("There must be at least one WHEN clause")
- } else {
- let eval_method = if expr.is_some() {
- EvalMethod::WithExpression
- } else if when_then_expr.len() == 1
- && is_cheap_and_infallible(&(when_then_expr[0].1))
- && else_expr.is_none()
- {
- EvalMethod::InfallibleExprOrNull
- } else if when_then_expr.len() == 1
- && when_then_expr[0].1.as_any().is::<Literal>()
- && else_expr.is_some()
- && else_expr.as_ref().unwrap().as_any().is::<Literal>()
- {
- EvalMethod::ScalarOrScalar
- } else if when_then_expr.len() == 1 && else_expr.is_some() {
- EvalMethod::ExpressionOrExpression
- } else {
- EvalMethod::NoExpression
- };
-
- Ok(Self {
- expr,
- when_then_expr,
- else_expr,
- eval_method,
- })
+ return exec_err!("There must be at least one WHEN clause");
}
+
+ let body = CaseBody {
+ expr,
+ when_then_expr,
+ else_expr,
+ };
+
+ let eval_method = if body.expr.is_some() {
+ EvalMethod::WithExpression(body.project()?)
+ } else if body.when_then_expr.len() == 1
+ && is_cheap_and_infallible(&(body.when_then_expr[0].1))
+ && body.else_expr.is_none()
+ {
+ EvalMethod::InfallibleExprOrNull
+ } else if body.when_then_expr.len() == 1
+ && body.when_then_expr[0].1.as_any().is::<Literal>()
+ && body.else_expr.is_some()
+ && body.else_expr.as_ref().unwrap().as_any().is::<Literal>()
+ {
+ EvalMethod::ScalarOrScalar
+ } else if body.when_then_expr.len() == 1 && body.else_expr.is_some() {
+ EvalMethod::ExpressionOrExpression(body.project()?)
+ } else {
+ EvalMethod::NoExpression(body.project()?)
+ };
+
+ Ok(Self { body, eval_method })
}
/// Optional base expression that can be compared to literal values in the
"when" expressions
pub fn expr(&self) -> Option<&Arc<dyn PhysicalExpr>> {
- self.expr.as_ref()
+ self.body.expr.as_ref()
}
/// One or more when/then expressions
pub fn when_then_expr(&self) -> &[WhenThen] {
- &self.when_then_expr
+ &self.body.when_then_expr
}
/// Optional "else" expression
pub fn else_expr(&self) -> Option<&Arc<dyn PhysicalExpr>> {
- self.else_expr.as_ref()
+ self.body.else_expr.as_ref()
}
}
-impl CaseExpr {
- /// This function evaluates the form of CASE that matches an expression to
fixed values.
- ///
- /// CASE expression
- /// WHEN value THEN result
- /// [WHEN ...]
- /// [ELSE result]
- /// END
- fn case_when_with_expr(&self, batch: &RecordBatch) ->
Result<ColumnarValue> {
- let return_type = self.data_type(&batch.schema())?;
- let mut result_builder = ResultBuilder::new(&return_type,
batch.num_rows());
+impl CaseBody {
+ /// See [CaseExpr::case_when_with_expr].
+ fn case_when_with_expr(
+ &self,
+ batch: &RecordBatch,
+ return_type: &DataType,
+ ) -> Result<ColumnarValue> {
+ let mut result_builder = ResultBuilder::new(return_type,
batch.num_rows());
// `remainder_rows` contains the indices of the rows that need to be
evaluated
let mut remainder_rows: ArrayRef =
Review Comment:
Indeed. What's easy to miss in the original code is that, even though it was
tracking the remaining rows with a bit mask, calling
`PhysicalExpr#evaluate_selection` still incurs the cost of carrying/copying,
because there are no true selection-vector-based conditional evaluation
implementations. `evaluate_selection` just hides the filtering/scattering cost
from sight.
This led me to wonder whether `evaluate_selection` is actually useful. In
DataFusion itself, `case` is its only user, and at the moment the only
remaining usage is in `expr_or_expr`. Even there, its usage is dubious. I did a
little experiment in https://github.com/pepijnve/datafusion/tree/expr_or_expr
where I avoid the 'scatter' cost by using an unaligned zip implementation, and
I get better performance that way.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]