pepijnve commented on code in PR #18329:
URL: https://github.com/apache/datafusion/pull/18329#discussion_r2475657066
##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -556,63 +651,61 @@ impl CaseExpr {
};
if when_then_expr.is_empty() {
- exec_err!("There must be at least one WHEN clause")
- } else {
- let eval_method = if expr.is_some() {
- EvalMethod::WithExpression
- } else if when_then_expr.len() == 1
- && is_cheap_and_infallible(&(when_then_expr[0].1))
- && else_expr.is_none()
- {
- EvalMethod::InfallibleExprOrNull
- } else if when_then_expr.len() == 1
- && when_then_expr[0].1.as_any().is::<Literal>()
- && else_expr.is_some()
- && else_expr.as_ref().unwrap().as_any().is::<Literal>()
- {
- EvalMethod::ScalarOrScalar
- } else if when_then_expr.len() == 1 && else_expr.is_some() {
- EvalMethod::ExpressionOrExpression
- } else {
- EvalMethod::NoExpression
- };
-
- Ok(Self {
- expr,
- when_then_expr,
- else_expr,
- eval_method,
- })
+ return exec_err!("There must be at least one WHEN clause");
}
+
+ let body = CaseBody {
+ expr,
+ when_then_expr,
+ else_expr,
+ };
+
+ let eval_method = if body.expr.is_some() {
+ EvalMethod::WithExpression(body.project()?)
+ } else if body.when_then_expr.len() == 1
+ && is_cheap_and_infallible(&(body.when_then_expr[0].1))
+ && body.else_expr.is_none()
+ {
+ EvalMethod::InfallibleExprOrNull
+ } else if body.when_then_expr.len() == 1
+ && body.when_then_expr[0].1.as_any().is::<Literal>()
+ && body.else_expr.is_some()
+ && body.else_expr.as_ref().unwrap().as_any().is::<Literal>()
+ {
+ EvalMethod::ScalarOrScalar
+ } else if body.when_then_expr.len() == 1 && body.else_expr.is_some() {
+ EvalMethod::ExpressionOrExpression(body.project()?)
+ } else {
+ EvalMethod::NoExpression(body.project()?)
+ };
+
+ Ok(Self { body, eval_method })
}
/// Optional base expression that can be compared to literal values in the
"when" expressions
pub fn expr(&self) -> Option<&Arc<dyn PhysicalExpr>> {
- self.expr.as_ref()
+ self.body.expr.as_ref()
}
/// One or more when/then expressions
pub fn when_then_expr(&self) -> &[WhenThen] {
- &self.when_then_expr
+ &self.body.when_then_expr
}
/// Optional "else" expression
pub fn else_expr(&self) -> Option<&Arc<dyn PhysicalExpr>> {
- self.else_expr.as_ref()
+ self.body.else_expr.as_ref()
}
}
-impl CaseExpr {
- /// This function evaluates the form of CASE that matches an expression to
fixed values.
- ///
- /// CASE expression
- /// WHEN value THEN result
- /// [WHEN ...]
- /// [ELSE result]
- /// END
- fn case_when_with_expr(&self, batch: &RecordBatch) ->
Result<ColumnarValue> {
- let return_type = self.data_type(&batch.schema())?;
- let mut result_builder = ResultBuilder::new(&return_type,
batch.num_rows());
+impl CaseBody {
+ /// See [CaseExpr::case_when_with_expr].
+ fn case_when_with_expr(
+ &self,
+ batch: &RecordBatch,
+ return_type: &DataType,
+ ) -> Result<ColumnarValue> {
+ let mut result_builder = ResultBuilder::new(return_type,
batch.num_rows());
// `remainder_rows` contains the indices of the rows that need to be
evaluated
let mut remainder_rows: ArrayRef =
Review Comment:
It seems I've been living in this little corner of DataFusion for too long
already 😄 The reason for this PR is indeed that:
- we need to filter `RecordBatch` values
- filtering a `RecordBatch` filters all columns of the batch
- the when, then, and else expressions may reference only a few of the
columns of the batch
Any time spent filtering the unreferenced columns is unnecessary work.
The comment you suggested will hopefully clarify that for future readers.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]