alamb commented on code in PR #2819:
URL: https://github.com/apache/arrow-datafusion/pull/2819#discussion_r912346317


##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -291,12 +292,67 @@ impl PhysicalExpr for CaseExpr {
 /// Create a CASE expression
 pub fn case(
     expr: Option<Arc<dyn PhysicalExpr>>,
-    when_thens: &[WhenThen],
+    when_thens: Vec<WhenThen>,
     else_expr: Option<Arc<dyn PhysicalExpr>>,
+    input_schema: &Schema,
 ) -> Result<Arc<dyn PhysicalExpr>> {
+    // all the result of then and else should be convert to a common data type,
+    // if they can be coercible to a common data type, return error.
+    let coerce_type = get_case_common_type(&when_thens, else_expr.clone(), 
input_schema);
+    let (when_thens, else_expr) = match coerce_type {
+        None => Err(DataFusionError::Plan(format!(
+            "Can't get a common type for then {:?} and else {:?} expression",
+            when_thens, else_expr
+        ))),
+        Some(data_type) => {
+            // cast then expr
+            let left = when_thens
+                .into_iter()
+                .map(|(when, then)| {
+                    let then = try_cast(then, input_schema, 
data_type.clone()).unwrap();
+                    (when, then)
+                })
+                .collect::<Vec<WhenThen>>();
+            let right = match else_expr {
+                None => None,
+                Some(expr) => Some(try_cast(expr, input_schema, 
data_type.clone())?),
+            };
+            Ok((left, right))
+        }
+    }?;
+
     Ok(Arc::new(CaseExpr::try_new(expr, when_thens, else_expr)?))
 }
 
+fn get_case_common_type(
+    when_thens: &[WhenThen],
+    else_expr: Option<Arc<dyn PhysicalExpr>>,

Review Comment:
   ```suggestion
       else_expr: Option<&PhysicalExpr>,
   ```
   
   I wonder if there is a reason it needs to get a copy, or would a reference 
work too?



##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -635,4 +704,67 @@ mod tests {
             RecordBatch::try_from_iter(vec![("load4", Arc::new(load4) as 
ArrayRef)])?;
         Ok(batch)
     }
+
+    #[test]
+    fn case_test_incompatible() -> Result<()> {
+        // 1 then is int64
+        // 2 then is boolean
+        let batch = case_test_batch()?;
+        let schema = batch.schema();
+
+        // CASE WHEN a = 'foo' THEN 123 WHEN a = 'bar' THEN true END
+        let when1 = binary(
+            col("a", &schema)?,
+            Operator::Eq,
+            lit(ScalarValue::Utf8(Some("foo".to_string()))),

Review Comment:
   It would be nice if the the `lit` in the physical expr could use 
`lit("foo")` as well. But I tried it and it doesn't work 
   
   ```suggestion
               lit("foo"),
   ```
   
   



##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -76,7 +77,7 @@ impl CaseExpr {
     /// Create a new CASE WHEN expression
     pub fn try_new(
         expr: Option<Arc<dyn PhysicalExpr>>,
-        when_then_expr: &[WhenThen],
+        when_then_expr: Vec<WhenThen>,

Review Comment:
   👍 



##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -291,12 +292,67 @@ impl PhysicalExpr for CaseExpr {
 /// Create a CASE expression
 pub fn case(
     expr: Option<Arc<dyn PhysicalExpr>>,
-    when_thens: &[WhenThen],
+    when_thens: Vec<WhenThen>,
     else_expr: Option<Arc<dyn PhysicalExpr>>,
+    input_schema: &Schema,
 ) -> Result<Arc<dyn PhysicalExpr>> {
+    // all the result of then and else should be convert to a common data type,
+    // if they can be coercible to a common data type, return error.
+    let coerce_type = get_case_common_type(&when_thens, else_expr.clone(), 
input_schema);
+    let (when_thens, else_expr) = match coerce_type {
+        None => Err(DataFusionError::Plan(format!(
+            "Can't get a common type for then {:?} and else {:?} expression",
+            when_thens, else_expr
+        ))),
+        Some(data_type) => {
+            // cast then expr
+            let left = when_thens
+                .into_iter()
+                .map(|(when, then)| {
+                    let then = try_cast(then, input_schema, 
data_type.clone()).unwrap();
+                    (when, then)
+                })
+                .collect::<Vec<WhenThen>>();

Review Comment:
   Are we sure this will always return without error (aka that this will not 
panic)?
   
   Since this function can return an `Result` anyways, would it be better to 
return it. Something like (untested):
   
    ```suggestion
               let left = when_thens
                   .into_iter()
                   .map(|(when, then)| {
                       let then = try_cast(then, input_schema, 
data_type.clone())?
                       Ok((when, then))
                   })
                   .collect::<Result<Vec<WhenThen>>>()?;
   ```



##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -291,12 +292,67 @@ impl PhysicalExpr for CaseExpr {
 /// Create a CASE expression
 pub fn case(
     expr: Option<Arc<dyn PhysicalExpr>>,
-    when_thens: &[WhenThen],
+    when_thens: Vec<WhenThen>,
     else_expr: Option<Arc<dyn PhysicalExpr>>,
+    input_schema: &Schema,
 ) -> Result<Arc<dyn PhysicalExpr>> {
+    // all the result of then and else should be convert to a common data type,
+    // if they can be coercible to a common data type, return error.
+    let coerce_type = get_case_common_type(&when_thens, else_expr.clone(), 
input_schema);
+    let (when_thens, else_expr) = match coerce_type {
+        None => Err(DataFusionError::Plan(format!(
+            "Can't get a common type for then {:?} and else {:?} expression",
+            when_thens, else_expr
+        ))),
+        Some(data_type) => {
+            // cast then expr
+            let left = when_thens
+                .into_iter()
+                .map(|(when, then)| {
+                    let then = try_cast(then, input_schema, 
data_type.clone()).unwrap();
+                    (when, then)
+                })
+                .collect::<Vec<WhenThen>>();
+            let right = match else_expr {
+                None => None,
+                Some(expr) => Some(try_cast(expr, input_schema, 
data_type.clone())?),
+            };
+            Ok((left, right))
+        }
+    }?;
+
     Ok(Arc::new(CaseExpr::try_new(expr, when_thens, else_expr)?))
 }
 
+fn get_case_common_type(
+    when_thens: &[WhenThen],
+    else_expr: Option<Arc<dyn PhysicalExpr>>,
+    input_schema: &Schema,
+) -> Option<DataType> {
+    let thens_type = when_thens
+        .iter()
+        .map(|when_then| {
+            let data_type = &when_then.1.data_type(input_schema).unwrap();
+            data_type.clone()
+        })
+        .collect::<Vec<_>>();
+    let else_type = match else_expr {
+        None => {
+            // case when then exprs must have one then value
+            thens_type[0].clone()
+        }
+        Some(else_phy_expr) => else_phy_expr.data_type(input_schema).unwrap(),
+    };
+    thens_type
+        .iter()
+        .fold(Some(else_type), |left, right_type| match left {
+            None => None,
+            // TODO: now just use the `equal` coercion rule for case when. If 
find the issue, and

Review Comment:
   👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to