This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new fb2221c43 combine all comparison coercion rules (#2901)
fb2221c43 is described below
commit fb2221c43d3367e876430e687ad6f1783cb79075
Author: Andy Grove <[email protected]>
AuthorDate: Thu Jul 14 16:27:45 2022 -0600
combine all comparison coercion rules (#2901)
---
datafusion/core/src/physical_plan/planner.rs | 2 --
datafusion/expr/src/binary_rule.rs | 35 ++++++------------------
datafusion/physical-expr/src/expressions/case.rs | 4 +--
datafusion/physical-expr/src/planner.rs | 4 +--
4 files changed, 13 insertions(+), 32 deletions(-)
diff --git a/datafusion/core/src/physical_plan/planner.rs
b/datafusion/core/src/physical_plan/planner.rs
index 820808a08..90ff7a448 100644
--- a/datafusion/core/src/physical_plan/planner.rs
+++ b/datafusion/core/src/physical_plan/planner.rs
@@ -1763,8 +1763,6 @@ mod tests {
async fn errors() -> Result<()> {
let bool_expr = col("c1").eq(col("c1"));
let cases = vec![
- // utf8 < u32
- col("c1").lt(col("c2")),
// utf8 AND utf8
col("c1").and(col("c1")),
// u8 AND u8
diff --git a/datafusion/expr/src/binary_rule.rs
b/datafusion/expr/src/binary_rule.rs
index 5b404d8a2..21e62344c 100644
--- a/datafusion/expr/src/binary_rule.rs
+++ b/datafusion/expr/src/binary_rule.rs
@@ -84,12 +84,13 @@ pub fn coerce_types(
(DataType::Boolean, DataType::Boolean) => Some(DataType::Boolean),
_ => None,
},
- // logical equality operators have their own rules, and always return
a boolean
- Operator::Eq | Operator::NotEq => comparison_eq_coercion(lhs_type,
rhs_type),
- // order-comparison operators have their own rules
- Operator::Lt | Operator::Gt | Operator::GtEq | Operator::LtEq => {
- comparison_order_coercion(lhs_type, rhs_type)
- }
+ // logical comparison operators have their own rules, and always
return a boolean
+ Operator::Eq
+ | Operator::NotEq
+ | Operator::Lt
+ | Operator::Gt
+ | Operator::GtEq
+ | Operator::LtEq => comparison_coercion(lhs_type, rhs_type),
// "like" operators operate on strings and always return a boolean
Operator::Like | Operator::NotLike => like_coercion(lhs_type,
rhs_type),
// date +/- interval returns date
@@ -150,11 +151,8 @@ fn bitwise_coercion(left_type: &DataType, right_type:
&DataType) -> Option<DataT
}
}
-/// Get the coerced data type for `eq` or `not eq` operation
-pub fn comparison_eq_coercion(
- lhs_type: &DataType,
- rhs_type: &DataType,
-) -> Option<DataType> {
+/// Get the coerced data type for comparison operations such as `eq`, `not
eq`, `lt`, `lteq`, `gt`, and `gteq`.
+pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
if lhs_type == rhs_type {
// same type => equality is possible
return Some(lhs_type.clone());
@@ -167,21 +165,6 @@ pub fn comparison_eq_coercion(
.or_else(|| string_numeric_coercion(lhs_type, rhs_type))
}
-fn comparison_order_coercion(
- lhs_type: &DataType,
- rhs_type: &DataType,
-) -> Option<DataType> {
- if lhs_type == rhs_type {
- // same type => all good
- return Some(lhs_type.clone());
- }
- comparison_binary_numeric_coercion(lhs_type, rhs_type)
- .or_else(|| string_coercion(lhs_type, rhs_type))
- .or_else(|| dictionary_coercion(lhs_type, rhs_type, true))
- .or_else(|| temporal_coercion(lhs_type, rhs_type))
- .or_else(|| null_coercion(lhs_type, rhs_type))
-}
-
fn string_numeric_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
diff --git a/datafusion/physical-expr/src/expressions/case.rs
b/datafusion/physical-expr/src/expressions/case.rs
index a05013e02..cf4f7defe 100644
--- a/datafusion/physical-expr/src/expressions/case.rs
+++ b/datafusion/physical-expr/src/expressions/case.rs
@@ -25,7 +25,7 @@ use arrow::compute::{and, eq_dyn, is_null, not, or,
or_kleene};
use arrow::datatypes::{DataType, Schema};
use arrow::record_batch::RecordBatch;
use datafusion_common::{DataFusionError, Result};
-use datafusion_expr::binary_rule::comparison_eq_coercion;
+use datafusion_expr::binary_rule::comparison_coercion;
use datafusion_expr::ColumnarValue;
type WhenThen = (Arc<dyn PhysicalExpr>, Arc<dyn PhysicalExpr>);
@@ -350,7 +350,7 @@ fn get_case_common_type(
None => None,
// TODO: now just use the `equal` coercion rule for case when. If
find the issue, and
// refactor again.
- Some(left_type) => comparison_eq_coercion(&left_type, right_type),
+ Some(left_type) => comparison_coercion(&left_type, right_type),
})
}
diff --git a/datafusion/physical-expr/src/planner.rs
b/datafusion/physical-expr/src/planner.rs
index 570f438e2..a7f774073 100644
--- a/datafusion/physical-expr/src/planner.rs
+++ b/datafusion/physical-expr/src/planner.rs
@@ -25,7 +25,7 @@ use crate::{
};
use arrow::datatypes::{DataType, Schema};
use datafusion_common::{DFSchema, DataFusionError, Result, ScalarValue};
-use datafusion_expr::binary_rule::comparison_eq_coercion;
+use datafusion_expr::binary_rule::comparison_coercion;
use datafusion_expr::{Expr, Operator};
use std::sync::Arc;
@@ -354,7 +354,7 @@ fn get_coerce_type(expr_type: &DataType, list_type:
&[DataType]) -> Option<DataT
match left {
None => None,
// TODO refactor a framework to do the data type coercion
- Some(left_type) => comparison_eq_coercion(&left_type,
right_type),
+ Some(left_type) => comparison_coercion(&left_type, right_type),
}
})
}