This is an automated email from the ASF dual-hosted git repository.

blaginin pushed a commit to branch annarose/dict-coercion
in repository https://gitbox.apache.org/repos/asf/datafusion-sandbox.git

commit 5981d62c2860268790986246b705a1963e25f4e7
Author: Jeffrey Vo <[email protected]>
AuthorDate: Wed Feb 4 06:26:14 2026 +0900

    chore: remove datatype check functions in favour of upstream versions 
(#20104)
    
    We now have
    
[`DataType::is_string`](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#method.is_string)
    and
    
[`DataType::is_decimal`](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#method.is_decimal)
    so make the most of them
---
 datafusion/expr-common/src/type_coercion/binary.rs | 22 ++++++--------------
 datafusion/expr/src/type_coercion/mod.rs           | 24 ----------------------
 datafusion/optimizer/src/analyzer/type_coercion.rs | 23 +++++++++++----------
 .../physical-expr/src/expressions/negative.rs      |  4 ++--
 datafusion/pruning/src/pruning_predicate.rs        | 11 ++--------
 5 files changed, 22 insertions(+), 62 deletions(-)

diff --git a/datafusion/expr-common/src/type_coercion/binary.rs 
b/datafusion/expr-common/src/type_coercion/binary.rs
index 427ebc598..9051f412b 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -351,16 +351,6 @@ impl<'a> BinaryTypeCoercer<'a> {
 
 // TODO Move the rest inside of BinaryTypeCoercer
 
-fn is_decimal(data_type: &DataType) -> bool {
-    matches!(
-        data_type,
-        DataType::Decimal32(..)
-            | DataType::Decimal64(..)
-            | DataType::Decimal128(..)
-            | DataType::Decimal256(..)
-    )
-}
-
 /// Returns true if both operands are Date types (Date32 or Date64)
 /// Used to detect Date - Date operations which should return Int64 (days 
difference)
 fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool {
@@ -402,8 +392,8 @@ fn math_decimal_coercion(
         }
         // Cross-variant decimal coercion - choose larger variant with 
appropriate precision/scale
         (lhs, rhs)
-            if is_decimal(lhs)
-                && is_decimal(rhs)
+            if lhs.is_decimal()
+                && rhs.is_decimal()
                 && std::mem::discriminant(lhs) != std::mem::discriminant(rhs) 
=>
         {
             let coerced_type = get_wider_decimal_type_cross_variant(lhs_type, 
rhs_type)?;
@@ -1018,8 +1008,8 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<Data
     match (lhs_type, rhs_type) {
         // Same decimal types
         (lhs_type, rhs_type)
-            if is_decimal(lhs_type)
-                && is_decimal(rhs_type)
+            if lhs_type.is_decimal()
+                && rhs_type.is_decimal()
                 && std::mem::discriminant(lhs_type)
                     == std::mem::discriminant(rhs_type) =>
         {
@@ -1027,8 +1017,8 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<Data
         }
         // Mismatched decimal types
         (lhs_type, rhs_type)
-            if is_decimal(lhs_type)
-                && is_decimal(rhs_type)
+            if lhs_type.is_decimal()
+                && rhs_type.is_decimal()
                 && std::mem::discriminant(lhs_type)
                     != std::mem::discriminant(rhs_type) =>
         {
diff --git a/datafusion/expr/src/type_coercion/mod.rs 
b/datafusion/expr/src/type_coercion/mod.rs
index bd1acd3f3..c92d434e3 100644
--- a/datafusion/expr/src/type_coercion/mod.rs
+++ b/datafusion/expr/src/type_coercion/mod.rs
@@ -58,11 +58,6 @@ pub fn is_signed_numeric(dt: &DataType) -> bool {
     )
 }
 
-/// Determine whether the given data type `dt` is `Null`.
-pub fn is_null(dt: &DataType) -> bool {
-    *dt == DataType::Null
-}
-
 /// Determine whether the given data type `dt` is a `Timestamp`.
 pub fn is_timestamp(dt: &DataType) -> bool {
     matches!(dt, DataType::Timestamp(_, _))
@@ -80,22 +75,3 @@ pub fn is_datetime(dt: &DataType) -> bool {
         DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _)
     )
 }
-
-/// Determine whether the given data type `dt` is a `Utf8` or `Utf8View` or 
`LargeUtf8`.
-pub fn is_utf8_or_utf8view_or_large_utf8(dt: &DataType) -> bool {
-    matches!(
-        dt,
-        DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8
-    )
-}
-
-/// Determine whether the given data type `dt` is a `Decimal`.
-pub fn is_decimal(dt: &DataType) -> bool {
-    matches!(
-        dt,
-        DataType::Decimal32(_, _)
-            | DataType::Decimal64(_, _)
-            | DataType::Decimal128(_, _)
-            | DataType::Decimal256(_, _)
-    )
-}
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs 
b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 8b57cc798..1e59ceb97 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -43,10 +43,10 @@ use datafusion_expr::expr_schema::cast_subquery;
 use datafusion_expr::logical_plan::Subquery;
 use datafusion_expr::type_coercion::binary::{comparison_coercion, 
like_coercion};
 use datafusion_expr::type_coercion::functions::{UDFCoercionExt, 
fields_with_udf};
+use datafusion_expr::type_coercion::is_datetime;
 use datafusion_expr::type_coercion::other::{
     get_coerce_type_for_case_expression, get_coerce_type_for_list,
 };
-use datafusion_expr::type_coercion::{is_datetime, 
is_utf8_or_utf8view_or_large_utf8};
 use datafusion_expr::utils::merge_schema;
 use datafusion_expr::{
     Cast, Expr, ExprSchemable, Join, Limit, LogicalPlan, Operator, Projection, 
Union,
@@ -513,10 +513,8 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> {
                 .data;
                 let expr_type = expr.get_type(self.schema)?;
                 let subquery_type = new_plan.schema().field(0).data_type();
-                if (expr_type.is_numeric()
-                    && is_utf8_or_utf8view_or_large_utf8(subquery_type))
-                    || (subquery_type.is_numeric()
-                        && is_utf8_or_utf8view_or_large_utf8(&expr_type))
+                if (expr_type.is_numeric() && subquery_type.is_string())
+                    || (subquery_type.is_numeric() && expr_type.is_string())
                 {
                     return plan_err!(
                         "expr type {expr_type} can't cast to {subquery_type} 
in SetComparison"
@@ -890,12 +888,15 @@ fn coerce_frame_bound(
 
 fn extract_window_frame_target_type(col_type: &DataType) -> Result<DataType> {
     if col_type.is_numeric()
-        || is_utf8_or_utf8view_or_large_utf8(col_type)
-        || matches!(col_type, DataType::List(_))
-        || matches!(col_type, DataType::LargeList(_))
-        || matches!(col_type, DataType::FixedSizeList(_, _))
-        || matches!(col_type, DataType::Null)
-        || matches!(col_type, DataType::Boolean)
+        || col_type.is_string()
+        || col_type.is_null()
+        || matches!(
+            col_type,
+            DataType::List(_)
+                | DataType::LargeList(_)
+                | DataType::FixedSizeList(_, _)
+                | DataType::Boolean
+        )
     {
         Ok(col_type.clone())
     } else if is_datetime(col_type) {
diff --git a/datafusion/physical-expr/src/expressions/negative.rs 
b/datafusion/physical-expr/src/expressions/negative.rs
index 0c9476beb..c727c8fa5 100644
--- a/datafusion/physical-expr/src/expressions/negative.rs
+++ b/datafusion/physical-expr/src/expressions/negative.rs
@@ -37,7 +37,7 @@ use datafusion_expr::statistics::Distribution::{
 };
 use datafusion_expr::{
     ColumnarValue,
-    type_coercion::{is_interval, is_null, is_signed_numeric, is_timestamp},
+    type_coercion::{is_interval, is_signed_numeric, is_timestamp},
 };
 
 /// Negative expression
@@ -190,7 +190,7 @@ pub fn negative(
     input_schema: &Schema,
 ) -> Result<Arc<dyn PhysicalExpr>> {
     let data_type = arg.data_type(input_schema)?;
-    if is_null(&data_type) {
+    if data_type.is_null() {
         Ok(arg)
     } else if !is_signed_numeric(&data_type)
         && !is_interval(&data_type)
diff --git a/datafusion/pruning/src/pruning_predicate.rs 
b/datafusion/pruning/src/pruning_predicate.rs
index 5f1b4233b..d0cb06744 100644
--- a/datafusion/pruning/src/pruning_predicate.rs
+++ b/datafusion/pruning/src/pruning_predicate.rs
@@ -1205,13 +1205,6 @@ fn is_compare_op(op: Operator) -> bool {
     )
 }
 
-fn is_string_type(data_type: &DataType) -> bool {
-    matches!(
-        data_type,
-        DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View
-    )
-}
-
 // The pruning logic is based on the comparing the min/max bounds.
 // Must make sure the two type has order.
 // For example, casts from string to numbers is not correct.
@@ -1233,7 +1226,7 @@ fn verify_support_type_for_prune(from_type: &DataType, 
to_type: &DataType) -> Re
     // If both types are strings or both are not strings (number, timestamp, 
etc)
     // then we can compare them.
     // PruningPredicate does not support casting of strings to numbers and 
such.
-    if is_string_type(from_type) == is_string_type(to_type) {
+    if from_type.is_string() == to_type.is_string() {
         Ok(())
     } else {
         plan_err!(
@@ -4681,7 +4674,7 @@ mod tests {
             true,
             // s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"]  ==> some rows 
could pass (must keep)
             true,
-            // s1 ["A\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"]  ==> no 
row match. (min, max) maybe truncate 
+            // s1 ["A\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"]  ==> no 
row match. (min, max) maybe truncate
             // original (min, max) maybe ("A\u{10ffff}\u{10ffff}\u{10ffff}", 
"A\u{10ffff}\u{10ffff}\u{10ffff}\u{10ffff}")
             true,
         ];


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to