waynexia commented on code in PR #5384:
URL: https://github.com/apache/arrow-datafusion/pull/5384#discussion_r1116851452


##########
datafusion/expr/src/type_coercion.rs:
##########
@@ -62,17 +62,18 @@ pub fn is_numeric(dt: &DataType) -> bool {
         )
 }
 
-/// Determine if a DataType is Timestamp or not
+/// Determine whether the given data type `dt` is a `Timestamp`.
 pub fn is_timestamp(dt: &DataType) -> bool {
     matches!(dt, DataType::Timestamp(_, _))
 }
 
-/// Determine if a DataType is Date or not
+/// Determine whether the given data type `dt` is a `Date`.
 pub fn is_date(dt: &DataType) -> bool {
     matches!(dt, DataType::Date32 | DataType::Date64)
 }
 
-pub fn is_uft8(dt: &DataType) -> bool {
+/// Determine whether the given data type `dt` is a `Utf8`.
+pub fn is_utf8(dt: &DataType) -> bool {
     matches!(dt, DataType::Utf8)
 }

Review Comment:
   Not related to this PR, but I think we missed `DataType::LargeUtf8` in 
#5234. cc @jackwener 



##########
datafusion/optimizer/src/type_coercion.rs:
##########
@@ -426,95 +426,128 @@ impl ExprRewriter for TypeCoercionRewriter {
     }
 }
 
-/// Casts the ScalarValue `value` to coerced type.
-// When coerced type is `Interval` we use `parse_interval` since 
`try_from_string` not
-// supports conversion from string to Interval
-fn convert_to_coerced_type(
-    coerced_type: &DataType,
-    value: &ScalarValue,
-) -> Result<ScalarValue> {
+/// Casts the given `value` to `target_type`. Note that this function
+/// only considers `Null` or `Utf8` values.
+fn coerce_scalar(target_type: &DataType, value: &ScalarValue) -> 
Result<ScalarValue> {
     match value {
-        // In here we do casting either for NULL types or
-        // ScalarValue::Utf8(Some(val)). The other types are already casted.
-        // The reason is that we convert the sqlparser result
-        // to the Utf8 for all possible cases. Hence the types other than Utf8
-        // are already casted to appropriate type. Therefore they can be 
returned directly.
+        // Coerce Utf8 values:
         ScalarValue::Utf8(Some(val)) => {
-            // we need special handling for Interval types
-            if let DataType::Interval(..) = coerced_type {
+            // When `target_type` is `Interval`, we use `parse_interval` since
+            // `try_from_string` does not support `String` to `Interval` 
coercions.
+            if let DataType::Interval(..) = target_type {
                 parse_interval("millisecond", val)
             } else {
-                ScalarValue::try_from_string(val.clone(), coerced_type)
+                ScalarValue::try_from_string(val.clone(), target_type)
             }
         }
         s => {
             if s.is_null() {
-                ScalarValue::try_from(coerced_type)
+                // Coerce `Null` values:
+                ScalarValue::try_from(target_type)
             } else {
+                // Values except `Utf8`/`Null` variants already have the right 
type
+                // (casted before) since we convert `sqlparser` outputs to 
`Utf8`
+                // for all possible cases. Therefore, we return a clone here.
                 Ok(s.clone())
             }
         }
     }
 }
 
+/// This function coerces `value` to `target_type` in a range-aware fashion.
+/// If the coercion is successful, we return an `Ok` value with the result.
+/// If the coercion fails because `target_type` is not wide enough (i.e. we
+/// can not coerce to `target_type`, but we can to a wider type in the same
+/// family), we return a `Null` value of this type to signal this situation.
+/// Downstream code uses this signal to treat these values as *unbounded*.
+fn coerce_scalar_range_aware(
+    target_type: &DataType,
+    value: &ScalarValue,
+) -> Result<ScalarValue> {
+    coerce_scalar(target_type, value).or_else(|err| {
+        // If type coercion fails, check if the largest type in family works:
+        if let Some(largest_type) = get_widest_type_in_family(target_type) {
+            coerce_scalar(largest_type, value).map_or_else(
+                |_| {
+                    Err(DataFusionError::NotImplemented(format!(
+                        "Cannot cast {:?} to {:?}",
+                        value, target_type
+                    )))

Review Comment:
   If the largest type doesn't work then this coercion is impossible rather 
than unimplemented?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to