alamb commented on code in PR #7614:
URL: https://github.com/apache/arrow-datafusion/pull/7614#discussion_r1333352337


##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -136,62 +137,100 @@ fn maybe_data_types(
 ///
 /// See the module level documentation for more detail on coercion.
 pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
-    use self::DataType::*;
-
     if type_into == type_from {
         return true;
     }
-    // Null can convert to most of types
+    if let Some(coerced) = coerced_from(type_into, type_from) {
+        return coerced == type_into;
+    }
+    false
+}
+
+fn coerced_from<'a>(
+    type_into: &'a DataType,
+    type_from: &'a DataType,
+) -> Option<&'a DataType> {
+    use self::DataType::*;
+
     match type_into {
-        Int8 => matches!(type_from, Null | Int8),
-        Int16 => matches!(type_from, Null | Int8 | Int16 | UInt8),
-        Int32 => matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | 
UInt16),
-        Int64 => matches!(
-            type_from,
-            Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
-        ),
-        UInt8 => matches!(type_from, Null | UInt8),
-        UInt16 => matches!(type_from, Null | UInt8 | UInt16),
-        UInt32 => matches!(type_from, Null | UInt8 | UInt16 | UInt32),
-        UInt64 => matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64),
-        Float32 => matches!(
-            type_from,
-            Null | Int8
-                | Int16
-                | Int32
-                | Int64
-                | UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Float32
-        ),
-        Float64 => matches!(
-            type_from,
-            Null | Int8
-                | Int16
-                | Int32
-                | Int64
-                | UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Float32
-                | Float64
-                | Decimal128(_, _)
-        ),
-        Timestamp(TimeUnit::Nanosecond, _) => {
-            matches!(
+        // coerced into type_into
+        Int8 if matches!(type_from, Null | Int8) => Some(type_into),
+        Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) => 
Some(type_into),
+        Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | 
UInt16) => {
+            Some(type_into)
+        }
+        Int64
+            if matches!(
+                type_from,
+                Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
+            ) =>
+        {
+            Some(type_into)
+        }
+        UInt8 if matches!(type_from, Null | UInt8) => Some(type_into),
+        UInt16 if matches!(type_from, Null | UInt8 | UInt16) => 
Some(type_into),
+        UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => 
Some(type_into),
+        UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) 
=> {
+            Some(type_into)
+        }
+        Float32
+            if matches!(
                 type_from,
-                Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
-            )
+                Null | Int8
+                    | Int16
+                    | Int32
+                    | Int64
+                    | UInt8
+                    | UInt16
+                    | UInt32
+                    | UInt64
+                    | Float32
+            ) =>
+        {
+            Some(type_into)
         }
-        Interval(_) => {
-            matches!(type_from, Utf8 | LargeUtf8)
+        Float64
+            if matches!(
+                type_from,
+                Null | Int8
+                    | Int16
+                    | Int32
+                    | Int64
+                    | UInt8
+                    | UInt16
+                    | UInt32
+                    | UInt64
+                    | Float32
+                    | Float64
+                    | Decimal128(_, _)
+            ) =>
+        {
+            Some(type_into)
+        }
+        Timestamp(TimeUnit::Nanosecond, None)
+            if matches!(
+                type_from,
+                Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8
+            ) =>
+        {
+            Some(type_into)
         }
-        Utf8 | LargeUtf8 => true,
-        Null => can_cast_types(type_from, type_into),
-        _ => false,
+        Interval(_) if matches!(type_from, Utf8 | LargeUtf8) => 
Some(type_into),
+        Utf8 | LargeUtf8 => Some(type_into),
+        Null if can_cast_types(type_from, type_into) => Some(type_into),
+
+        // timestamp coercions, with timezone, accept the type_from timezone 
if valid

Review Comment:
   ```suggestion
           // Coerce to consistent timezones, if the `type_from` timezone is 
valid
   ```



##########
datafusion/sqllogictest/test_files/timestamps.slt:
##########
@@ -100,6 +100,40 @@ select * from foo where ts != '2000-02-01T00:00:00';
 statement ok
 drop table foo;
 
+
+##########

Review Comment:
   Could you also possibly add an invalid timezone test like
   
   ```sql
   ❯ select arrow_cast('2021-01-02T03:04:00', 'Timestamp(Nanosecond, 
Some("Foo"))');
   Optimizer rule 'simplify_expressions' failed
   caused by
   Arrow error: Parser error: Invalid timezone "Foo": 'Foo' is not a valid 
timezone
   ```
   
   



##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -136,62 +137,100 @@ fn maybe_data_types(
 ///
 /// See the module level documentation for more detail on coercion.
 pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
-    use self::DataType::*;
-
     if type_into == type_from {
         return true;
     }
-    // Null can convert to most of types
+    if let Some(coerced) = coerced_from(type_into, type_from) {
+        return coerced == type_into;
+    }
+    false
+}
+
+fn coerced_from<'a>(
+    type_into: &'a DataType,
+    type_from: &'a DataType,
+) -> Option<&'a DataType> {
+    use self::DataType::*;
+
     match type_into {
-        Int8 => matches!(type_from, Null | Int8),
-        Int16 => matches!(type_from, Null | Int8 | Int16 | UInt8),
-        Int32 => matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | 
UInt16),
-        Int64 => matches!(
-            type_from,
-            Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
-        ),
-        UInt8 => matches!(type_from, Null | UInt8),
-        UInt16 => matches!(type_from, Null | UInt8 | UInt16),
-        UInt32 => matches!(type_from, Null | UInt8 | UInt16 | UInt32),
-        UInt64 => matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64),
-        Float32 => matches!(
-            type_from,
-            Null | Int8
-                | Int16
-                | Int32
-                | Int64
-                | UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Float32
-        ),
-        Float64 => matches!(
-            type_from,
-            Null | Int8
-                | Int16
-                | Int32
-                | Int64
-                | UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Float32
-                | Float64
-                | Decimal128(_, _)
-        ),
-        Timestamp(TimeUnit::Nanosecond, _) => {
-            matches!(
+        // coerced into type_into
+        Int8 if matches!(type_from, Null | Int8) => Some(type_into),
+        Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) => 
Some(type_into),
+        Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | 
UInt16) => {
+            Some(type_into)
+        }
+        Int64
+            if matches!(
+                type_from,
+                Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
+            ) =>
+        {
+            Some(type_into)
+        }
+        UInt8 if matches!(type_from, Null | UInt8) => Some(type_into),
+        UInt16 if matches!(type_from, Null | UInt8 | UInt16) => 
Some(type_into),
+        UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => 
Some(type_into),
+        UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) 
=> {
+            Some(type_into)
+        }
+        Float32
+            if matches!(
                 type_from,
-                Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
-            )
+                Null | Int8
+                    | Int16
+                    | Int32
+                    | Int64
+                    | UInt8
+                    | UInt16
+                    | UInt32
+                    | UInt64
+                    | Float32
+            ) =>
+        {
+            Some(type_into)
         }
-        Interval(_) => {
-            matches!(type_from, Utf8 | LargeUtf8)
+        Float64
+            if matches!(
+                type_from,
+                Null | Int8
+                    | Int16
+                    | Int32
+                    | Int64
+                    | UInt8
+                    | UInt16
+                    | UInt32
+                    | UInt64
+                    | Float32
+                    | Float64
+                    | Decimal128(_, _)
+            ) =>
+        {
+            Some(type_into)
+        }
+        Timestamp(TimeUnit::Nanosecond, None)
+            if matches!(
+                type_from,
+                Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8
+            ) =>
+        {
+            Some(type_into)
         }
-        Utf8 | LargeUtf8 => true,
-        Null => can_cast_types(type_from, type_into),
-        _ => false,
+        Interval(_) if matches!(type_from, Utf8 | LargeUtf8) => 
Some(type_into),
+        Utf8 | LargeUtf8 => Some(type_into),
+        Null if can_cast_types(type_from, type_into) => Some(type_into),
+
+        // timestamp coercions, with timezone, accept the type_from timezone 
if valid
+        Timestamp(TimeUnit::Nanosecond, Some(_))
+            if matches!(
+                type_from,
+                Timestamp(TimeUnit::Nanosecond, Some(from_tz)) if 
arrow_array::timezone::Tz::from_str(from_tz).is_ok()

Review Comment:
   Now that I think about this, I wonder if there is any reason to check for 
valid timezones here at all -- if there is an invalid timezone, any actual 
calculation will fail subsequently
   
   Perhaps we could sidestep the whole "don't ignore error" thing entirely if we 
just skipped the check 🤔 



##########
datafusion/expr/src/type_coercion/functions.rs:
##########
@@ -136,62 +137,100 @@ fn maybe_data_types(
 ///
 /// See the module level documentation for more detail on coercion.
 pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
-    use self::DataType::*;
-
     if type_into == type_from {
         return true;
     }
-    // Null can convert to most of types
+    if let Some(coerced) = coerced_from(type_into, type_from) {
+        return coerced == type_into;
+    }
+    false
+}
+
+fn coerced_from<'a>(
+    type_into: &'a DataType,
+    type_from: &'a DataType,
+) -> Option<&'a DataType> {
+    use self::DataType::*;
+
     match type_into {
-        Int8 => matches!(type_from, Null | Int8),
-        Int16 => matches!(type_from, Null | Int8 | Int16 | UInt8),
-        Int32 => matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | 
UInt16),
-        Int64 => matches!(
-            type_from,
-            Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
-        ),
-        UInt8 => matches!(type_from, Null | UInt8),
-        UInt16 => matches!(type_from, Null | UInt8 | UInt16),
-        UInt32 => matches!(type_from, Null | UInt8 | UInt16 | UInt32),
-        UInt64 => matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64),
-        Float32 => matches!(
-            type_from,
-            Null | Int8
-                | Int16
-                | Int32
-                | Int64
-                | UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Float32
-        ),
-        Float64 => matches!(
-            type_from,
-            Null | Int8
-                | Int16
-                | Int32
-                | Int64
-                | UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Float32
-                | Float64
-                | Decimal128(_, _)
-        ),
-        Timestamp(TimeUnit::Nanosecond, _) => {
-            matches!(
+        // coerced into type_into
+        Int8 if matches!(type_from, Null | Int8) => Some(type_into),
+        Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) => 
Some(type_into),
+        Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | 
UInt16) => {
+            Some(type_into)
+        }
+        Int64
+            if matches!(
+                type_from,
+                Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32
+            ) =>
+        {
+            Some(type_into)
+        }
+        UInt8 if matches!(type_from, Null | UInt8) => Some(type_into),
+        UInt16 if matches!(type_from, Null | UInt8 | UInt16) => 
Some(type_into),
+        UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => 
Some(type_into),
+        UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) 
=> {
+            Some(type_into)
+        }
+        Float32
+            if matches!(
                 type_from,
-                Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8
-            )
+                Null | Int8
+                    | Int16
+                    | Int32
+                    | Int64
+                    | UInt8
+                    | UInt16
+                    | UInt32
+                    | UInt64
+                    | Float32
+            ) =>
+        {
+            Some(type_into)
         }
-        Interval(_) => {
-            matches!(type_from, Utf8 | LargeUtf8)
+        Float64
+            if matches!(
+                type_from,
+                Null | Int8
+                    | Int16
+                    | Int32
+                    | Int64
+                    | UInt8
+                    | UInt16
+                    | UInt32
+                    | UInt64
+                    | Float32
+                    | Float64
+                    | Decimal128(_, _)
+            ) =>
+        {
+            Some(type_into)
+        }
+        Timestamp(TimeUnit::Nanosecond, None)
+            if matches!(
+                type_from,
+                Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8
+            ) =>
+        {
+            Some(type_into)
         }
-        Utf8 | LargeUtf8 => true,
-        Null => can_cast_types(type_from, type_into),
-        _ => false,
+        Interval(_) if matches!(type_from, Utf8 | LargeUtf8) => 
Some(type_into),
+        Utf8 | LargeUtf8 => Some(type_into),
+        Null if can_cast_types(type_from, type_into) => Some(type_into),
+
+        // timestamp coercions, with timezone, accept the type_from timezone 
if valid
+        Timestamp(TimeUnit::Nanosecond, Some(_))
+            if matches!(
+                type_from,
+                Timestamp(TimeUnit::Nanosecond, Some(from_tz)) if 
arrow_array::timezone::Tz::from_str(from_tz).is_ok()

Review Comment:
   Now that I think about this, I wonder if there is any reason to check for 
valid timezones here at all -- if there is an invalid timezone, any actual 
calculation will fail subsequently
   
   Perhaps we could sidestep the whole "don't ignore error" thing entirely if we 
just skipped the check 🤔 



##########
datafusion/sqllogictest/test_files/timestamps.slt:
##########
@@ -100,6 +100,40 @@ select * from foo where ts != '2000-02-01T00:00:00';
 statement ok
 drop table foo;
 
+
+##########

Review Comment:
   Could you also possibly add an invalid timezone test like
   
   ```sql
   ❯ select arrow_cast('2021-01-02T03:04:00', 'Timestamp(Nanosecond, 
Some("Foo"))');
   Optimizer rule 'simplify_expressions' failed
   caused by
   Arrow error: Parser error: Invalid timezone "Foo": 'Foo' is not a valid 
timezone
   ```
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to