alamb commented on code in PR #4020:
URL: https://github.com/apache/arrow-rs/pull/4020#discussion_r1167485528


##########
arrow-cast/src/cast.rs:
##########
@@ -8386,136 +8386,219 @@ mod tests {
         assert_eq!("Invalid argument error: 1234567000 is too large to store 
in a Decimal256 of precision 7. Max is 9999999", err.unwrap_err().to_string());
     }
 
+    /// helper function to test casting from duration to interval
+    fn cast_from_duration_to_interval<T: ArrowTemporalType>(
+        array: Vec<i64>,
+        cast_options: &CastOptions,
+    ) -> Result<PrimitiveArray<IntervalMonthDayNanoType>, ArrowError>
+    where
+        arrow_array::PrimitiveArray<T>: From<Vec<i64>>,
+    {
+        let array = PrimitiveArray::<T>::from(array);
+        let array = Arc::new(array) as ArrayRef;
+        let casted_array = cast_with_options(
+            &array,
+            &DataType::Interval(IntervalUnit::MonthDayNano),
+            cast_options,
+        )?;
+        casted_array
+            .as_any()
+            .downcast_ref::<IntervalMonthDayNanoArray>()
+            .ok_or_else(|| {
+                ArrowError::ComputeError(
+                    "Failed to downcast to 
IntervalMonthDayNanoArray".to_string(),
+                )
+            })
+            .cloned()
+    }
+
     #[test]
     fn test_cast_from_duration_to_interval() {
         // from duration second to interval month day nano
         let array = vec![1234567];
-        let array = DurationSecondArray::from(array);
-        let array = Arc::new(array) as ArrayRef;
-        let casted_array =
-            cast(&array, 
&DataType::Interval(IntervalUnit::MonthDayNano)).unwrap();
-        let casted_array = casted_array
-            .as_any()
-            .downcast_ref::<IntervalMonthDayNanoArray>()
-            .unwrap();
+        let casted_array = 
cast_from_duration_to_interval::<DurationSecondType>(
+            array,
+            &DEFAULT_CAST_OPTIONS,
+        )
+        .unwrap();
         assert_eq!(
             casted_array.data_type(),
             &DataType::Interval(IntervalUnit::MonthDayNano)
         );
         assert_eq!(casted_array.value(0), 1234567000000000);
 
+        let array = vec![i64::MAX];
+        let casted_array = 
cast_from_duration_to_interval::<DurationSecondType>(
+            array,
+            &CastOptions { safe: false },
+        )
+        .unwrap();
+        assert_eq!(casted_array.value(0), 9223372036854775807000000000);
+
         // from duration millisecond to interval month day nano
         let array = vec![1234567];
-        let array = DurationMillisecondArray::from(array);
-        let array = Arc::new(array) as ArrayRef;
-        let casted_array =
-            cast(&array, 
&DataType::Interval(IntervalUnit::MonthDayNano)).unwrap();
-        let casted_array = casted_array
-            .as_any()
-            .downcast_ref::<IntervalMonthDayNanoArray>()
-            .unwrap();
+        let casted_array = 
cast_from_duration_to_interval::<DurationMillisecondType>(
+            array,
+            &DEFAULT_CAST_OPTIONS,
+        )
+        .unwrap();
         assert_eq!(
             casted_array.data_type(),
             &DataType::Interval(IntervalUnit::MonthDayNano)
         );
         assert_eq!(casted_array.value(0), 1234567000000);
 
+        let array = vec![i64::MAX];
+        let casted_array = 
cast_from_duration_to_interval::<DurationMillisecondType>(
+            array,
+            &CastOptions { safe: false },
+        )
+        .unwrap();
+        assert_eq!(casted_array.value(0), 9223372036854775807000000);

Review Comment:
   So if I convert this value back into fields:
   
   ```rust
       let (months, days, nanos) = 
IntervalMonthDayNanoType::to_parts(9223372036854775807000000);
       println!("months: {months}, days: {days}, nanos: {nanos}");
   
   ```
   
   it prints out:
   
   ```
   months: 0, days: 499999, nanos: -1000000
   
   ```
   
   Which doesn't seem right for `9223372036854775807` milliseconds 
   
   I think what is happening can be explained here (the nanoseconds fields have 
overflowed into the days)
   
   https://docs.rs/arrow-array/37.0.0/src/arrow_array/types.rs.html#435-445
   
   ```
           /*
           
https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1475
           struct MonthDayNanos {
               int32_t months;
               int32_t days;
               int64_t nanoseconds;
           }
           128     112     96      80      64      48      32      16      0
           +-------+-------+-------+-------+-------+-------+-------+-------+
           |     months    |      days     |             nanos             |
           +-------+-------+-------+-------+-------+-------+-------+-------+
           */
   ```
   
   I think what should happen is 
   1. (preferred): the cast should return an error if the nanosecond field 
would have overflowed
   2. (secondarily): if the nanosecond field overflows the overflow should be 
converted into days
   
   I think erroring (at least at first) is preferred because it avoids 
questions like "how many seconds are there in a day" which is not actually a 
fixed quantity given leap seconds, etc.
   
   If someone has need of casting such durations to intervals, we can then sort 
out the mechanics



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to