alamb commented on code in PR #4020:
URL: https://github.com/apache/arrow-rs/pull/4020#discussion_r1167485528
##########
arrow-cast/src/cast.rs:
##########
@@ -8386,136 +8386,219 @@ mod tests {
assert_eq!("Invalid argument error: 1234567000 is too large to store
in a Decimal256 of precision 7. Max is 9999999", err.unwrap_err().to_string());
}
+ /// helper function to test casting from duration to interval
+ fn cast_from_duration_to_interval<T: ArrowTemporalType>(
+ array: Vec<i64>,
+ cast_options: &CastOptions,
+ ) -> Result<PrimitiveArray<IntervalMonthDayNanoType>, ArrowError>
+ where
+ arrow_array::PrimitiveArray<T>: From<Vec<i64>>,
+ {
+ let array = PrimitiveArray::<T>::from(array);
+ let array = Arc::new(array) as ArrayRef;
+ let casted_array = cast_with_options(
+ &array,
+ &DataType::Interval(IntervalUnit::MonthDayNano),
+ cast_options,
+ )?;
+ casted_array
+ .as_any()
+ .downcast_ref::<IntervalMonthDayNanoArray>()
+ .ok_or_else(|| {
+ ArrowError::ComputeError(
+ "Failed to downcast to
IntervalMonthDayNanoArray".to_string(),
+ )
+ })
+ .cloned()
+ }
+
#[test]
fn test_cast_from_duration_to_interval() {
// from duration second to interval month day nano
let array = vec![1234567];
- let array = DurationSecondArray::from(array);
- let array = Arc::new(array) as ArrayRef;
- let casted_array =
- cast(&array,
&DataType::Interval(IntervalUnit::MonthDayNano)).unwrap();
- let casted_array = casted_array
- .as_any()
- .downcast_ref::<IntervalMonthDayNanoArray>()
- .unwrap();
+ let casted_array =
cast_from_duration_to_interval::<DurationSecondType>(
+ array,
+ &DEFAULT_CAST_OPTIONS,
+ )
+ .unwrap();
assert_eq!(
casted_array.data_type(),
&DataType::Interval(IntervalUnit::MonthDayNano)
);
assert_eq!(casted_array.value(0), 1234567000000000);
+ let array = vec![i64::MAX];
+ let casted_array =
cast_from_duration_to_interval::<DurationSecondType>(
+ array,
+ &CastOptions { safe: false },
+ )
+ .unwrap();
+ assert_eq!(casted_array.value(0), 9223372036854775807000000000);
+
// from duration millisecond to interval month day nano
let array = vec![1234567];
- let array = DurationMillisecondArray::from(array);
- let array = Arc::new(array) as ArrayRef;
- let casted_array =
- cast(&array,
&DataType::Interval(IntervalUnit::MonthDayNano)).unwrap();
- let casted_array = casted_array
- .as_any()
- .downcast_ref::<IntervalMonthDayNanoArray>()
- .unwrap();
+ let casted_array =
cast_from_duration_to_interval::<DurationMillisecondType>(
+ array,
+ &DEFAULT_CAST_OPTIONS,
+ )
+ .unwrap();
assert_eq!(
casted_array.data_type(),
&DataType::Interval(IntervalUnit::MonthDayNano)
);
assert_eq!(casted_array.value(0), 1234567000000);
+ let array = vec![i64::MAX];
+ let casted_array =
cast_from_duration_to_interval::<DurationMillisecondType>(
+ array,
+ &CastOptions { safe: false },
+ )
+ .unwrap();
+ assert_eq!(casted_array.value(0), 9223372036854775807000000);
Review Comment:
So if I convert this value back:
```rust
let (months, days, nanos) =
IntervalMonthDayNanoType::to_parts(9223372036854775807000000);
println!("months: {months}, days: {days}, nanos: {nanos}");
```
it prints out:
```
months: 0, days: 499999, nanos: -1000000
```
Which doesn't seem right for `9223372036854775807` milliseconds
I think what is happening can be explained here (the nanoseconds fields have
overflowed into the days)
https://docs.rs/arrow-array/37.0.0/src/arrow_array/types.rs.html#435-445
```
/*
https://github.com/apache/arrow/blob/02c8598d264c839a5b5cf3109bfd406f3b8a6ba5/cpp/src/arrow/type.h#L1475
struct MonthDayNanos {
int32_t months;
int32_t days;
int64_t nanoseconds;
}
128 112 96 80 64 48 32 16 0
+-------+-------+-------+-------+-------+-------+-------+-------+
| months | days | nanos |
+-------+-------+-------+-------+-------+-------+-------+-------+
*/
```
I think what should happen is
1. (preferred): the cast should return an error if the nanosecond field
would have overflowed
2. (secondarily): if the nanosecond field overflows the overflow should be
converted into days
I think erroring (at least at first) is preferred because it avoids
questions like "how many seconds are there in a day" which is not actually a
fixed quantity given leap seconds, etc.
If someone has need of casting such durations to intervals, we can then sort
out the mechanics
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]