alamb commented on code in PR #4020:
URL: https://github.com/apache/arrow-rs/pull/4020#discussion_r1167579187
##########
arrow-cast/src/cast.rs:
##########
@@ -458,6 +460,122 @@ where
}
}
+/// Cast an `IntervalMonthDayNanoArray` to a `PrimitiveArray<D>` duration by dividing each value by a per-unit scale; a result above `i64::MAX` becomes null when `cast_options.safe`, otherwise a `ComputeError`.
+fn cast_interval_to_duration<D: ArrowTemporalType<Native = i64>>(
+ array: &dyn Array,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+ let array = array
+ .as_any()
+ .downcast_ref::<IntervalMonthDayNanoArray>()
+ .ok_or_else(|| { // downcast failed: the input was not an IntervalMonthDayNanoArray
+ ArrowError::ComputeError(
+ "Internal Error: Cannot cast interval to IntervalArray of
expected type"
+ .to_string(),
+ )
+ })?;
+
+ let scale = match D::DATA_TYPE { // divisor applied to every source value, chosen by the target time unit
+ DataType::Duration(TimeUnit::Second) => 1_000_000_000,
+ DataType::Duration(TimeUnit::Millisecond) => 1_000_000,
+ DataType::Duration(TimeUnit::Microsecond) => 1_000,
+ DataType::Duration(TimeUnit::Nanosecond) => 1,
+ _ => unreachable!(), // panics if D::DATA_TYPE is not one of the four Duration units
+ };
+
+ if cast_options.safe { // safe mode: out-of-range values become null instead of an error
+ let iter = array.iter().map(|v| {
+ v.and_then(|v| {
+ let v = v / scale; // NOTE(review): divides the whole i128; presumably it also packs months/days - confirm this is intended
+ if v > i64::MAX as i128 { // NOTE(review): only the upper bound is checked; a value below i64::MIN would be truncated by `as i64` - confirm
+ None
+ } else {
+ Some(v as i64)
+ }
+ })
+ });
+ Ok(Arc::new(unsafe { // SAFETY (assumed): `iter` is a plain map over array.iter(), so its length should be exact - TODO confirm
+ PrimitiveArray::<D>::from_trusted_len_iter(iter)
+ }))
+ } else { // non-safe mode: the first out-of-range value fails the whole cast
+ let vec = array
+ .iter()
+ .map(|v| {
+ v.map(|v| {
+ let v = v / scale; // same scaling as the safe branch
+ if v > i64::MAX as i128 { // NOTE(review): lower bound (i64::MIN) is not checked here either - confirm
+ Err(ArrowError::ComputeError(format!(
+ "Cannot cast to {:?}. Overflowing on {:?}",
+ D::DATA_TYPE,
+ v
+ )))
+ } else {
+ Ok(v as i64)
+ }
+ })
+ .transpose() // Option<Result<i64, _>> -> Result<Option<i64>, _> so nulls pass through as Ok(None)
+ })
+ .collect::<Result<Vec<_>, _>>()?; // stops at the first Err and returns it to the caller
+ Ok(Arc::new(unsafe { // SAFETY (assumed): a Vec iterator reports an exact length - TODO confirm
+ PrimitiveArray::<D>::from_trusted_len_iter(vec.iter())
+ }))
+ }
+}
+
+/// Cast the array from duration and interval
+fn cast_duration_to_interval<D: ArrowTemporalType<Native = i64>>(
+ array: &dyn Array,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+ let array = array
+ .as_any()
+ .downcast_ref::<PrimitiveArray<D>>()
+ .ok_or_else(|| {
+ ArrowError::ComputeError(
+ "Internal Error: Cannot cast duration to DurationArray of
expected type"
+ .to_string(),
+ )
+ })?;
+
+ let scale = match array.data_type() {
+ DataType::Duration(TimeUnit::Second) => 1_000_000_000,
+ DataType::Duration(TimeUnit::Millisecond) => 1_000_000,
+ DataType::Duration(TimeUnit::Microsecond) => 1_000,
+ DataType::Duration(TimeUnit::Nanosecond) => 1,
+ _ => unreachable!(),
+ };
+
+ if cast_options.safe {
+ let iter = array
+ .iter()
+ .map(|v| v.and_then(|v| v.checked_mul(scale).map(|v| v as i128)));
Review Comment:
I think we need to check for overflow in the safe case as well -- and
set the result to Null / `None` when an overflow happens
##########
arrow-cast/src/cast.rs:
##########
@@ -8246,4 +8387,217 @@ mod tests {
);
assert_eq!("Invalid argument error: 1234567000 is too large to store
in a Decimal256 of precision 7. Max is 9999999", err.unwrap_err().to_string());
}
+
+ /// Test helper: builds a `PrimitiveArray<T>` from `array`, casts it to `Interval(MonthDayNano)` with `cast_options`, and returns the downcast result or the cast/downcast error.
+ fn cast_from_duration_to_interval<T: ArrowTemporalType>(
+ array: Vec<i64>,
+ cast_options: &CastOptions,
+ ) -> Result<PrimitiveArray<IntervalMonthDayNanoType>, ArrowError>
+ where
+ arrow_array::PrimitiveArray<T>: From<Vec<i64>>,
+ {
+ let array = PrimitiveArray::<T>::from(array);
+ let array = Arc::new(array) as ArrayRef; // cast_with_options is called with a reference to an ArrayRef
+ let casted_array = cast_with_options(
+ &array,
+ &DataType::Interval(IntervalUnit::MonthDayNano),
+ cast_options,
+ )?; // propagate cast errors so tests can assert on `is_err()`
+ casted_array
+ .as_any()
+ .downcast_ref::<IntervalMonthDayNanoArray>()
+ .ok_or_else(|| { // the cast returned an array of a different concrete type
+ ArrowError::ComputeError(
+ "Failed to downcast to
IntervalMonthDayNanoArray".to_string(),
+ )
+ })
+ .cloned() // return an owned array instead of a borrow of `casted_array`
+ }
+
+ #[test]
+ fn test_cast_from_duration_to_interval() {
+ // from duration second to interval month day nano
+ let array = vec![1234567];
+ let casted_array =
cast_from_duration_to_interval::<DurationSecondType>(
+ array,
+ &DEFAULT_CAST_OPTIONS,
+ )
+ .unwrap();
+ assert_eq!(
+ casted_array.data_type(),
+ &DataType::Interval(IntervalUnit::MonthDayNano)
+ );
+ assert_eq!(casted_array.value(0), 1234567000000000);
+
+ let array = vec![i64::MAX];
+ let casted_array =
cast_from_duration_to_interval::<DurationSecondType>(
+ array,
+ &CastOptions { safe: false },
+ );
+ assert!(casted_array.is_err());
+
+ // from duration millisecond to interval month day nano
+ let array = vec![1234567];
+ let casted_array =
cast_from_duration_to_interval::<DurationMillisecondType>(
+ array,
+ &DEFAULT_CAST_OPTIONS,
+ )
+ .unwrap();
+ assert_eq!(
+ casted_array.data_type(),
+ &DataType::Interval(IntervalUnit::MonthDayNano)
+ );
+ assert_eq!(casted_array.value(0), 1234567000000);
+
+ let array = vec![i64::MAX];
+ let casted_array =
cast_from_duration_to_interval::<DurationMillisecondType>(
+ array,
+ &CastOptions { safe: false },
+ );
+ assert!(casted_array.is_err());
+
+ // from duration microsecond to interval month day nano
+ let array = vec![1234567];
+ let casted_array =
cast_from_duration_to_interval::<DurationMicrosecondType>(
+ array,
+ &DEFAULT_CAST_OPTIONS,
+ )
+ .unwrap();
+ assert_eq!(
+ casted_array.data_type(),
+ &DataType::Interval(IntervalUnit::MonthDayNano)
+ );
+ assert_eq!(casted_array.value(0), 1234567000);
+
+ let array = vec![i64::MAX];
Review Comment:
Can you also please add a test here (and in the cases below) showing what
happens when an overflow occurs with DEFAULT_CAST_OPTIONS? I would expect the
result to be Null.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]