This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push: new 493d3eec6f Support casting `Date` to a time zone-specific timestamp (#7141) 493d3eec6f is described below commit 493d3eec6fb664a180d3a08fc5380ce459615c7a Author: Matthew Kim <38759997+friendlymatt...@users.noreply.github.com> AuthorDate: Mon Feb 24 09:05:36 2025 -0500 Support casting `Date` to a time zone-specific timestamp (#7141) * Date can cast to tz-aware timestamp * Cover other Date x TimeUnit cases * Preserve local time when casting date to timestamp --- arrow-cast/src/cast/mod.rs | 274 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 242 insertions(+), 32 deletions(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index f0e7de056e..f96bd6eda4 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -265,8 +265,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { } (Timestamp(_, _), _) if to_type.is_numeric() => true, (_, Timestamp(_, _)) if from_type.is_numeric() => true, - (Date64, Timestamp(_, None)) => true, - (Date32, Timestamp(_, None)) => true, + (Date64, Timestamp(_, _)) => true, + (Date32, Timestamp(_, _)) => true, ( Timestamp(_, _), Timestamp(_, _) @@ -1806,44 +1806,63 @@ pub fn cast_with_options( })?, )) } - (Date64, Timestamp(TimeUnit::Second, None)) => Ok(Arc::new( - array + (Date64, Timestamp(TimeUnit::Second, _)) => { + let array = array .as_primitive::<Date64Type>() - .unary::<_, TimestampSecondType>(|x| x / MILLISECONDS), - )), - (Date64, Timestamp(TimeUnit::Millisecond, None)) => { - cast_reinterpret_arrays::<Date64Type, TimestampMillisecondType>(array) + .unary::<_, TimestampSecondType>(|x| x / MILLISECONDS); + + cast_with_options(&array, to_type, cast_options) } - (Date64, Timestamp(TimeUnit::Microsecond, None)) => Ok(Arc::new( - array + (Date64, Timestamp(TimeUnit::Millisecond, _)) => { + let array = array .as_primitive::<Date64Type>() - .unary::<_, TimestampMicrosecondType>(|x| x * (MICROSECONDS / MILLISECONDS)), - )), - (Date64, Timestamp(TimeUnit::Nanosecond, None)) => Ok(Arc::new( - array + .reinterpret_cast::<TimestampMillisecondType>(); + + cast_with_options(&array, to_type, cast_options) + } + + (Date64, Timestamp(TimeUnit::Microsecond, _)) => { + let array = array .as_primitive::<Date64Type>() - .unary::<_, TimestampNanosecondType>(|x| x * (NANOSECONDS / MILLISECONDS)), - )), - (Date32, Timestamp(TimeUnit::Second, None)) => Ok(Arc::new( - array + .unary::<_, TimestampMicrosecondType>(|x| x * (MICROSECONDS / MILLISECONDS)); + + cast_with_options(&array, to_type, cast_options) + } + (Date64, Timestamp(TimeUnit::Nanosecond, _)) => { + let array = array + .as_primitive::<Date64Type>() + .unary::<_, TimestampNanosecondType>(|x| x * (NANOSECONDS / MILLISECONDS)); + + cast_with_options(&array, to_type, cast_options) + } + (Date32, Timestamp(TimeUnit::Second, _)) => { + let array = array .as_primitive::<Date32Type>() - .unary::<_, TimestampSecondType>(|x| (x as i64) * SECONDS_IN_DAY), - )), - (Date32, Timestamp(TimeUnit::Millisecond, None)) => Ok(Arc::new( - array + .unary::<_, TimestampSecondType>(|x| (x as i64) * SECONDS_IN_DAY); + + cast_with_options(&array, to_type, cast_options) + } + (Date32, Timestamp(TimeUnit::Millisecond, _)) => { + let array = array .as_primitive::<Date32Type>() - .unary::<_, TimestampMillisecondType>(|x| (x as i64) * MILLISECONDS_IN_DAY), - )), - (Date32, Timestamp(TimeUnit::Microsecond, None)) => Ok(Arc::new( - array + .unary::<_, TimestampMillisecondType>(|x| (x as i64) * MILLISECONDS_IN_DAY); + + cast_with_options(&array, to_type, cast_options) + } + (Date32, Timestamp(TimeUnit::Microsecond, _)) => { + let array = array .as_primitive::<Date32Type>() - .unary::<_, TimestampMicrosecondType>(|x| (x as i64) * MICROSECONDS_IN_DAY), - )), - (Date32, Timestamp(TimeUnit::Nanosecond, None)) => Ok(Arc::new( - array + .unary::<_, TimestampMicrosecondType>(|x| (x as i64) * MICROSECONDS_IN_DAY); + + cast_with_options(&array, to_type, cast_options) + } + (Date32, Timestamp(TimeUnit::Nanosecond, _)) => { + let array = array .as_primitive::<Date32Type>() - .unary::<_, TimestampNanosecondType>(|x| (x as i64) * NANOSECONDS_IN_DAY), - )), + .unary::<_, TimestampNanosecondType>(|x| (x as i64) * NANOSECONDS_IN_DAY); + + cast_with_options(&array, to_type, cast_options) + } (_, Duration(unit)) if from_type.is_numeric() => { let array = cast_with_options(array, &Int64, cast_options)?; @@ -5217,6 +5236,197 @@ mod tests { }}; } + #[test] + fn test_cast_date32_to_timestamp_and_timestamp_with_timezone() { + let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu + let a = Date32Array::from(vec![Some(18628), None, None]); // 2021-1-1, 2022-1-1 + let array = Arc::new(a) as ArrayRef; + + let b = cast( + &array, + &DataType::Timestamp(TimeUnit::Second, Some(tz.into())), + ) + .unwrap(); + let c = b.as_primitive::<TimestampSecondType>(); + let string_array = cast(&c, &DataType::Utf8).unwrap(); + let result = string_array.as_string::<i32>(); + assert_eq!("2021-01-01T00:00:00+05:45", result.value(0)); + + let b = cast(&array, &DataType::Timestamp(TimeUnit::Second, None)).unwrap(); + let c = b.as_primitive::<TimestampSecondType>(); + let string_array = cast(&c, &DataType::Utf8).unwrap(); + let result = string_array.as_string::<i32>(); + assert_eq!("2021-01-01T00:00:00", result.value(0)); + } + + #[test] + fn test_cast_date32_to_timestamp_with_timezone() { + let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu + let a = Date32Array::from(vec![Some(18628), Some(18993), None]); // 2021-1-1, 2022-1-1 + let array = Arc::new(a) as ArrayRef; + let b = cast( + &array, + &DataType::Timestamp(TimeUnit::Second, Some(tz.into())), + ) + .unwrap(); + let c = b.as_primitive::<TimestampSecondType>(); + assert_eq!(1609438500, c.value(0)); + assert_eq!(1640974500, c.value(1)); + assert!(c.is_null(2)); + + let string_array = cast(&c, &DataType::Utf8).unwrap(); + let result = string_array.as_string::<i32>(); + assert_eq!("2021-01-01T00:00:00+05:45", result.value(0)); + assert_eq!("2022-01-01T00:00:00+05:45", result.value(1)); + } + + #[test] + fn test_cast_date32_to_timestamp_with_timezone_ms() { + let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu + let a = Date32Array::from(vec![Some(18628), Some(18993), None]); // 2021-1-1, 2022-1-1 + let array = Arc::new(a) as ArrayRef; + let b = cast( + &array, + &DataType::Timestamp(TimeUnit::Millisecond, Some(tz.into())), + ) + .unwrap(); + let c = b.as_primitive::<TimestampMillisecondType>(); + assert_eq!(1609438500000, c.value(0)); + assert_eq!(1640974500000, c.value(1)); + assert!(c.is_null(2)); + + let string_array = cast(&c, &DataType::Utf8).unwrap(); + let result = string_array.as_string::<i32>(); + assert_eq!("2021-01-01T00:00:00+05:45", result.value(0)); + assert_eq!("2022-01-01T00:00:00+05:45", result.value(1)); + } + + #[test] + fn test_cast_date32_to_timestamp_with_timezone_us() { + let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu + let a = Date32Array::from(vec![Some(18628), Some(18993), None]); // 2021-1-1, 2022-1-1 + let array = Arc::new(a) as ArrayRef; + let b = cast( + &array, + &DataType::Timestamp(TimeUnit::Microsecond, Some(tz.into())), + ) + .unwrap(); + let c = b.as_primitive::<TimestampMicrosecondType>(); + assert_eq!(1609438500000000, c.value(0)); + assert_eq!(1640974500000000, c.value(1)); + assert!(c.is_null(2)); + + let string_array = cast(&c, &DataType::Utf8).unwrap(); + let result = string_array.as_string::<i32>(); + assert_eq!("2021-01-01T00:00:00+05:45", result.value(0)); + assert_eq!("2022-01-01T00:00:00+05:45", result.value(1)); + } + + #[test] + fn test_cast_date32_to_timestamp_with_timezone_ns() { + let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu + let a = Date32Array::from(vec![Some(18628), Some(18993), None]); // 2021-1-1, 2022-1-1 + let array = Arc::new(a) as ArrayRef; + let b = cast( + &array, + &DataType::Timestamp(TimeUnit::Nanosecond, Some(tz.into())), + ) + .unwrap(); + let c = b.as_primitive::<TimestampNanosecondType>(); + assert_eq!(1609438500000000000, c.value(0)); + assert_eq!(1640974500000000000, c.value(1)); + assert!(c.is_null(2)); + + let string_array = cast(&c, &DataType::Utf8).unwrap(); + let result = string_array.as_string::<i32>(); + assert_eq!("2021-01-01T00:00:00+05:45", result.value(0)); + assert_eq!("2022-01-01T00:00:00+05:45", result.value(1)); + } + + #[test] + fn test_cast_date64_to_timestamp_with_timezone() { + let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); + let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu + let b = cast( + &array, + &DataType::Timestamp(TimeUnit::Second, Some(tz.into())), + ) + .unwrap(); + + let c = b.as_primitive::<TimestampSecondType>(); + assert_eq!(863979300, c.value(0)); + assert_eq!(1545675300, c.value(1)); + assert!(c.is_null(2)); + + let string_array = cast(&c, &DataType::Utf8).unwrap(); + let result = string_array.as_string::<i32>(); + assert_eq!("1997-05-19T00:00:00+05:45", result.value(0)); + assert_eq!("2018-12-25T00:00:00+05:45", result.value(1)); + } + + #[test] + fn test_cast_date64_to_timestamp_with_timezone_ms() { + let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); + let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu + let b = cast( + &array, + &DataType::Timestamp(TimeUnit::Millisecond, Some(tz.into())), + ) + .unwrap(); + + let c = b.as_primitive::<TimestampMillisecondType>(); + assert_eq!(863979300005, c.value(0)); + assert_eq!(1545675300001, c.value(1)); + assert!(c.is_null(2)); + + let string_array = cast(&c, &DataType::Utf8).unwrap(); + let result = string_array.as_string::<i32>(); + assert_eq!("1997-05-19T00:00:00.005+05:45", result.value(0)); + assert_eq!("2018-12-25T00:00:00.001+05:45", result.value(1)); + } + + #[test] + fn test_cast_date64_to_timestamp_with_timezone_us() { + let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); + let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu + let b = cast( + &array, + &DataType::Timestamp(TimeUnit::Microsecond, Some(tz.into())), + ) + .unwrap(); + + let c = b.as_primitive::<TimestampMicrosecondType>(); + assert_eq!(863979300005000, c.value(0)); + assert_eq!(1545675300001000, c.value(1)); + assert!(c.is_null(2)); + + let string_array = cast(&c, &DataType::Utf8).unwrap(); + let result = string_array.as_string::<i32>(); + assert_eq!("1997-05-19T00:00:00.005+05:45", result.value(0)); + assert_eq!("2018-12-25T00:00:00.001+05:45", result.value(1)); + } + + #[test] + fn test_cast_date64_to_timestamp_with_timezone_ns() { + let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); + let tz = "+0545"; // UTC + 0545 is Asia/Kathmandu + let b = cast( + &array, + &DataType::Timestamp(TimeUnit::Nanosecond, Some(tz.into())), + ) + .unwrap(); + + let c = b.as_primitive::<TimestampNanosecondType>(); + assert_eq!(863979300005000000, c.value(0)); + assert_eq!(1545675300001000000, c.value(1)); + assert!(c.is_null(2)); + + let string_array = cast(&c, &DataType::Utf8).unwrap(); + let result = string_array.as_string::<i32>(); + assert_eq!("1997-05-19T00:00:00.005+05:45", result.value(0)); + assert_eq!("2018-12-25T00:00:00.001+05:45", result.value(1)); + } + #[test] fn test_cast_timestamp_to_strings() { // "2018-12-25T00:00:02.001", "1997-05-19T00:00:03.005", None