This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 89767ccb1d3 Account for Timezone when Casting Timestamp to Date32 (#5605)
89767ccb1d3 is described below
commit 89767ccb1d3834cbcb3cba3df5480c4ef236fbd6
Author: Lordworms <[email protected]>
AuthorDate: Mon Apr 15 06:35:15 2024 -0500
Account for Timezone when Casting Timestamp to Date32 (#5605)
* Consider Timezone when converting Timestamp to Date32
* Remove UTC special case
* Cleanup
* Update tests
---------
Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
arrow-cast/src/cast/mod.rs | 87 ++++++++++++++++++++++++++++++++++------------
1 file changed, 65 insertions(+), 22 deletions(-)
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 3e2bf4392ff..d559715911b 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -519,6 +519,34 @@ fn as_time_res_with_timezone<T: ArrowPrimitiveType>(
})
}
+fn timestamp_to_date32<T: ArrowTimestampType>(
+ array: &PrimitiveArray<T>,
+) -> Result<ArrayRef, ArrowError> {
+ let err = |x: i64| {
+ ArrowError::CastError(format!(
+ "Cannot convert {} {x} to datetime",
+ std::any::type_name::<T>()
+ ))
+ };
+
+ let array: Date32Array = match array.timezone() {
+ Some(tz) => {
+ let tz: Tz = tz.parse()?;
+ array.try_unary(|x| {
+ as_datetime_with_timezone::<T>(x, tz)
+ .ok_or_else(|| err(x))
+ .map(|d| Date32Type::from_naive_date(d.date_naive()))
+ })?
+ }
+ None => array.try_unary(|x| {
+ as_datetime::<T>(x)
+ .ok_or_else(|| err(x))
+ .map(|d| Date32Type::from_naive_date(d.date()))
+ })?,
+ };
+ Ok(Arc::new(array))
+}
+
/// Cast `array` to the provided data type and return a new Array with type `to_type`, if possible.
///
/// Accepts [`CastOptions`] to specify cast behavior.
@@ -1590,24 +1618,17 @@ pub fn cast_with_options(
to_tz.clone(),
))
}
- (Timestamp(from_unit, _), Date32) => {
- let array = cast_with_options(array, &Int64, cast_options)?;
- let time_array = array.as_primitive::<Int64Type>();
- let from_size = time_unit_multiple(from_unit) * SECONDS_IN_DAY;
-
- let mut b = Date32Builder::with_capacity(array.len());
-
- for i in 0..array.len() {
- if time_array.is_null(i) {
- b.append_null();
- } else {
- b.append_value(
- num::integer::div_floor::<i64>(time_array.value(i), from_size) as i32,
- );
- }
- }
-
- Ok(Arc::new(b.finish()) as ArrayRef)
+ (Timestamp(TimeUnit::Microsecond, _), Date32) => {
+ timestamp_to_date32(array.as_primitive::<TimestampMicrosecondType>())
+ }
+ (Timestamp(TimeUnit::Millisecond, _), Date32) => {
+ timestamp_to_date32(array.as_primitive::<TimestampMillisecondType>())
+ }
+ (Timestamp(TimeUnit::Second, _), Date32) => {
+ timestamp_to_date32(array.as_primitive::<TimestampSecondType>())
+ }
+ (Timestamp(TimeUnit::Nanosecond, _), Date32) => {
+ timestamp_to_date32(array.as_primitive::<TimestampNanosecondType>())
}
(Timestamp(TimeUnit::Second, _), Date64) => Ok(Arc::new(match cast_options.safe {
true => {
@@ -2220,6 +2241,7 @@ where
#[cfg(test)]
mod tests {
use arrow_buffer::{Buffer, NullBuffer};
+ use chrono::NaiveDate;
use half::f16;
use super::*;
@@ -4434,14 +4456,33 @@ mod tests {
fn test_cast_timestamp_to_date32() {
let array =
TimestampMillisecondArray::from(vec![Some(864000000005), Some(1545696000001), None])
- .with_timezone("UTC".to_string());
+ .with_timezone("+00:00".to_string());
let b = cast(&array, &DataType::Date32).unwrap();
let c = b.as_primitive::<Date32Type>();
assert_eq!(10000, c.value(0));
assert_eq!(17890, c.value(1));
assert!(c.is_null(2));
}
-
+ #[test]
+ fn test_cast_timestamp_to_date32_zone() {
+ let strings = StringArray::from_iter([
+ Some("1970-01-01T00:00:01"),
+ Some("1970-01-01T23:59:59"),
+ None,
+ Some("2020-03-01T02:00:23+00:00"),
+ ]);
+ let dt = DataType::Timestamp(TimeUnit::Millisecond, Some("-07:00".into()));
+ let timestamps = cast(&strings, &dt).unwrap();
+ let dates = cast(timestamps.as_ref(), &DataType::Date32).unwrap();
+
+ let c = dates.as_primitive::<Date32Type>();
+ let expected = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
+ assert_eq!(c.value_as_date(0).unwrap(), expected);
+ assert_eq!(c.value_as_date(1).unwrap(), expected);
+ assert!(c.is_null(2));
+ let expected = NaiveDate::from_ymd_opt(2020, 2, 29).unwrap();
+ assert_eq!(c.value_as_date(3).unwrap(), expected);
+ }
#[test]
fn test_cast_timestamp_to_date64() {
let array =
@@ -8381,7 +8422,7 @@ mod tests {
.map(|ts| ts / 1_000_000)
.collect::<Vec<_>>();
- let array = TimestampMillisecondArray::from(ts_array).with_timezone("UTC".to_string());
+ let array = TimestampMillisecondArray::from(ts_array).with_timezone("+00:00".to_string());
let casted_array = cast(&array, &DataType::Date32).unwrap();
let date_array = casted_array.as_primitive::<Date32Type>();
let casted_array = cast(&date_array, &DataType::Utf8).unwrap();
@@ -8473,7 +8514,9 @@ mod tests {
for dt in data_types {
assert_eq!(
- cast_with_options(&array, &dt, &cast_options).unwrap_err().to_string(),
+ cast_with_options(&array, &dt, &cast_options)
+ .unwrap_err()
+ .to_string(),
"Parser error: Invalid timezone \"ZZTOP\": only offset based timezones supported without chrono-tz feature"
);
}