This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 12ad8d71a Cast timestamp array to string array with timezone (#2608)
12ad8d71a is described below

commit 12ad8d71ace0624812215766e8e6b71272fa4598
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Thu Sep 1 00:17:04 2022 -0700

    Cast timestamp array to string array with timezone (#2608)
    
    * Cast timestamp array to string array with timezone.
    
    * Add comments
---
 arrow/src/compute/kernels/cast.rs     | 107 +++++++++++++++++++++++++++-------
 arrow/src/compute/kernels/temporal.rs |  80 ++++++++++++++++---------
 2 files changed, 138 insertions(+), 49 deletions(-)

diff --git a/arrow/src/compute/kernels/cast.rs 
b/arrow/src/compute/kernels/cast.rs
index 3df0c861c..6b4f22470 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -35,6 +35,8 @@
 //! assert_eq!(7.0, c.value(2));
 //! ```
 
+use chrono::format::strftime::StrftimeItems;
+use chrono::format::{parse, Parsed};
 use chrono::Timelike;
 use std::ops::{Div, Mul};
 use std::str;
@@ -45,6 +47,9 @@ use crate::compute::divide_scalar;
 use crate::compute::kernels::arithmetic::{divide, multiply};
 use crate::compute::kernels::arity::unary;
 use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
+use crate::compute::kernels::temporal::extract_component_from_array;
+use crate::compute::kernels::temporal::return_compute_error_with;
+use crate::compute::using_chrono_tz_and_utc_naive_date_time;
 use crate::datatypes::*;
 use crate::error::{ArrowError, Result};
 use crate::temporal_conversions::{
@@ -728,18 +733,18 @@ pub fn cast_with_options(
             Int64 => cast_numeric_to_string::<Int64Type, i32>(array),
             Float32 => cast_numeric_to_string::<Float32Type, i32>(array),
             Float64 => cast_numeric_to_string::<Float64Type, i32>(array),
-            Timestamp(unit, _) => match unit {
+            Timestamp(unit, tz) => match unit {
                 TimeUnit::Nanosecond => {
-                    cast_timestamp_to_string::<TimestampNanosecondType, 
i32>(array)
+                    cast_timestamp_to_string::<TimestampNanosecondType, 
i32>(array, tz)
                 }
                 TimeUnit::Microsecond => {
-                    cast_timestamp_to_string::<TimestampMicrosecondType, 
i32>(array)
+                    cast_timestamp_to_string::<TimestampMicrosecondType, 
i32>(array, tz)
                 }
                 TimeUnit::Millisecond => {
-                    cast_timestamp_to_string::<TimestampMillisecondType, 
i32>(array)
+                    cast_timestamp_to_string::<TimestampMillisecondType, 
i32>(array, tz)
                 }
                 TimeUnit::Second => {
-                    cast_timestamp_to_string::<TimestampSecondType, i32>(array)
+                    cast_timestamp_to_string::<TimestampSecondType, 
i32>(array, tz)
                 }
             },
             Date32 => cast_date32_to_string::<i32>(array),
@@ -784,18 +789,18 @@ pub fn cast_with_options(
             Int64 => cast_numeric_to_string::<Int64Type, i64>(array),
             Float32 => cast_numeric_to_string::<Float32Type, i64>(array),
             Float64 => cast_numeric_to_string::<Float64Type, i64>(array),
-            Timestamp(unit, _) => match unit {
+            Timestamp(unit, tz) => match unit {
                 TimeUnit::Nanosecond => {
-                    cast_timestamp_to_string::<TimestampNanosecondType, 
i64>(array)
+                    cast_timestamp_to_string::<TimestampNanosecondType, 
i64>(array, tz)
                 }
                 TimeUnit::Microsecond => {
-                    cast_timestamp_to_string::<TimestampMicrosecondType, 
i64>(array)
+                    cast_timestamp_to_string::<TimestampMicrosecondType, 
i64>(array, tz)
                 }
                 TimeUnit::Millisecond => {
-                    cast_timestamp_to_string::<TimestampMillisecondType, 
i64>(array)
+                    cast_timestamp_to_string::<TimestampMillisecondType, 
i64>(array, tz)
                 }
                 TimeUnit::Second => {
-                    cast_timestamp_to_string::<TimestampSecondType, i64>(array)
+                    cast_timestamp_to_string::<TimestampSecondType, 
i64>(array, tz)
                 }
             },
             Date32 => cast_date32_to_string::<i64>(array),
@@ -1482,7 +1487,10 @@ where
 }
 
 /// Cast timestamp types to Utf8/LargeUtf8
-fn cast_timestamp_to_string<T, OffsetSize>(array: &ArrayRef) -> 
Result<ArrayRef>
+fn cast_timestamp_to_string<T, OffsetSize>(
+    array: &ArrayRef,
+    tz: &Option<String>,
+) -> Result<ArrayRef>
 where
     T: ArrowTemporalType + ArrowNumericType,
     i64: From<<T as ArrowPrimitiveType>::Native>,
@@ -1490,17 +1498,28 @@ where
 {
     let array = array.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
 
-    Ok(Arc::new(
-        (0..array.len())
-            .map(|ix| {
-                if array.is_null(ix) {
-                    None
-                } else {
-                    array.value_as_datetime(ix).map(|v| v.to_string())
-                }
-            })
-            .collect::<GenericStringArray<OffsetSize>>(),
-    ))
+    let mut builder = GenericStringBuilder::<OffsetSize>::new();
+
+    if let Some(tz) = tz {
+        let mut scratch = Parsed::new();
+        // The macro calls `value_as_datetime_with_tz` on timestamp values of 
the array.
+        // After applying the timezone offset to the datetime, it calls `to_string` 
to get
+        // the strings.
+        extract_component_from_array!(
+            array,
+            builder,
+            to_string,
+            value_as_datetime_with_tz,
+            tz,
+            scratch,
+            |h| h
+        )
+    } else {
+        // No timezone available. Simply call `to_string` on the datetime 
value.
+        extract_component_from_array!(array, builder, to_string, 
value_as_datetime, |h| h)
+    }
+
+    Ok(Arc::new(builder.finish()) as ArrayRef)
 }
 
 /// Cast date32 types to Utf8/LargeUtf8
@@ -3602,6 +3621,7 @@ mod tests {
     }
 
     #[test]
+    #[cfg(feature = "chrono-tz")]
     fn test_cast_timestamp_to_string() {
         let a = TimestampMillisecondArray::from_opt_vec(
             vec![Some(864000000005), Some(1545696000001), None],
@@ -5127,6 +5147,7 @@ mod tests {
 
     #[test]
     #[cfg_attr(miri, ignore)] // running forever
+    #[cfg(feature = "chrono-tz")]
     fn test_can_cast_types() {
         // this function attempts to ensure that can_cast_types stays
         // in sync with cast.  It simply tries all combinations of
@@ -5194,6 +5215,7 @@ mod tests {
     }
 
     /// Create instances of arrays with varying types for cast tests
+    #[cfg(feature = "chrono-tz")]
     fn get_arrays_of_all_types() -> Vec<ArrayRef> {
         let tz_name = String::from("America/New_York");
         let binary_data: Vec<&[u8]> = vec![b"foo", b"bar"];
@@ -5334,6 +5356,7 @@ mod tests {
         LargeListArray::from(list_data)
     }
 
+    #[cfg(feature = "chrono-tz")]
     fn make_fixed_size_list_array() -> FixedSizeListArray {
         // Construct a value array
         let value_data = ArrayData::builder(DataType::Int32)
@@ -5355,6 +5378,7 @@ mod tests {
         FixedSizeListArray::from(list_data)
     }
 
+    #[cfg(feature = "chrono-tz")]
     fn make_fixed_size_binary_array() -> FixedSizeBinaryArray {
         let values: [u8; 15] = *b"hellotherearrow";
 
@@ -5366,6 +5390,7 @@ mod tests {
         FixedSizeBinaryArray::from(array_data)
     }
 
+    #[cfg(feature = "chrono-tz")]
     fn make_union_array() -> UnionArray {
         let mut builder = UnionBuilder::with_capacity_dense(7);
         builder.append::<Int32Type>("a", 1).unwrap();
@@ -5374,6 +5399,7 @@ mod tests {
     }
 
     /// Creates a dictionary with primitive dictionary values, and keys of 
type K
+    #[cfg(feature = "chrono-tz")]
     fn make_dictionary_primitive<K: ArrowDictionaryKeyType>() -> ArrayRef {
         let keys_builder = PrimitiveBuilder::<K>::new();
         // Pick Int32 arbitrarily for dictionary values
@@ -5385,6 +5411,7 @@ mod tests {
     }
 
     /// Creates a dictionary with utf8 values, and keys of type K
+    #[cfg(feature = "chrono-tz")]
     fn make_dictionary_utf8<K: ArrowDictionaryKeyType>() -> ArrayRef {
         let keys_builder = PrimitiveBuilder::<K>::new();
         // Pick Int32 arbitrarily for dictionary values
@@ -5396,6 +5423,7 @@ mod tests {
     }
 
     // Get a selection of datatypes to try and cast to
+    #[cfg(feature = "chrono-tz")]
     fn get_all_types() -> Vec<DataType> {
         use DataType::*;
         let tz_name = String::from("America/New_York");
@@ -5490,4 +5518,39 @@ mod tests {
 
         assert_eq!(&out1, &out2.slice(1, 2))
     }
+
+    #[test]
+    #[cfg(feature = "chrono-tz")]
+    fn test_timestamp_cast_utf8() {
+        let array: PrimitiveArray<TimestampMicrosecondType> =
+            vec![Some(37800000000), None, Some(86339000000)].into();
+        let out = cast(&(Arc::new(array) as ArrayRef), 
&DataType::Utf8).unwrap();
+
+        let expected = StringArray::from(vec![
+            Some("1970-01-01 10:30:00"),
+            None,
+            Some("1970-01-01 23:58:59"),
+        ]);
+
+        assert_eq!(
+            out.as_any().downcast_ref::<StringArray>().unwrap(),
+            &expected
+        );
+
+        let array: PrimitiveArray<TimestampMicrosecondType> =
+            vec![Some(37800000000), None, Some(86339000000)].into();
+        let array = array.with_timezone("Australia/Sydney".to_string());
+        let out = cast(&(Arc::new(array) as ArrayRef), 
&DataType::Utf8).unwrap();
+
+        let expected = StringArray::from(vec![
+            Some("1970-01-01 20:30:00"),
+            None,
+            Some("1970-01-02 09:58:59"),
+        ]);
+
+        assert_eq!(
+            out.as_any().downcast_ref::<StringArray>().unwrap(),
+            &expected
+        );
+    }
 }
diff --git a/arrow/src/compute/kernels/temporal.rs 
b/arrow/src/compute/kernels/temporal.rs
index b24a6333f..1bec1d84f 100644
--- a/arrow/src/compute/kernels/temporal.rs
+++ b/arrow/src/compute/kernels/temporal.rs
@@ -28,33 +28,33 @@ use chrono::format::{parse, Parsed};
 use chrono::FixedOffset;
 
 macro_rules! extract_component_from_array {
-    ($array:ident, $builder:ident, $extract_fn:ident, $using:ident) => {
+    ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, 
$convert:expr) => {
         for i in 0..$array.len() {
             if $array.is_null(i) {
                 $builder.append_null();
             } else {
                 match $array.$using(i) {
-                    Some(dt) => $builder.append_value(dt.$extract_fn() as i32),
+                    Some(dt) => 
$builder.append_value($convert(dt.$extract_fn())),
                     None => $builder.append_null(),
                 }
             }
         }
     };
-    ($array:ident, $builder:ident, $extract_fn1:ident, $extract_fn2:ident, 
$using:ident) => {
+    ($array:ident, $builder:ident, $extract_fn1:ident, $extract_fn2:ident, 
$using:ident, $convert:expr) => {
         for i in 0..$array.len() {
             if $array.is_null(i) {
                 $builder.append_null();
             } else {
                 match $array.$using(i) {
                     Some(dt) => {
-                        $builder.append_value(dt.$extract_fn1().$extract_fn2() 
as i32);
+                        
$builder.append_value($convert(dt.$extract_fn1().$extract_fn2()));
                     }
                     None => $builder.append_null(),
                 }
             }
         }
     };
-    ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $tz:ident, 
$parsed:ident) => {
+    ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $tz:ident, 
$parsed:ident, $convert:expr) => {
         if ($tz.starts_with('+') || $tz.starts_with('-')) && 
!$tz.contains(':') {
             return_compute_error_with!(
                 "Invalid timezone",
@@ -90,7 +90,7 @@ macro_rules! extract_component_from_array {
                             };
                             match $array.$using(i, fixed_offset) {
                                 Some(dt) => {
-                                    $builder.append_value(dt.$extract_fn() as 
i32);
+                                    
$builder.append_value($convert(dt.$extract_fn()));
                                 }
                                 None => $builder.append_null(),
                             }
@@ -112,6 +112,9 @@ macro_rules! return_compute_error_with {
     };
 }
 
+pub(crate) use extract_component_from_array;
+pub(crate) use return_compute_error_with;
+
 // Internal trait, which is used for mapping values from DateLike structures
 trait ChronoDateExt {
     /// Returns a value in range `1..=4` indicating the quarter this date 
falls into
@@ -177,10 +180,10 @@ where
     let mut b = Int32Builder::with_capacity(array.len());
     match array.data_type() {
         &DataType::Time32(_) | &DataType::Time64(_) => {
-            extract_component_from_array!(array, b, hour, value_as_time)
+            extract_component_from_array!(array, b, hour, value_as_time, |h| h 
as i32)
         }
         &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) 
=> {
-            extract_component_from_array!(array, b, hour, value_as_datetime)
+            extract_component_from_array!(array, b, hour, value_as_datetime, 
|h| h as i32)
         }
         &DataType::Timestamp(_, Some(ref tz)) => {
             let mut scratch = Parsed::new();
@@ -190,7 +193,8 @@ where
                 hour,
                 value_as_datetime_with_tz,
                 tz,
-                scratch
+                scratch,
+                |h| h as i32
             )
         }
         dt => return_compute_error_with!("hour does not support", dt),
@@ -208,7 +212,7 @@ where
     let mut b = Int32Builder::with_capacity(array.len());
     match array.data_type() {
         &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => {
-            extract_component_from_array!(array, b, year, value_as_datetime)
+            extract_component_from_array!(array, b, year, value_as_datetime, 
|h| h as i32)
         }
         dt => return_compute_error_with!("year does not support", dt),
     }
@@ -225,7 +229,8 @@ where
     let mut b = Int32Builder::with_capacity(array.len());
     match array.data_type() {
         &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) 
=> {
-            extract_component_from_array!(array, b, quarter, value_as_datetime)
+            extract_component_from_array!(array, b, quarter, 
value_as_datetime, |h| h
+                as i32)
         }
         &DataType::Timestamp(_, Some(ref tz)) => {
             let mut scratch = Parsed::new();
@@ -235,7 +240,8 @@ where
                 quarter,
                 value_as_datetime_with_tz,
                 tz,
-                scratch
+                scratch,
+                |h| h as i32
             )
         }
         dt => return_compute_error_with!("quarter does not support", dt),
@@ -253,7 +259,8 @@ where
     let mut b = Int32Builder::with_capacity(array.len());
     match array.data_type() {
         &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) 
=> {
-            extract_component_from_array!(array, b, month, value_as_datetime)
+            extract_component_from_array!(array, b, month, value_as_datetime, 
|h| h
+                as i32)
         }
         &DataType::Timestamp(_, Some(ref tz)) => {
             let mut scratch = Parsed::new();
@@ -263,7 +270,8 @@ where
                 month,
                 value_as_datetime_with_tz,
                 tz,
-                scratch
+                scratch,
+                |h| h as i32
             )
         }
         dt => return_compute_error_with!("month does not support", dt),
@@ -290,7 +298,8 @@ where
                 array,
                 b,
                 num_days_from_monday,
-                value_as_datetime
+                value_as_datetime,
+                |h| h as i32
             )
         }
         &DataType::Timestamp(_, Some(ref tz)) => {
@@ -301,7 +310,8 @@ where
                 num_days_from_monday,
                 value_as_datetime_with_tz,
                 tz,
-                scratch
+                scratch,
+                |h| h as i32
             )
         }
         dt => return_compute_error_with!("weekday does not support", dt),
@@ -328,7 +338,8 @@ where
                 array,
                 b,
                 num_days_from_sunday,
-                value_as_datetime
+                value_as_datetime,
+                |h| h as i32
             )
         }
         &DataType::Timestamp(_, Some(ref tz)) => {
@@ -339,7 +350,8 @@ where
                 num_days_from_sunday,
                 value_as_datetime_with_tz,
                 tz,
-                scratch
+                scratch,
+                |h| h as i32
             )
         }
         dt => return_compute_error_with!("num_days_from_sunday does not 
support", dt),
@@ -357,7 +369,7 @@ where
     let mut b = Int32Builder::with_capacity(array.len());
     match array.data_type() {
         &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) 
=> {
-            extract_component_from_array!(array, b, day, value_as_datetime)
+            extract_component_from_array!(array, b, day, value_as_datetime, 
|h| h as i32)
         }
         &DataType::Timestamp(_, Some(ref tz)) => {
             let mut scratch = Parsed::new();
@@ -367,7 +379,8 @@ where
                 day,
                 value_as_datetime_with_tz,
                 tz,
-                scratch
+                scratch,
+                |h| h as i32
             )
         }
         dt => return_compute_error_with!("day does not support", dt),
@@ -386,7 +399,8 @@ where
     let mut b = Int32Builder::with_capacity(array.len());
     match array.data_type() {
         &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) 
=> {
-            extract_component_from_array!(array, b, ordinal, value_as_datetime)
+            extract_component_from_array!(array, b, ordinal, 
value_as_datetime, |h| h
+                as i32)
         }
         &DataType::Timestamp(_, Some(ref tz)) => {
             let mut scratch = Parsed::new();
@@ -396,7 +410,8 @@ where
                 ordinal,
                 value_as_datetime_with_tz,
                 tz,
-                scratch
+                scratch,
+                |h| h as i32
             )
         }
         dt => return_compute_error_with!("doy does not support", dt),
@@ -414,7 +429,8 @@ where
     let mut b = Int32Builder::with_capacity(array.len());
     match array.data_type() {
         &DataType::Date64 | &DataType::Timestamp(_, None) => {
-            extract_component_from_array!(array, b, minute, value_as_datetime)
+            extract_component_from_array!(array, b, minute, value_as_datetime, 
|h| h
+                as i32)
         }
         &DataType::Timestamp(_, Some(ref tz)) => {
             let mut scratch = Parsed::new();
@@ -424,7 +440,8 @@ where
                 minute,
                 value_as_datetime_with_tz,
                 tz,
-                scratch
+                scratch,
+                |h| h as i32
             )
         }
         dt => return_compute_error_with!("minute does not support", dt),
@@ -443,7 +460,14 @@ where
 
     match array.data_type() {
         &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) 
=> {
-            extract_component_from_array!(array, b, iso_week, week, 
value_as_datetime)
+            extract_component_from_array!(
+                array,
+                b,
+                iso_week,
+                week,
+                value_as_datetime,
+                |h| h as i32
+            )
         }
         dt => return_compute_error_with!("week does not support", dt),
     }
@@ -460,7 +484,8 @@ where
     let mut b = Int32Builder::with_capacity(array.len());
     match array.data_type() {
         &DataType::Date64 | &DataType::Timestamp(_, None) => {
-            extract_component_from_array!(array, b, second, value_as_datetime)
+            extract_component_from_array!(array, b, second, value_as_datetime, 
|h| h
+                as i32)
         }
         &DataType::Timestamp(_, Some(ref tz)) => {
             let mut scratch = Parsed::new();
@@ -470,7 +495,8 @@ where
                 second,
                 value_as_datetime_with_tz,
                 tz,
-                scratch
+                scratch,
+                |h| h as i32
             )
         }
         dt => return_compute_error_with!("second does not support", dt),

Reply via email to