This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 12ad8d71a Cast timestamp array to string array with timezone (#2608)
12ad8d71a is described below
commit 12ad8d71ace0624812215766e8e6b71272fa4598
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Thu Sep 1 00:17:04 2022 -0700
Cast timestamp array to string array with timezone (#2608)
* Cast timestamp array to string array with timezone.
* Add comments
---
arrow/src/compute/kernels/cast.rs | 107 +++++++++++++++++++++++++++-------
arrow/src/compute/kernels/temporal.rs | 80 ++++++++++++++++---------
2 files changed, 138 insertions(+), 49 deletions(-)
diff --git a/arrow/src/compute/kernels/cast.rs
b/arrow/src/compute/kernels/cast.rs
index 3df0c861c..6b4f22470 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -35,6 +35,8 @@
//! assert_eq!(7.0, c.value(2));
//! ```
+use chrono::format::strftime::StrftimeItems;
+use chrono::format::{parse, Parsed};
use chrono::Timelike;
use std::ops::{Div, Mul};
use std::str;
@@ -45,6 +47,9 @@ use crate::compute::divide_scalar;
use crate::compute::kernels::arithmetic::{divide, multiply};
use crate::compute::kernels::arity::unary;
use crate::compute::kernels::cast_utils::string_to_timestamp_nanos;
+use crate::compute::kernels::temporal::extract_component_from_array;
+use crate::compute::kernels::temporal::return_compute_error_with;
+use crate::compute::using_chrono_tz_and_utc_naive_date_time;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::temporal_conversions::{
@@ -728,18 +733,18 @@ pub fn cast_with_options(
Int64 => cast_numeric_to_string::<Int64Type, i32>(array),
Float32 => cast_numeric_to_string::<Float32Type, i32>(array),
Float64 => cast_numeric_to_string::<Float64Type, i32>(array),
- Timestamp(unit, _) => match unit {
+ Timestamp(unit, tz) => match unit {
TimeUnit::Nanosecond => {
- cast_timestamp_to_string::<TimestampNanosecondType,
i32>(array)
+ cast_timestamp_to_string::<TimestampNanosecondType,
i32>(array, tz)
}
TimeUnit::Microsecond => {
- cast_timestamp_to_string::<TimestampMicrosecondType,
i32>(array)
+ cast_timestamp_to_string::<TimestampMicrosecondType,
i32>(array, tz)
}
TimeUnit::Millisecond => {
- cast_timestamp_to_string::<TimestampMillisecondType,
i32>(array)
+ cast_timestamp_to_string::<TimestampMillisecondType,
i32>(array, tz)
}
TimeUnit::Second => {
- cast_timestamp_to_string::<TimestampSecondType, i32>(array)
+ cast_timestamp_to_string::<TimestampSecondType,
i32>(array, tz)
}
},
Date32 => cast_date32_to_string::<i32>(array),
@@ -784,18 +789,18 @@ pub fn cast_with_options(
Int64 => cast_numeric_to_string::<Int64Type, i64>(array),
Float32 => cast_numeric_to_string::<Float32Type, i64>(array),
Float64 => cast_numeric_to_string::<Float64Type, i64>(array),
- Timestamp(unit, _) => match unit {
+ Timestamp(unit, tz) => match unit {
TimeUnit::Nanosecond => {
- cast_timestamp_to_string::<TimestampNanosecondType,
i64>(array)
+ cast_timestamp_to_string::<TimestampNanosecondType,
i64>(array, tz)
}
TimeUnit::Microsecond => {
- cast_timestamp_to_string::<TimestampMicrosecondType,
i64>(array)
+ cast_timestamp_to_string::<TimestampMicrosecondType,
i64>(array, tz)
}
TimeUnit::Millisecond => {
- cast_timestamp_to_string::<TimestampMillisecondType,
i64>(array)
+ cast_timestamp_to_string::<TimestampMillisecondType,
i64>(array, tz)
}
TimeUnit::Second => {
- cast_timestamp_to_string::<TimestampSecondType, i64>(array)
+ cast_timestamp_to_string::<TimestampSecondType,
i64>(array, tz)
}
},
Date32 => cast_date32_to_string::<i64>(array),
@@ -1482,7 +1487,10 @@ where
}
/// Cast timestamp types to Utf8/LargeUtf8
-fn cast_timestamp_to_string<T, OffsetSize>(array: &ArrayRef) ->
Result<ArrayRef>
+fn cast_timestamp_to_string<T, OffsetSize>(
+ array: &ArrayRef,
+ tz: &Option<String>,
+) -> Result<ArrayRef>
where
T: ArrowTemporalType + ArrowNumericType,
i64: From<<T as ArrowPrimitiveType>::Native>,
@@ -1490,17 +1498,28 @@ where
{
let array = array.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
- Ok(Arc::new(
- (0..array.len())
- .map(|ix| {
- if array.is_null(ix) {
- None
- } else {
- array.value_as_datetime(ix).map(|v| v.to_string())
- }
- })
- .collect::<GenericStringArray<OffsetSize>>(),
- ))
+ let mut builder = GenericStringBuilder::<OffsetSize>::new();
+
+ if let Some(tz) = tz {
+ let mut scratch = Parsed::new();
+ // The macro calls `value_as_datetime_with_tz` on timestamp values of
the array.
+ // After applying timezone offset on the datetime, calling `to_string`
to get
+ // the strings.
+ extract_component_from_array!(
+ array,
+ builder,
+ to_string,
+ value_as_datetime_with_tz,
+ tz,
+ scratch,
+ |h| h
+ )
+ } else {
+ // No timezone available. Calling `to_string` on the datetime value
simply.
+ extract_component_from_array!(array, builder, to_string,
value_as_datetime, |h| h)
+ }
+
+ Ok(Arc::new(builder.finish()) as ArrayRef)
}
/// Cast date32 types to Utf8/LargeUtf8
@@ -3602,6 +3621,7 @@ mod tests {
}
#[test]
+ #[cfg(feature = "chrono-tz")]
fn test_cast_timestamp_to_string() {
let a = TimestampMillisecondArray::from_opt_vec(
vec![Some(864000000005), Some(1545696000001), None],
@@ -5127,6 +5147,7 @@ mod tests {
#[test]
#[cfg_attr(miri, ignore)] // running forever
+ #[cfg(feature = "chrono-tz")]
fn test_can_cast_types() {
// this function attempts to ensure that can_cast_types stays
// in sync with cast. It simply tries all combinations of
@@ -5194,6 +5215,7 @@ mod tests {
}
/// Create instances of arrays with varying types for cast tests
+ #[cfg(feature = "chrono-tz")]
fn get_arrays_of_all_types() -> Vec<ArrayRef> {
let tz_name = String::from("America/New_York");
let binary_data: Vec<&[u8]> = vec![b"foo", b"bar"];
@@ -5334,6 +5356,7 @@ mod tests {
LargeListArray::from(list_data)
}
+ #[cfg(feature = "chrono-tz")]
fn make_fixed_size_list_array() -> FixedSizeListArray {
// Construct a value array
let value_data = ArrayData::builder(DataType::Int32)
@@ -5355,6 +5378,7 @@ mod tests {
FixedSizeListArray::from(list_data)
}
+ #[cfg(feature = "chrono-tz")]
fn make_fixed_size_binary_array() -> FixedSizeBinaryArray {
let values: [u8; 15] = *b"hellotherearrow";
@@ -5366,6 +5390,7 @@ mod tests {
FixedSizeBinaryArray::from(array_data)
}
+ #[cfg(feature = "chrono-tz")]
fn make_union_array() -> UnionArray {
let mut builder = UnionBuilder::with_capacity_dense(7);
builder.append::<Int32Type>("a", 1).unwrap();
@@ -5374,6 +5399,7 @@ mod tests {
}
/// Creates a dictionary with primitive dictionary values, and keys of
type K
+ #[cfg(feature = "chrono-tz")]
fn make_dictionary_primitive<K: ArrowDictionaryKeyType>() -> ArrayRef {
let keys_builder = PrimitiveBuilder::<K>::new();
// Pick Int32 arbitrarily for dictionary values
@@ -5385,6 +5411,7 @@ mod tests {
}
/// Creates a dictionary with utf8 values, and keys of type K
+ #[cfg(feature = "chrono-tz")]
fn make_dictionary_utf8<K: ArrowDictionaryKeyType>() -> ArrayRef {
let keys_builder = PrimitiveBuilder::<K>::new();
// Pick Int32 arbitrarily for dictionary values
@@ -5396,6 +5423,7 @@ mod tests {
}
// Get a selection of datatypes to try and cast to
+ #[cfg(feature = "chrono-tz")]
fn get_all_types() -> Vec<DataType> {
use DataType::*;
let tz_name = String::from("America/New_York");
@@ -5490,4 +5518,39 @@ mod tests {
assert_eq!(&out1, &out2.slice(1, 2))
}
+
+ #[test]
+ #[cfg(feature = "chrono-tz")]
+ fn test_timestamp_cast_utf8() {
+ let array: PrimitiveArray<TimestampMicrosecondType> =
+ vec![Some(37800000000), None, Some(86339000000)].into();
+ let out = cast(&(Arc::new(array) as ArrayRef),
&DataType::Utf8).unwrap();
+
+ let expected = StringArray::from(vec![
+ Some("1970-01-01 10:30:00"),
+ None,
+ Some("1970-01-01 23:58:59"),
+ ]);
+
+ assert_eq!(
+ out.as_any().downcast_ref::<StringArray>().unwrap(),
+ &expected
+ );
+
+ let array: PrimitiveArray<TimestampMicrosecondType> =
+ vec![Some(37800000000), None, Some(86339000000)].into();
+ let array = array.with_timezone("Australia/Sydney".to_string());
+ let out = cast(&(Arc::new(array) as ArrayRef),
&DataType::Utf8).unwrap();
+
+ let expected = StringArray::from(vec![
+ Some("1970-01-01 20:30:00"),
+ None,
+ Some("1970-01-02 09:58:59"),
+ ]);
+
+ assert_eq!(
+ out.as_any().downcast_ref::<StringArray>().unwrap(),
+ &expected
+ );
+ }
}
diff --git a/arrow/src/compute/kernels/temporal.rs
b/arrow/src/compute/kernels/temporal.rs
index b24a6333f..1bec1d84f 100644
--- a/arrow/src/compute/kernels/temporal.rs
+++ b/arrow/src/compute/kernels/temporal.rs
@@ -28,33 +28,33 @@ use chrono::format::{parse, Parsed};
use chrono::FixedOffset;
macro_rules! extract_component_from_array {
- ($array:ident, $builder:ident, $extract_fn:ident, $using:ident) => {
+ ($array:ident, $builder:ident, $extract_fn:ident, $using:ident,
$convert:expr) => {
for i in 0..$array.len() {
if $array.is_null(i) {
$builder.append_null();
} else {
match $array.$using(i) {
- Some(dt) => $builder.append_value(dt.$extract_fn() as i32),
+ Some(dt) =>
$builder.append_value($convert(dt.$extract_fn())),
None => $builder.append_null(),
}
}
}
};
- ($array:ident, $builder:ident, $extract_fn1:ident, $extract_fn2:ident,
$using:ident) => {
+ ($array:ident, $builder:ident, $extract_fn1:ident, $extract_fn2:ident,
$using:ident, $convert:expr) => {
for i in 0..$array.len() {
if $array.is_null(i) {
$builder.append_null();
} else {
match $array.$using(i) {
Some(dt) => {
- $builder.append_value(dt.$extract_fn1().$extract_fn2()
as i32);
+
$builder.append_value($convert(dt.$extract_fn1().$extract_fn2()));
}
None => $builder.append_null(),
}
}
}
};
- ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $tz:ident,
$parsed:ident) => {
+ ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $tz:ident,
$parsed:ident, $convert:expr) => {
if ($tz.starts_with('+') || $tz.starts_with('-')) &&
!$tz.contains(':') {
return_compute_error_with!(
"Invalid timezone",
@@ -90,7 +90,7 @@ macro_rules! extract_component_from_array {
};
match $array.$using(i, fixed_offset) {
Some(dt) => {
- $builder.append_value(dt.$extract_fn() as
i32);
+
$builder.append_value($convert(dt.$extract_fn()));
}
None => $builder.append_null(),
}
@@ -112,6 +112,9 @@ macro_rules! return_compute_error_with {
};
}
+pub(crate) use extract_component_from_array;
+pub(crate) use return_compute_error_with;
+
// Internal trait, which is used for mapping values from DateLike structures
trait ChronoDateExt {
/// Returns a value in range `1..=4` indicating the quarter this date
falls into
@@ -177,10 +180,10 @@ where
let mut b = Int32Builder::with_capacity(array.len());
match array.data_type() {
&DataType::Time32(_) | &DataType::Time64(_) => {
- extract_component_from_array!(array, b, hour, value_as_time)
+ extract_component_from_array!(array, b, hour, value_as_time, |h| h
as i32)
}
&DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None)
=> {
- extract_component_from_array!(array, b, hour, value_as_datetime)
+ extract_component_from_array!(array, b, hour, value_as_datetime,
|h| h as i32)
}
&DataType::Timestamp(_, Some(ref tz)) => {
let mut scratch = Parsed::new();
@@ -190,7 +193,8 @@ where
hour,
value_as_datetime_with_tz,
tz,
- scratch
+ scratch,
+ |h| h as i32
)
}
dt => return_compute_error_with!("hour does not support", dt),
@@ -208,7 +212,7 @@ where
let mut b = Int32Builder::with_capacity(array.len());
match array.data_type() {
&DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => {
- extract_component_from_array!(array, b, year, value_as_datetime)
+ extract_component_from_array!(array, b, year, value_as_datetime,
|h| h as i32)
}
dt => return_compute_error_with!("year does not support", dt),
}
@@ -225,7 +229,8 @@ where
let mut b = Int32Builder::with_capacity(array.len());
match array.data_type() {
&DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None)
=> {
- extract_component_from_array!(array, b, quarter, value_as_datetime)
+ extract_component_from_array!(array, b, quarter,
value_as_datetime, |h| h
+ as i32)
}
&DataType::Timestamp(_, Some(ref tz)) => {
let mut scratch = Parsed::new();
@@ -235,7 +240,8 @@ where
quarter,
value_as_datetime_with_tz,
tz,
- scratch
+ scratch,
+ |h| h as i32
)
}
dt => return_compute_error_with!("quarter does not support", dt),
@@ -253,7 +259,8 @@ where
let mut b = Int32Builder::with_capacity(array.len());
match array.data_type() {
&DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None)
=> {
- extract_component_from_array!(array, b, month, value_as_datetime)
+ extract_component_from_array!(array, b, month, value_as_datetime,
|h| h
+ as i32)
}
&DataType::Timestamp(_, Some(ref tz)) => {
let mut scratch = Parsed::new();
@@ -263,7 +270,8 @@ where
month,
value_as_datetime_with_tz,
tz,
- scratch
+ scratch,
+ |h| h as i32
)
}
dt => return_compute_error_with!("month does not support", dt),
@@ -290,7 +298,8 @@ where
array,
b,
num_days_from_monday,
- value_as_datetime
+ value_as_datetime,
+ |h| h as i32
)
}
&DataType::Timestamp(_, Some(ref tz)) => {
@@ -301,7 +310,8 @@ where
num_days_from_monday,
value_as_datetime_with_tz,
tz,
- scratch
+ scratch,
+ |h| h as i32
)
}
dt => return_compute_error_with!("weekday does not support", dt),
@@ -328,7 +338,8 @@ where
array,
b,
num_days_from_sunday,
- value_as_datetime
+ value_as_datetime,
+ |h| h as i32
)
}
&DataType::Timestamp(_, Some(ref tz)) => {
@@ -339,7 +350,8 @@ where
num_days_from_sunday,
value_as_datetime_with_tz,
tz,
- scratch
+ scratch,
+ |h| h as i32
)
}
dt => return_compute_error_with!("num_days_from_sunday does not
support", dt),
@@ -357,7 +369,7 @@ where
let mut b = Int32Builder::with_capacity(array.len());
match array.data_type() {
&DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None)
=> {
- extract_component_from_array!(array, b, day, value_as_datetime)
+ extract_component_from_array!(array, b, day, value_as_datetime,
|h| h as i32)
}
&DataType::Timestamp(_, Some(ref tz)) => {
let mut scratch = Parsed::new();
@@ -367,7 +379,8 @@ where
day,
value_as_datetime_with_tz,
tz,
- scratch
+ scratch,
+ |h| h as i32
)
}
dt => return_compute_error_with!("day does not support", dt),
@@ -386,7 +399,8 @@ where
let mut b = Int32Builder::with_capacity(array.len());
match array.data_type() {
&DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None)
=> {
- extract_component_from_array!(array, b, ordinal, value_as_datetime)
+ extract_component_from_array!(array, b, ordinal,
value_as_datetime, |h| h
+ as i32)
}
&DataType::Timestamp(_, Some(ref tz)) => {
let mut scratch = Parsed::new();
@@ -396,7 +410,8 @@ where
ordinal,
value_as_datetime_with_tz,
tz,
- scratch
+ scratch,
+ |h| h as i32
)
}
dt => return_compute_error_with!("doy does not support", dt),
@@ -414,7 +429,8 @@ where
let mut b = Int32Builder::with_capacity(array.len());
match array.data_type() {
&DataType::Date64 | &DataType::Timestamp(_, None) => {
- extract_component_from_array!(array, b, minute, value_as_datetime)
+ extract_component_from_array!(array, b, minute, value_as_datetime,
|h| h
+ as i32)
}
&DataType::Timestamp(_, Some(ref tz)) => {
let mut scratch = Parsed::new();
@@ -424,7 +440,8 @@ where
minute,
value_as_datetime_with_tz,
tz,
- scratch
+ scratch,
+ |h| h as i32
)
}
dt => return_compute_error_with!("minute does not support", dt),
@@ -443,7 +460,14 @@ where
match array.data_type() {
&DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None)
=> {
- extract_component_from_array!(array, b, iso_week, week,
value_as_datetime)
+ extract_component_from_array!(
+ array,
+ b,
+ iso_week,
+ week,
+ value_as_datetime,
+ |h| h as i32
+ )
}
dt => return_compute_error_with!("week does not support", dt),
}
@@ -460,7 +484,8 @@ where
let mut b = Int32Builder::with_capacity(array.len());
match array.data_type() {
&DataType::Date64 | &DataType::Timestamp(_, None) => {
- extract_component_from_array!(array, b, second, value_as_datetime)
+ extract_component_from_array!(array, b, second, value_as_datetime,
|h| h
+ as i32)
}
&DataType::Timestamp(_, Some(ref tz)) => {
let mut scratch = Parsed::new();
@@ -470,7 +495,8 @@ where
second,
value_as_datetime_with_tz,
tz,
- scratch
+ scratch,
+ |h| h as i32
)
}
dt => return_compute_error_with!("second does not support", dt),