This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new ed5b398 handle tz while extractiing second/minute/hour from temporal
arrays (#771)
ed5b398 is described below
commit ed5b398a0c4574dec96e608659161c37aef18bb4
Author: Sumit <[email protected]>
AuthorDate: Mon Oct 11 22:16:28 2021 +0200
handle tz while extractiing second/minute/hour from temporal arrays (#771)
The patch rewrites the behaviour using macros to indicate the
repetitive nutate of operations
---
arrow/src/array/array_primitive.rs | 12 ++
arrow/src/compute/kernels/temporal.rs | 261 +++++++++++++++++++++++-----------
2 files changed, 191 insertions(+), 82 deletions(-)
diff --git a/arrow/src/array/array_primitive.rs
b/arrow/src/array/array_primitive.rs
index 5777a03..0878fb4 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -245,6 +245,18 @@ where
as_datetime::<T>(i64::from(self.value(i)))
}
+ /// Returns value as a chrono `NaiveDateTime`, handling time resolution
with the provided tz
+ ///
+ /// functionally it is same as `value_as_datetime`, however it adds
+ /// the passed tz to the to-be-returned NaiveDateTime
+ pub fn value_as_datetime_with_tz(
+ &self,
+ i: usize,
+ tz: FixedOffset,
+ ) -> Option<NaiveDateTime> {
+ as_datetime::<T>(i64::from(self.value(i))).map(|datetime| datetime +
tz)
+ }
+
/// Returns value as a chrono `NaiveDate` by using `Self::datetime()`
///
/// If a data type cannot be converted to `NaiveDate`, a `None` is returned
diff --git a/arrow/src/compute/kernels/temporal.rs
b/arrow/src/compute/kernels/temporal.rs
index 5db0dfb..f461b51 100644
--- a/arrow/src/compute/kernels/temporal.rs
+++ b/arrow/src/compute/kernels/temporal.rs
@@ -22,6 +22,60 @@ use chrono::{Datelike, Timelike};
use crate::array::*;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
+
+use chrono::format::strftime::StrftimeItems;
+use chrono::format::{parse, Parsed};
+
+macro_rules! extract_component_from_array {
+ ($array:ident, $builder:ident, $extract_fn:ident, $using:ident) => {
+ for i in 0..$array.len() {
+ if $array.is_null(i) {
+ $builder.append_null()?;
+ } else {
+ match $array.$using(i) {
+ Some(dt) => $builder.append_value(dt.$extract_fn() as
i32)?,
+ None => $builder.append_null()?,
+ }
+ }
+ }
+ };
+ ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $tz:ident,
$parsed:ident) => {
+ if ($tz.starts_with('+') || $tz.starts_with('-')) &&
!$tz.contains(':') {
+ return_compute_error_with!(
+ "Invalid timezone",
+ "Expected format [+-]XX:XX".to_string()
+ )
+ } else {
+ match parse(&mut $parsed, $tz, StrftimeItems::new("%z")) {
+ Ok(_) => match $parsed.to_fixed_offset() {
+ Ok(fixed_offset) => {
+ for i in 0..$array.len() {
+ if $array.is_null(i) {
+ $builder.append_null()?;
+ } else {
+ match $array.$using(i, fixed_offset) {
+ Some(dt) => {
+ $builder.append_value(dt.$extract_fn()
as i32)?
+ }
+ None => $builder.append_null()?,
+ }
+ }
+ }
+ }
+ err => return_compute_error_with!("Invalid timezone", err),
+ },
+ err => return_compute_error_with!("Unable to parse timezone",
err),
+ }
+ }
+ };
+}
+
+macro_rules! return_compute_error_with {
+ ($msg:expr, $param:expr) => {
+ return { Err(ArrowError::ComputeError(format!("{}: {:?}", $msg,
$param))) }
+ };
+}
+
/// Extracts the hours of a given temporal array as an array of integers
pub fn hour<T>(array: &PrimitiveArray<T>) -> Result<Int32Array>
where
@@ -31,37 +85,23 @@ where
let mut b = Int32Builder::new(array.len());
match array.data_type() {
&DataType::Time32(_) | &DataType::Time64(_) => {
- for i in 0..array.len() {
- if array.is_null(i) {
- b.append_null()?;
- } else {
- match array.value_as_time(i) {
- Some(time) => b.append_value(time.hour() as i32)?,
- None => b.append_null()?,
- };
- }
- }
+ extract_component_from_array!(array, b, hour, value_as_time)
}
- &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => {
- for i in 0..array.len() {
- if array.is_null(i) {
- b.append_null()?;
- } else {
- match array.value_as_datetime(i) {
- Some(dt) => b.append_value(dt.hour() as i32)?,
- None => b.append_null()?,
- }
- }
- }
+ &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None)
=> {
+ extract_component_from_array!(array, b, hour, value_as_datetime)
}
- dt => {
- return {
- Err(ArrowError::ComputeError(format!(
- "hour does not support type {:?}",
- dt
- )))
- }
+ &DataType::Timestamp(_, Some(ref tz)) => {
+ let mut scratch = Parsed::new();
+ extract_component_from_array!(
+ array,
+ b,
+ hour,
+ value_as_datetime_with_tz,
+ tz,
+ scratch
+ )
}
+ dt => return_compute_error_with!("hour does not support", dt),
}
Ok(b.finish())
@@ -76,25 +116,9 @@ where
let mut b = Int32Builder::new(array.len());
match array.data_type() {
&DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => {
- for i in 0..array.len() {
- if array.is_null(i) {
- b.append_null()?;
- } else {
- match array.value_as_datetime(i) {
- Some(dt) => b.append_value(dt.year() as i32)?,
- None => b.append_null()?,
- }
- }
- }
- }
- dt => {
- return {
- Err(ArrowError::ComputeError(format!(
- "year does not support type {:?}",
- dt
- )))
- }
+ extract_component_from_array!(array, b, year, value_as_datetime)
}
+ dt => return_compute_error_with!("year does not support", dt),
}
Ok(b.finish())
@@ -108,26 +132,21 @@ where
{
let mut b = Int32Builder::new(array.len());
match array.data_type() {
- &DataType::Date64 | &DataType::Timestamp(_, _) => {
- for i in 0..array.len() {
- if array.is_null(i) {
- b.append_null()?;
- } else {
- match array.value_as_datetime(i) {
- Some(dt) => b.append_value(dt.minute() as i32)?,
- None => b.append_null()?,
- }
- }
- }
+ &DataType::Date64 | &DataType::Timestamp(_, None) => {
+ extract_component_from_array!(array, b, minute, value_as_datetime)
}
- dt => {
- return {
- Err(ArrowError::ComputeError(format!(
- "minute does not support type {:?}",
- dt
- )))
- }
+ &DataType::Timestamp(_, Some(ref tz)) => {
+ let mut scratch = Parsed::new();
+ extract_component_from_array!(
+ array,
+ b,
+ minute,
+ value_as_datetime_with_tz,
+ tz,
+ scratch
+ )
}
+ dt => return_compute_error_with!("minute does not support", dt),
}
Ok(b.finish())
@@ -141,26 +160,21 @@ where
{
let mut b = Int32Builder::new(array.len());
match array.data_type() {
- &DataType::Date64 | &DataType::Timestamp(_, _) => {
- for i in 0..array.len() {
- if array.is_null(i) {
- b.append_null()?;
- } else {
- match array.value_as_datetime(i) {
- Some(dt) => b.append_value(dt.second() as i32)?,
- None => b.append_null()?,
- }
- }
- }
+ &DataType::Date64 | &DataType::Timestamp(_, None) => {
+ extract_component_from_array!(array, b, second, value_as_datetime)
}
- dt => {
- return {
- Err(ArrowError::ComputeError(format!(
- "second does not support type {:?}",
- dt
- )))
- }
+ &DataType::Timestamp(_, Some(ref tz)) => {
+ let mut scratch = Parsed::new();
+ extract_component_from_array!(
+ array,
+ b,
+ second,
+ value_as_datetime_with_tz,
+ tz,
+ scratch
+ )
}
+ dt => return_compute_error_with!("second does not support", dt),
}
Ok(b.finish())
@@ -294,4 +308,87 @@ mod tests {
assert!(!b.is_valid(1));
assert_eq!(7, b.value(2));
}
+
+ #[test]
+ fn test_temporal_array_timestamp_second_with_timezone() {
+ use std::sync::Arc;
+
+ let a = Arc::new(TimestampSecondArray::from_vec(
+ vec![10, 20],
+ Some("+00:00".to_string()),
+ ));
+ let b = second(&a).unwrap();
+ assert_eq!(10, b.value(0));
+ assert_eq!(20, b.value(1));
+ }
+
+ #[test]
+ fn test_temporal_array_timestamp_minute_with_timezone() {
+ use std::sync::Arc;
+
+ let a = Arc::new(TimestampSecondArray::from_vec(
+ vec![0, 60],
+ Some("+00:50".to_string()),
+ ));
+ let b = minute(&a).unwrap();
+ assert_eq!(50, b.value(0));
+ assert_eq!(51, b.value(1));
+ }
+
+ #[test]
+ fn test_temporal_array_timestamp_minute_with_negative_timezone() {
+ use std::sync::Arc;
+
+ let a = Arc::new(TimestampSecondArray::from_vec(
+ vec![60 * 55],
+ Some("-00:50".to_string()),
+ ));
+ let b = minute(&a).unwrap();
+ assert_eq!(5, b.value(0));
+ }
+
+ #[test]
+ fn test_temporal_array_timestamp_hour_with_timezone() {
+ use std::sync::Arc;
+
+ let a = Arc::new(TimestampSecondArray::from_vec(
+ vec![60 * 60 * 10],
+ Some("+01:00".to_string()),
+ ));
+ let b = hour(&a).unwrap();
+ assert_eq!(11, b.value(0));
+ }
+
+ #[test]
+ fn test_temporal_array_timestamp_hour_with_timezone_without_colon() {
+ use std::sync::Arc;
+
+ let a = Arc::new(TimestampSecondArray::from_vec(
+ vec![60 * 60 * 10],
+ Some("+0100".to_string()),
+ ));
+ assert!(matches!(hour(&a), Err(ArrowError::ComputeError(_))))
+ }
+
+ #[test]
+ fn test_temporal_array_timestamp_hour_with_timezone_without_initial_sign()
{
+ use std::sync::Arc;
+
+ let a = Arc::new(TimestampSecondArray::from_vec(
+ vec![60 * 60 * 10],
+ Some("0100".to_string()),
+ ));
+ assert!(matches!(hour(&a), Err(ArrowError::ComputeError(_))))
+ }
+
+ #[test]
+ fn test_temporal_array_timestamp_hour_with_timezone_with_only_colon() {
+ use std::sync::Arc;
+
+ let a = Arc::new(TimestampSecondArray::from_vec(
+ vec![60 * 60 * 10],
+ Some("01:00".to_string()),
+ ));
+ assert!(matches!(hour(&a), Err(ArrowError::ComputeError(_))))
+ }
}