This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new ed5b398  handle tz while extractiing second/minute/hour from temporal 
arrays (#771)
ed5b398 is described below

commit ed5b398a0c4574dec96e608659161c37aef18bb4
Author: Sumit <[email protected]>
AuthorDate: Mon Oct 11 22:16:28 2021 +0200

    handle tz while extractiing second/minute/hour from temporal arrays (#771)
    
    The patch rewrites the behaviour using macros to indicate the
    repetitive nutate of operations
---
 arrow/src/array/array_primitive.rs    |  12 ++
 arrow/src/compute/kernels/temporal.rs | 261 +++++++++++++++++++++++-----------
 2 files changed, 191 insertions(+), 82 deletions(-)

diff --git a/arrow/src/array/array_primitive.rs 
b/arrow/src/array/array_primitive.rs
index 5777a03..0878fb4 100644
--- a/arrow/src/array/array_primitive.rs
+++ b/arrow/src/array/array_primitive.rs
@@ -245,6 +245,18 @@ where
         as_datetime::<T>(i64::from(self.value(i)))
     }
 
+    /// Returns value as a chrono `NaiveDateTime`, handling time resolution 
with the provided tz
+    ///
+    /// functionally it is same as `value_as_datetime`, however it adds
+    /// the passed tz to the to-be-returned NaiveDateTime
+    pub fn value_as_datetime_with_tz(
+        &self,
+        i: usize,
+        tz: FixedOffset,
+    ) -> Option<NaiveDateTime> {
+        as_datetime::<T>(i64::from(self.value(i))).map(|datetime| datetime + 
tz)
+    }
+
     /// Returns value as a chrono `NaiveDate` by using `Self::datetime()`
     ///
     /// If a data type cannot be converted to `NaiveDate`, a `None` is returned
diff --git a/arrow/src/compute/kernels/temporal.rs 
b/arrow/src/compute/kernels/temporal.rs
index 5db0dfb..f461b51 100644
--- a/arrow/src/compute/kernels/temporal.rs
+++ b/arrow/src/compute/kernels/temporal.rs
@@ -22,6 +22,60 @@ use chrono::{Datelike, Timelike};
 use crate::array::*;
 use crate::datatypes::*;
 use crate::error::{ArrowError, Result};
+
+use chrono::format::strftime::StrftimeItems;
+use chrono::format::{parse, Parsed};
+
+macro_rules! extract_component_from_array {
+    ($array:ident, $builder:ident, $extract_fn:ident, $using:ident) => {
+        for i in 0..$array.len() {
+            if $array.is_null(i) {
+                $builder.append_null()?;
+            } else {
+                match $array.$using(i) {
+                    Some(dt) => $builder.append_value(dt.$extract_fn() as 
i32)?,
+                    None => $builder.append_null()?,
+                }
+            }
+        }
+    };
+    ($array:ident, $builder:ident, $extract_fn:ident, $using:ident, $tz:ident, 
$parsed:ident) => {
+        if ($tz.starts_with('+') || $tz.starts_with('-')) && 
!$tz.contains(':') {
+            return_compute_error_with!(
+                "Invalid timezone",
+                "Expected format [+-]XX:XX".to_string()
+            )
+        } else {
+            match parse(&mut $parsed, $tz, StrftimeItems::new("%z")) {
+                Ok(_) => match $parsed.to_fixed_offset() {
+                    Ok(fixed_offset) => {
+                        for i in 0..$array.len() {
+                            if $array.is_null(i) {
+                                $builder.append_null()?;
+                            } else {
+                                match $array.$using(i, fixed_offset) {
+                                    Some(dt) => {
+                                        $builder.append_value(dt.$extract_fn() 
as i32)?
+                                    }
+                                    None => $builder.append_null()?,
+                                }
+                            }
+                        }
+                    }
+                    err => return_compute_error_with!("Invalid timezone", err),
+                },
+                err => return_compute_error_with!("Unable to parse timezone", 
err),
+            }
+        }
+    };
+}
+
+macro_rules! return_compute_error_with {
+    ($msg:expr, $param:expr) => {
+        return { Err(ArrowError::ComputeError(format!("{}: {:?}", $msg, 
$param))) }
+    };
+}
+
 /// Extracts the hours of a given temporal array as an array of integers
 pub fn hour<T>(array: &PrimitiveArray<T>) -> Result<Int32Array>
 where
@@ -31,37 +85,23 @@ where
     let mut b = Int32Builder::new(array.len());
     match array.data_type() {
         &DataType::Time32(_) | &DataType::Time64(_) => {
-            for i in 0..array.len() {
-                if array.is_null(i) {
-                    b.append_null()?;
-                } else {
-                    match array.value_as_time(i) {
-                        Some(time) => b.append_value(time.hour() as i32)?,
-                        None => b.append_null()?,
-                    };
-                }
-            }
+            extract_component_from_array!(array, b, hour, value_as_time)
         }
-        &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => {
-            for i in 0..array.len() {
-                if array.is_null(i) {
-                    b.append_null()?;
-                } else {
-                    match array.value_as_datetime(i) {
-                        Some(dt) => b.append_value(dt.hour() as i32)?,
-                        None => b.append_null()?,
-                    }
-                }
-            }
+        &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, None) 
=> {
+            extract_component_from_array!(array, b, hour, value_as_datetime)
         }
-        dt => {
-            return {
-                Err(ArrowError::ComputeError(format!(
-                    "hour does not support type {:?}",
-                    dt
-                )))
-            }
+        &DataType::Timestamp(_, Some(ref tz)) => {
+            let mut scratch = Parsed::new();
+            extract_component_from_array!(
+                array,
+                b,
+                hour,
+                value_as_datetime_with_tz,
+                tz,
+                scratch
+            )
         }
+        dt => return_compute_error_with!("hour does not support", dt),
     }
 
     Ok(b.finish())
@@ -76,25 +116,9 @@ where
     let mut b = Int32Builder::new(array.len());
     match array.data_type() {
         &DataType::Date32 | &DataType::Date64 | &DataType::Timestamp(_, _) => {
-            for i in 0..array.len() {
-                if array.is_null(i) {
-                    b.append_null()?;
-                } else {
-                    match array.value_as_datetime(i) {
-                        Some(dt) => b.append_value(dt.year() as i32)?,
-                        None => b.append_null()?,
-                    }
-                }
-            }
-        }
-        dt => {
-            return {
-                Err(ArrowError::ComputeError(format!(
-                    "year does not support type {:?}",
-                    dt
-                )))
-            }
+            extract_component_from_array!(array, b, year, value_as_datetime)
         }
+        dt => return_compute_error_with!("year does not support", dt),
     }
 
     Ok(b.finish())
@@ -108,26 +132,21 @@ where
 {
     let mut b = Int32Builder::new(array.len());
     match array.data_type() {
-        &DataType::Date64 | &DataType::Timestamp(_, _) => {
-            for i in 0..array.len() {
-                if array.is_null(i) {
-                    b.append_null()?;
-                } else {
-                    match array.value_as_datetime(i) {
-                        Some(dt) => b.append_value(dt.minute() as i32)?,
-                        None => b.append_null()?,
-                    }
-                }
-            }
+        &DataType::Date64 | &DataType::Timestamp(_, None) => {
+            extract_component_from_array!(array, b, minute, value_as_datetime)
         }
-        dt => {
-            return {
-                Err(ArrowError::ComputeError(format!(
-                    "minute does not support type {:?}",
-                    dt
-                )))
-            }
+        &DataType::Timestamp(_, Some(ref tz)) => {
+            let mut scratch = Parsed::new();
+            extract_component_from_array!(
+                array,
+                b,
+                minute,
+                value_as_datetime_with_tz,
+                tz,
+                scratch
+            )
         }
+        dt => return_compute_error_with!("minute does not support", dt),
     }
 
     Ok(b.finish())
@@ -141,26 +160,21 @@ where
 {
     let mut b = Int32Builder::new(array.len());
     match array.data_type() {
-        &DataType::Date64 | &DataType::Timestamp(_, _) => {
-            for i in 0..array.len() {
-                if array.is_null(i) {
-                    b.append_null()?;
-                } else {
-                    match array.value_as_datetime(i) {
-                        Some(dt) => b.append_value(dt.second() as i32)?,
-                        None => b.append_null()?,
-                    }
-                }
-            }
+        &DataType::Date64 | &DataType::Timestamp(_, None) => {
+            extract_component_from_array!(array, b, second, value_as_datetime)
         }
-        dt => {
-            return {
-                Err(ArrowError::ComputeError(format!(
-                    "second does not support type {:?}",
-                    dt
-                )))
-            }
+        &DataType::Timestamp(_, Some(ref tz)) => {
+            let mut scratch = Parsed::new();
+            extract_component_from_array!(
+                array,
+                b,
+                second,
+                value_as_datetime_with_tz,
+                tz,
+                scratch
+            )
         }
+        dt => return_compute_error_with!("second does not support", dt),
     }
 
     Ok(b.finish())
@@ -294,4 +308,87 @@ mod tests {
         assert!(!b.is_valid(1));
         assert_eq!(7, b.value(2));
     }
+
+    #[test]
+    fn test_temporal_array_timestamp_second_with_timezone() {
+        use std::sync::Arc;
+
+        let a = Arc::new(TimestampSecondArray::from_vec(
+            vec![10, 20],
+            Some("+00:00".to_string()),
+        ));
+        let b = second(&a).unwrap();
+        assert_eq!(10, b.value(0));
+        assert_eq!(20, b.value(1));
+    }
+
+    #[test]
+    fn test_temporal_array_timestamp_minute_with_timezone() {
+        use std::sync::Arc;
+
+        let a = Arc::new(TimestampSecondArray::from_vec(
+            vec![0, 60],
+            Some("+00:50".to_string()),
+        ));
+        let b = minute(&a).unwrap();
+        assert_eq!(50, b.value(0));
+        assert_eq!(51, b.value(1));
+    }
+
+    #[test]
+    fn test_temporal_array_timestamp_minute_with_negative_timezone() {
+        use std::sync::Arc;
+
+        let a = Arc::new(TimestampSecondArray::from_vec(
+            vec![60 * 55],
+            Some("-00:50".to_string()),
+        ));
+        let b = minute(&a).unwrap();
+        assert_eq!(5, b.value(0));
+    }
+
+    #[test]
+    fn test_temporal_array_timestamp_hour_with_timezone() {
+        use std::sync::Arc;
+
+        let a = Arc::new(TimestampSecondArray::from_vec(
+            vec![60 * 60 * 10],
+            Some("+01:00".to_string()),
+        ));
+        let b = hour(&a).unwrap();
+        assert_eq!(11, b.value(0));
+    }
+
+    #[test]
+    fn test_temporal_array_timestamp_hour_with_timezone_without_colon() {
+        use std::sync::Arc;
+
+        let a = Arc::new(TimestampSecondArray::from_vec(
+            vec![60 * 60 * 10],
+            Some("+0100".to_string()),
+        ));
+        assert!(matches!(hour(&a), Err(ArrowError::ComputeError(_))))
+    }
+
+    #[test]
+    fn test_temporal_array_timestamp_hour_with_timezone_without_initial_sign() 
{
+        use std::sync::Arc;
+
+        let a = Arc::new(TimestampSecondArray::from_vec(
+            vec![60 * 60 * 10],
+            Some("0100".to_string()),
+        ));
+        assert!(matches!(hour(&a), Err(ArrowError::ComputeError(_))))
+    }
+
+    #[test]
+    fn test_temporal_array_timestamp_hour_with_timezone_with_only_colon() {
+        use std::sync::Arc;
+
+        let a = Arc::new(TimestampSecondArray::from_vec(
+            vec![60 * 60 * 10],
+            Some("01:00".to_string()),
+        ));
+        assert!(matches!(hour(&a), Err(ArrowError::ComputeError(_))))
+    }
 }

Reply via email to