This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 1237c89922 Enhance Time32/Time64 support in date_part (#5337)
1237c89922 is described below
commit 1237c899228055c9b6d377cd03b33cde68085360
Author: Jeffrey Vo <[email protected]>
AuthorDate: Wed Jan 31 21:36:25 2024 +1100
Enhance Time32/Time64 support in date_part (#5337)
* Enhance Time32/Time64 support in date_part
* Refactoring
---
arrow-arith/src/temporal.rs | 280 ++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 268 insertions(+), 12 deletions(-)
diff --git a/arrow-arith/src/temporal.rs b/arrow-arith/src/temporal.rs
index a386559e30..d52af32f42 100644
--- a/arrow-arith/src/temporal.rs
+++ b/arrow-arith/src/temporal.rs
@@ -23,9 +23,9 @@ use arrow_array::cast::AsArray;
use chrono::{Datelike, NaiveDateTime, Offset, TimeZone, Timelike, Utc};
use arrow_array::temporal_conversions::{
- date32_to_datetime, date64_to_datetime, time32ms_to_time, time32s_to_time,
time64ns_to_time,
- time64us_to_time, timestamp_ms_to_datetime, timestamp_ns_to_datetime,
timestamp_s_to_datetime,
- timestamp_us_to_datetime,
+ date32_to_datetime, date64_to_datetime, timestamp_ms_to_datetime,
timestamp_ns_to_datetime,
+ timestamp_s_to_datetime, timestamp_us_to_datetime, MICROSECONDS,
MICROSECONDS_IN_DAY,
+ MILLISECONDS, MILLISECONDS_IN_DAY, NANOSECONDS, NANOSECONDS_IN_DAY,
SECONDS_IN_DAY,
};
use arrow_array::timezone::Tz;
use arrow_array::types::*;
@@ -109,7 +109,7 @@ where
///
/// Currently only supports temporal types:
/// - Date32/Date64
-/// - Time32/Time64 (Limited support)
+/// - Time32/Time64
/// - Timestamp
///
/// Returns an [`Int32Array`] unless input was a dictionary type, in which case returns
@@ -179,9 +179,18 @@ trait ExtractDatePartExt {
impl ExtractDatePartExt for PrimitiveArray<Time32SecondType> {
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
+ #[inline]
+ fn range_check(s: i32) -> bool {
+ (0..SECONDS_IN_DAY as i32).contains(&s)
+ }
match part {
- DatePart::Hour => Ok(self.unary_opt(|d| time32s_to_time(d).map(|c|
c.hour() as i32))),
- // TODO expand support for Time types, see: https://github.com/apache/arrow-rs/issues/5261
+ DatePart::Hour => Ok(self.unary_opt(|s| range_check(s).then_some(s
/ 3_600))),
+ DatePart::Minute => Ok(self.unary_opt(|s|
range_check(s).then_some((s / 60) % 60))),
+ DatePart::Second => Ok(self.unary_opt(|s|
range_check(s).then_some(s % 60))),
+ // Time32Second only encodes number of seconds, so these will always be 0 (if in valid range)
+ DatePart::Millisecond | DatePart::Microsecond |
DatePart::Nanosecond => {
+ Ok(self.unary_opt(|s| range_check(s).then_some(0)))
+ }
_ => return_compute_error_with!(format!("{part} does not
support"), self.data_type()),
}
}
@@ -189,9 +198,30 @@ impl ExtractDatePartExt for PrimitiveArray<Time32SecondType> {
impl ExtractDatePartExt for PrimitiveArray<Time32MillisecondType> {
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
+ #[inline]
+ fn range_check(ms: i32) -> bool {
+ (0..MILLISECONDS_IN_DAY as i32).contains(&ms)
+ }
+ let milliseconds = MILLISECONDS as i32;
match part {
- DatePart::Hour => Ok(self.unary_opt(|d|
time32ms_to_time(d).map(|c| c.hour() as i32))),
- // TODO expand support for Time types, see: https://github.com/apache/arrow-rs/issues/5261
+ DatePart::Hour => {
+ Ok(self.unary_opt(|ms| range_check(ms).then_some(ms / 3_600 /
milliseconds)))
+ }
+ DatePart::Minute => {
+ Ok(self.unary_opt(|ms| range_check(ms).then_some((ms / 60 /
milliseconds) % 60)))
+ }
+ DatePart::Second => {
+ Ok(self.unary_opt(|ms| range_check(ms).then_some((ms /
milliseconds) % 60)))
+ }
+ DatePart::Millisecond => {
+ Ok(self.unary_opt(|ms| range_check(ms).then_some(ms %
milliseconds)))
+ }
+ DatePart::Microsecond => {
+ Ok(self.unary_opt(|ms| range_check(ms).then_some((ms %
milliseconds) * 1_000)))
+ }
+ DatePart::Nanosecond => {
+ Ok(self.unary_opt(|ms| range_check(ms).then_some((ms %
milliseconds) * 1_000_000)))
+ }
_ => return_compute_error_with!(format!("{part} does not
support"), self.data_type()),
}
}
@@ -199,9 +229,28 @@ impl ExtractDatePartExt for PrimitiveArray<Time32MillisecondType> {
impl ExtractDatePartExt for PrimitiveArray<Time64MicrosecondType> {
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
+ #[inline]
+ fn range_check(us: i64) -> bool {
+ (0..MICROSECONDS_IN_DAY).contains(&us)
+ }
match part {
- DatePart::Hour => Ok(self.unary_opt(|d|
time64us_to_time(d).map(|c| c.hour() as i32))),
- // TODO expand support for Time types, see: https://github.com/apache/arrow-rs/issues/5261
+ DatePart::Hour => {
+ Ok(self
+ .unary_opt(|us| range_check(us).then_some((us / 3_600 /
MICROSECONDS) as i32)))
+ }
+ DatePart::Minute => Ok(self
+ .unary_opt(|us| range_check(us).then_some(((us / 60 /
MICROSECONDS) % 60) as i32))),
+ DatePart::Second => {
+ Ok(self
+ .unary_opt(|us| range_check(us).then_some(((us /
MICROSECONDS) % 60) as i32)))
+ }
+ DatePart::Millisecond => Ok(self
+ .unary_opt(|us| range_check(us).then_some(((us % MICROSECONDS)
/ 1_000) as i32))),
+ DatePart::Microsecond => {
+ Ok(self.unary_opt(|us| range_check(us).then_some((us %
MICROSECONDS) as i32)))
+ }
+ DatePart::Nanosecond => Ok(self
+ .unary_opt(|us| range_check(us).then_some(((us % MICROSECONDS)
* 1_000) as i32))),
_ => return_compute_error_with!(format!("{part} does not
support"), self.data_type()),
}
}
@@ -209,9 +258,30 @@ impl ExtractDatePartExt for PrimitiveArray<Time64MicrosecondType> {
impl ExtractDatePartExt for PrimitiveArray<Time64NanosecondType> {
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
+ #[inline]
+ fn range_check(ns: i64) -> bool {
+ (0..NANOSECONDS_IN_DAY).contains(&ns)
+ }
match part {
- DatePart::Hour => Ok(self.unary_opt(|d|
time64ns_to_time(d).map(|c| c.hour() as i32))),
- // TODO expand support for Time types, see: https://github.com/apache/arrow-rs/issues/5261
+ DatePart::Hour => {
+ Ok(self
+ .unary_opt(|ns| range_check(ns).then_some((ns / 3_600 /
NANOSECONDS) as i32)))
+ }
+ DatePart::Minute => Ok(self
+ .unary_opt(|ns| range_check(ns).then_some(((ns / 60 /
NANOSECONDS) % 60) as i32))),
+ DatePart::Second => Ok(
+ self.unary_opt(|ns| range_check(ns).then_some(((ns /
NANOSECONDS) % 60) as i32))
+ ),
+ DatePart::Millisecond => Ok(self.unary_opt(|ns| {
+ range_check(ns).then_some(((ns % NANOSECONDS) / 1_000_000) as
i32)
+ })),
+ DatePart::Microsecond => {
+ Ok(self
+ .unary_opt(|ns| range_check(ns).then_some(((ns %
NANOSECONDS) / 1_000) as i32)))
+ }
+ DatePart::Nanosecond => {
+ Ok(self.unary_opt(|ns| range_check(ns).then_some((ns %
NANOSECONDS) as i32)))
+ }
_ => return_compute_error_with!(format!("{part} does not
support"), self.data_type()),
}
}
@@ -1244,4 +1314,190 @@ mod tests {
let expected = Arc::new(expected_dict) as ArrayRef;
assert_eq!(&expected, &b);
}
+
+ #[test]
+ fn test_temporal_array_time64_nanoseconds() {
+ // 23:32:50.123456789
+ let input: Time64NanosecondArray =
vec![Some(84_770_123_456_789)].into();
+
+ let actual = date_part(&input, DatePart::Hour).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(23, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Minute).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(32, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Second).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(50, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Millisecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(123, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Microsecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(123_456, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Nanosecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(123_456_789, actual.value(0));
+
+ // invalid values should turn into null
+ let input: Time64NanosecondArray = vec![
+ Some(-1),
+ Some(86_400_000_000_000),
+ Some(86_401_000_000_000),
+ None,
+ ]
+ .into();
+ let actual = date_part(&input, DatePart::Hour).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ let expected: Int32Array = vec![None, None, None, None].into();
+ assert_eq!(&expected, actual);
+ }
+
+ #[test]
+ fn test_temporal_array_time64_microseconds() {
+ // 23:32:50.123456
+ let input: Time64MicrosecondArray = vec![Some(84_770_123_456)].into();
+
+ let actual = date_part(&input, DatePart::Hour).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(23, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Minute).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(32, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Second).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(50, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Millisecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(123, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Microsecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(123_456, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Nanosecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(123_456_000, actual.value(0));
+
+ // invalid values should turn into null
+ let input: Time64MicrosecondArray =
+ vec![Some(-1), Some(86_400_000_000), Some(86_401_000_000),
None].into();
+ let actual = date_part(&input, DatePart::Hour).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ let expected: Int32Array = vec![None, None, None, None].into();
+ assert_eq!(&expected, actual);
+ }
+
+ #[test]
+ fn test_temporal_array_time32_milliseconds() {
+ // 23:32:50.123
+ let input: Time32MillisecondArray = vec![Some(84_770_123)].into();
+
+ let actual = date_part(&input, DatePart::Hour).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(23, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Minute).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(32, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Second).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(50, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Millisecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(123, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Microsecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(123_000, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Nanosecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(123_000_000, actual.value(0));
+
+ // invalid values should turn into null
+ let input: Time32MillisecondArray =
+ vec![Some(-1), Some(86_400_000), Some(86_401_000), None].into();
+ let actual = date_part(&input, DatePart::Hour).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ let expected: Int32Array = vec![None, None, None, None].into();
+ assert_eq!(&expected, actual);
+ }
+
+ #[test]
+ fn test_temporal_array_time32_seconds() {
+ // 23:32:50
+ let input: Time32SecondArray = vec![84_770].into();
+
+ let actual = date_part(&input, DatePart::Hour).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(23, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Minute).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(32, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Second).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(50, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Millisecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(0, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Microsecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(0, actual.value(0));
+
+ let actual = date_part(&input, DatePart::Nanosecond).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ assert_eq!(0, actual.value(0));
+
+ // invalid values should turn into null
+ let input: Time32SecondArray = vec![Some(-1), Some(86_400),
Some(86_401), None].into();
+ let actual = date_part(&input, DatePart::Hour).unwrap();
+ let actual = actual.as_primitive::<Int32Type>();
+ let expected: Int32Array = vec![None, None, None, None].into();
+ assert_eq!(&expected, actual);
+ }
+
+ #[test]
+ fn test_temporal_array_time_invalid_parts() {
+ fn ensure_returns_error(array: &dyn Array) {
+ let invalid_parts = [
+ DatePart::Quarter,
+ DatePart::Year,
+ DatePart::Month,
+ DatePart::Week,
+ DatePart::Day,
+ DatePart::DayOfWeekSunday0,
+ DatePart::DayOfWeekMonday0,
+ DatePart::DayOfYear,
+ ];
+
+ for part in invalid_parts {
+ let err = date_part(array, part).unwrap_err();
+ let expected = format!(
+ "Compute error: {part} does not support: {}",
+ array.data_type()
+ );
+ assert_eq!(expected, err.to_string());
+ }
+ }
+
+ ensure_returns_error(&Time32SecondArray::from(vec![0]));
+ ensure_returns_error(&Time32MillisecondArray::from(vec![0]));
+ ensure_returns_error(&Time64MicrosecondArray::from(vec![0]));
+ ensure_returns_error(&Time64NanosecondArray::from(vec![0]));
+ }
}