Jefffrey commented on code in PR #5319:
URL: https://github.com/apache/arrow-rs/pull/5319#discussion_r1463930681


##########
arrow-arith/src/temporal.rs:
##########
@@ -19,105 +19,273 @@
 
 use std::sync::Arc;
 
-use chrono::{DateTime, Datelike, NaiveDateTime, NaiveTime, Offset, Timelike};
-
-use arrow_array::builder::*;
-use arrow_array::iterator::ArrayIter;
-use arrow_array::temporal_conversions::{as_datetime, 
as_datetime_with_timezone, as_time};
+use arrow_array::cast::AsArray;
+use chrono::{Datelike, NaiveDateTime, Offset, TimeZone, Timelike, Utc};
+
+use arrow_array::temporal_conversions::{
+    date32_to_datetime, date64_to_datetime, time32ms_to_time, time32s_to_time, 
time64ns_to_time,
+    time64us_to_time, timestamp_ms_to_datetime, timestamp_ns_to_datetime, 
timestamp_s_to_datetime,
+    timestamp_us_to_datetime,
+};
 use arrow_array::timezone::Tz;
 use arrow_array::types::*;
 use arrow_array::*;
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::{ArrowError, DataType};
 
-/// This function takes an `ArrayIter` of input array and an extractor `op` 
which takes
-/// an input `NaiveTime` and returns time component (e.g. hour) as `i32` value.
-/// The extracted values are built by the given `builder` to be an 
`Int32Array`.
-fn as_time_with_op<A: ArrayAccessor<Item = T::Native>, T: ArrowTemporalType, 
F>(
-    iter: ArrayIter<A>,
-    mut builder: PrimitiveBuilder<Int32Type>,
-    op: F,
-) -> Int32Array
+/// Valid parts to extract from date/timestamp arrays.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DatePart {
+    /// Quarter of the year, in range `1..=4`
+    Quarter,
+    /// Calendar year
+    Year,
+    /// Month in the year, in range `1..=12`
+    Month,
+    /// ISO week of the year, in range `1..=53`
+    Week,
+    /// Day of the month, in range `1..=31`
+    Day,
+    /// Day of the week, in range `0..=6`, where Sunday is 0
+    DayOfWeekSunday0,
+    /// Day of the week, in range `0..=6`, where Monday is 0
+    DayOfWeekMonday0,
+    /// Day of year, in range `1..=366`
+    DayOfYear,
+    /// Hour of the day, in range `0..=23`
+    Hour,
+    /// Minute of the hour, in range `0..=59`
+    Minute,
+    /// Second of the minute, in range `0..=59`
+    Second,
+    /// Millisecond of the second
+    Millisecond,
+    /// Microsecond of the second
+    Microsecond,
+    /// Nanosecond of the second
+    Nanosecond,
+}
+
+impl std::fmt::Display for DatePart {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?}", self)
+    }
+}
+
+/// Returns function to extract relevant [`DatePart`] from types like a
+/// [`NaiveDateTime`] or [`DateTime`].
+///
+/// [`DateTime`]: chrono::DateTime
+fn get_date_time_part_extract_fn<T>(part: DatePart) -> fn(T) -> i32
 where
-    F: Fn(NaiveTime) -> i32,
-    i64: From<T::Native>,
+    T: ChronoDateExt + Datelike + Timelike,
 {
-    iter.into_iter().for_each(|value| {
-        if let Some(value) = value {
-            match as_time::<T>(i64::from(value)) {
-                Some(dt) => builder.append_value(op(dt)),
-                None => builder.append_null(),
-            }
-        } else {
-            builder.append_null();
+    match part {
+        DatePart::Quarter => |d| d.quarter() as i32,
+        DatePart::Year => |d| d.year(),
+        DatePart::Month => |d| d.month() as i32,
+        DatePart::Week => |d| d.iso_week().week() as i32,
+        DatePart::Day => |d| d.day() as i32,
+        DatePart::DayOfWeekSunday0 => |d| d.num_days_from_sunday(),
+        DatePart::DayOfWeekMonday0 => |d| d.num_days_from_monday(),
+        DatePart::DayOfYear => |d| d.ordinal() as i32,
+        DatePart::Hour => |d| d.hour() as i32,
+        DatePart::Minute => |d| d.minute() as i32,
+        DatePart::Second => |d| d.second() as i32,
+        DatePart::Millisecond => |d| (d.nanosecond() / 1_000_000) as i32,
+        DatePart::Microsecond => |d| (d.nanosecond() / 1_000) as i32,
+        DatePart::Nanosecond => |d| (d.nanosecond()) as i32,
+    }
+}
+
+/// Given array, return new array with the extracted [`DatePart`].
+///
+/// Returns an [`Int32Array`] unless input was a dictionary type, in which 
case returns
+/// the dictionary but with this function applied onto its values.
+///
+/// Returns error if attempting to extract date part from unsupported type 
(i.e. non-date/timestamp types).
+pub fn date_part(array: &dyn Array, part: DatePart) -> Result<ArrayRef, 
ArrowError> {
+    downcast_temporal_array!(
+        array => {
+            let array = array.date_part(part)?;
+            let array = Arc::new(array) as ArrayRef;
+            Ok(array)
         }
-    });
+        // TODO: support interval
+        // DataType::Interval(_) => {
+        //     todo!();
+        // }
+        DataType::Dictionary(_, _) => {
+            let array = array.as_any_dictionary();
+            let values = date_part(array.values(), part)?;
+            let values = Arc::new(values) as ArrayRef;
+            let new_array = array.with_values(values);
+            Ok(new_array)
+        }
+        t => return_compute_error_with!(format!("{part} does not support"), t),
+    )
+}
 
-    builder.finish()
+/// Used to integrate new [`date_part()`] method with existing shims such as
+/// [`hour()`] and [`week()`].
+fn date_part_primitive<T: ArrowTemporalType>(
+    array: &PrimitiveArray<T>,
+    part: DatePart,
+) -> Result<Int32Array, ArrowError> {
+    let array = date_part(array, part)?;
+    Ok(array.as_primitive::<Int32Type>().to_owned())
 }
 
-/// This function takes an `ArrayIter` of input array and an extractor `op` 
which takes
-/// an input `NaiveDateTime` and returns data time component (e.g. hour) as 
`i32` value.
-/// The extracted values are built by the given `builder` to be an 
`Int32Array`.
-fn as_datetime_with_op<A: ArrayAccessor<Item = T::Native>, T: 
ArrowTemporalType, F>(
-    iter: ArrayIter<A>,
-    mut builder: PrimitiveBuilder<Int32Type>,
-    op: F,
-) -> Int32Array
-where
-    F: Fn(NaiveDateTime) -> i32,
-    i64: From<T::Native>,
-{
-    iter.into_iter().for_each(|value| {
-        if let Some(value) = value {
-            match as_datetime::<T>(i64::from(value)) {
-                Some(dt) => builder.append_value(op(dt)),
-                None => builder.append_null(),
-            }
-        } else {
-            builder.append_null();
+/// Extract optional [`Tz`] from timestamp data types, returning error
+/// if called with a non-timestamp type.
+fn get_tz(dt: &DataType) -> Result<Option<Tz>, ArrowError> {
+    match dt {
+        DataType::Timestamp(_, Some(tz)) => Ok(Some(tz.parse::<Tz>()?)),
+        DataType::Timestamp(_, None) => Ok(None),
+        _ => Err(ArrowError::CastError(format!("Not a timestamp type: {dt}"))),
+    }
+}
+
+/// Implement the specialized functions for extracting date part from temporal 
arrays.
+trait ExtractDatePartExt {
+    fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError>;
+}
+
+impl ExtractDatePartExt for PrimitiveArray<Time32SecondType> {
+    fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
+        match part {
+            DatePart::Hour => Ok(self.unary_opt(|d| time32s_to_time(d).map(|c| 
c.hour() as i32))),
+            // TODO expand support for Time types, see: 
https://github.com/apache/arrow-rs/issues/5261
+            _ => return_compute_error_with!(format!("{part} does not 
support"), self.data_type()),
         }
-    });
+    }
+}
 
-    builder.finish()
+impl ExtractDatePartExt for PrimitiveArray<Time32MillisecondType> {
+    fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
+        match part {
+            DatePart::Hour => Ok(self.unary_opt(|d| 
time32ms_to_time(d).map(|c| c.hour() as i32))),
+            // TODO expand support for Time types, see: 
https://github.com/apache/arrow-rs/issues/5261
+            _ => return_compute_error_with!(format!("{part} does not 
support"), self.data_type()),
+        }
+    }
 }
 
-/// This function extracts date time component (e.g. hour) from an array of 
datatime.
-/// `iter` is the `ArrayIter` of input datatime array. `builder` is used to 
build the
-/// returned `Int32Array` containing the extracted components. `tz` is 
timezone string
-/// which will be added to datetime values in the input array. `parsed` is a 
`Parsed`
-/// object used to parse timezone string. `op` is the extractor closure which 
takes
-/// data time object of `NaiveDateTime` type and returns `i32` value of 
extracted
-/// component.
-fn extract_component_from_datetime_array<
-    A: ArrayAccessor<Item = T::Native>,
-    T: ArrowTemporalType,
-    F,
->(
-    iter: ArrayIter<A>,
-    mut builder: PrimitiveBuilder<Int32Type>,
-    tz: &str,
-    op: F,
-) -> Result<Int32Array, ArrowError>
-where
-    F: Fn(DateTime<Tz>) -> i32,
-    i64: From<T::Native>,
-{
-    let tz: Tz = tz.parse()?;
-    for value in iter {
-        match value {
-            Some(value) => match as_datetime_with_timezone::<T>(value.into(), 
tz) {
-                Some(time) => builder.append_value(op(time)),
-                _ => {
-                    return Err(ArrowError::ComputeError(
-                        "Unable to read value as datetime".to_string(),
-                    ))
-                }
-            },
-            None => builder.append_null(),
+impl ExtractDatePartExt for PrimitiveArray<Time64MicrosecondType> {
+    fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
+        match part {
+            DatePart::Hour => Ok(self.unary_opt(|d| 
time64us_to_time(d).map(|c| c.hour() as i32))),
+            // TODO expand support for Time types, see: 
https://github.com/apache/arrow-rs/issues/5261
+            _ => return_compute_error_with!(format!("{part} does not 
support"), self.data_type()),
+        }
+    }
+}
+
+impl ExtractDatePartExt for PrimitiveArray<Time64NanosecondType> {
+    fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
+        match part {
+            DatePart::Hour => Ok(self.unary_opt(|d| 
time64ns_to_time(d).map(|c| c.hour() as i32))),
+            // TODO expand support for Time types, see: 
https://github.com/apache/arrow-rs/issues/5261
+            _ => return_compute_error_with!(format!("{part} does not 
support"), self.data_type()),
+        }
+    }
+}
+
+impl ExtractDatePartExt for PrimitiveArray<Date32Type> {
+    fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
+        // Date32 only encodes number of days, so these will always be 0
+        if let DatePart::Hour
+        | DatePart::Minute
+        | DatePart::Second
+        | DatePart::Millisecond
+        | DatePart::Microsecond
+        | DatePart::Nanosecond = part
+        {
+            Ok(self.unary(|_| 0))

Review Comment:
   Good point, applied change



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to