tustvold commented on code in PR #5319:
URL: https://github.com/apache/arrow-rs/pull/5319#discussion_r1463253070


##########
arrow-arith/src/temporal.rs:
##########
@@ -19,105 +19,273 @@
 
 use std::sync::Arc;
 
-use chrono::{DateTime, Datelike, NaiveDateTime, NaiveTime, Offset, Timelike};
-
-use arrow_array::builder::*;
-use arrow_array::iterator::ArrayIter;
-use arrow_array::temporal_conversions::{as_datetime, 
as_datetime_with_timezone, as_time};
+use arrow_array::cast::AsArray;
+use chrono::{Datelike, NaiveDateTime, Offset, TimeZone, Timelike, Utc};
+
+use arrow_array::temporal_conversions::{
+    date32_to_datetime, date64_to_datetime, time32ms_to_time, time32s_to_time, 
time64ns_to_time,
+    time64us_to_time, timestamp_ms_to_datetime, timestamp_ns_to_datetime, 
timestamp_s_to_datetime,
+    timestamp_us_to_datetime,
+};
 use arrow_array::timezone::Tz;
 use arrow_array::types::*;
 use arrow_array::*;
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::{ArrowError, DataType};
 
-/// This function takes an `ArrayIter` of input array and an extractor `op` 
which takes
-/// an input `NaiveTime` and returns time component (e.g. hour) as `i32` value.
-/// The extracted values are built by the given `builder` to be an 
`Int32Array`.
-fn as_time_with_op<A: ArrayAccessor<Item = T::Native>, T: ArrowTemporalType, 
F>(
-    iter: ArrayIter<A>,
-    mut builder: PrimitiveBuilder<Int32Type>,
-    op: F,
-) -> Int32Array
+/// Valid parts to extract from date/timestamp arrays.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DatePart {
+    /// Quarter of the year, in range `1..=4`
+    Quarter,
+    /// Calendar year
+    Year,
+    /// Month in the year, in range `1..=12`
+    Month,
+    /// ISO week of the year, in range `1..=53`
+    Week,
+    /// Day of the month, in range `1..=31`
+    Day,
+    /// Day of the week, in range `0..=6`, where Sunday is 0
+    DayOfWeekSunday0,
+    /// Day of the week, in range `0..=6`, where Monday is 0
+    DayOfWeekMonday0,
+    /// Day of year, in range `1..=366`
+    DayOfYear,
+    /// Hour of the day, in range `0..=23`
+    Hour,
+    /// Minute of the hour, in range `0..=59`
+    Minute,
+    /// Second of the minute, in range `0..=59`
+    Second,
+    /// Millisecond of the second
+    Millisecond,
+    /// Microsecond of the second
+    Microsecond,
+    /// Nanosecond of the second
+    Nanosecond,
+}
+
+impl std::fmt::Display for DatePart {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?}", self)
+    }
+}
+
+/// Returns function to extract relevant [`DatePart`] from types like a
+/// [`NaiveDateTime`] or [`DateTime`].
+///
+/// [`DateTime`]: chrono::DateTime
+fn get_date_time_part_extract_fn<T>(part: DatePart) -> fn(T) -> i32
 where
-    F: Fn(NaiveTime) -> i32,
-    i64: From<T::Native>,
+    T: ChronoDateExt + Datelike + Timelike,
 {
-    iter.into_iter().for_each(|value| {
-        if let Some(value) = value {
-            match as_time::<T>(i64::from(value)) {
-                Some(dt) => builder.append_value(op(dt)),
-                None => builder.append_null(),
-            }
-        } else {
-            builder.append_null();
+    match part {
+        DatePart::Quarter => |d| d.quarter() as i32,
+        DatePart::Year => |d| d.year(),
+        DatePart::Month => |d| d.month() as i32,
+        DatePart::Week => |d| d.iso_week().week() as i32,
+        DatePart::Day => |d| d.day() as i32,
+        DatePart::DayOfWeekSunday0 => |d| d.num_days_from_sunday(),
+        DatePart::DayOfWeekMonday0 => |d| d.num_days_from_monday(),
+        DatePart::DayOfYear => |d| d.ordinal() as i32,
+        DatePart::Hour => |d| d.hour() as i32,
+        DatePart::Minute => |d| d.minute() as i32,
+        DatePart::Second => |d| d.second() as i32,
+        DatePart::Millisecond => |d| (d.nanosecond() / 1_000_000) as i32,
+        DatePart::Microsecond => |d| (d.nanosecond() / 1_000) as i32,
+        DatePart::Nanosecond => |d| (d.nanosecond()) as i32,
+    }
+}
+
+/// Given array, return new array with the extracted [`DatePart`].
+///
+/// Returns an [`Int32Array`] unless input was a dictionary type, in which 
case returns
+/// the dictionary but with this function applied onto its values.
+///
+/// Returns error if attempting to extract date part from unsupported type 
(i.e. non-date/timestamp types).
+pub fn date_part(array: &dyn Array, part: DatePart) -> Result<ArrayRef, 
ArrowError> {
+    downcast_temporal_array!(
+        array => {
+            let array = array.date_part(part)?;
+            let array = Arc::new(array) as ArrayRef;
+            Ok(array)
         }
-    });
+        // TODO: support interval
+        // DataType::Interval(_) => {
+        //     todo!();
+        // }
+        DataType::Dictionary(_, _) => {
+            let array = array.as_any_dictionary();
+            let values = date_part(array.values(), part)?;
+            let values = Arc::new(values) as ArrayRef;
+            let new_array = array.with_values(values);
+            Ok(new_array)
+        }
+        t => return_compute_error_with!(format!("{part} does not support"), t),
+    )
+}
 
-    builder.finish()
+/// Used to integrate new [`date_part()`] method with existing shims such as
+/// [`hour()`] and [`week()`].
+fn date_part_primitive<T: ArrowTemporalType>(
+    array: &PrimitiveArray<T>,
+    part: DatePart,
+) -> Result<Int32Array, ArrowError> {
+    let array = date_part(array, part)?;
+    Ok(array.as_primitive::<Int32Type>().to_owned())
 }
 
-/// This function takes an `ArrayIter` of input array and an extractor `op` 
which takes
-/// an input `NaiveDateTime` and returns data time component (e.g. hour) as 
`i32` value.
-/// The extracted values are built by the given `builder` to be an 
`Int32Array`.
-fn as_datetime_with_op<A: ArrayAccessor<Item = T::Native>, T: 
ArrowTemporalType, F>(
-    iter: ArrayIter<A>,
-    mut builder: PrimitiveBuilder<Int32Type>,
-    op: F,
-) -> Int32Array
-where
-    F: Fn(NaiveDateTime) -> i32,
-    i64: From<T::Native>,
-{
-    iter.into_iter().for_each(|value| {
-        if let Some(value) = value {
-            match as_datetime::<T>(i64::from(value)) {
-                Some(dt) => builder.append_value(op(dt)),
-                None => builder.append_null(),
-            }
-        } else {
-            builder.append_null();
+/// Extract optional [`Tz`] from timestamp data types, returning error
+/// if called with a non-timestamp type.
+fn get_tz(dt: &DataType) -> Result<Option<Tz>, ArrowError> {
+    match dt {
+        DataType::Timestamp(_, Some(tz)) => Ok(Some(tz.parse::<Tz>()?)),
+        DataType::Timestamp(_, None) => Ok(None),
+        _ => Err(ArrowError::CastError(format!("Not a timestamp type: {dt}"))),
+    }
+}
+
+/// Implement the specialized functions for extracting date part from temporal 
arrays.
+trait ExtractDatePartExt {

Review Comment:
   Nice, this is exactly what I was thinking of :+1: 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to