Jefffrey commented on code in PR #5319:
URL: https://github.com/apache/arrow-rs/pull/5319#discussion_r1462376100
##########
arrow-arith/src/temporal.rs:
##########
@@ -19,105 +19,352 @@
use std::sync::Arc;
-use chrono::{DateTime, Datelike, NaiveDateTime, NaiveTime, Offset, Timelike};
-
-use arrow_array::builder::*;
-use arrow_array::iterator::ArrayIter;
-use arrow_array::temporal_conversions::{as_datetime,
as_datetime_with_timezone, as_time};
+use arrow_array::cast::{downcast_array, AsArray};
+use chrono::{DateTime, Datelike, NaiveDateTime, Offset, TimeZone, Timelike,
Utc};
+
+use arrow_array::temporal_conversions::{
+ date32_to_datetime, date64_to_datetime, time32ms_to_time, time32s_to_time,
time64ns_to_time,
+ time64us_to_time, timestamp_ms_to_datetime, timestamp_ns_to_datetime,
timestamp_s_to_datetime,
+ timestamp_us_to_datetime,
+};
use arrow_array::timezone::Tz;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::ArrowNativeType;
-use arrow_schema::{ArrowError, DataType};
-
-/// This function takes an `ArrayIter` of input array and an extractor `op`
which takes
-/// an input `NaiveTime` and returns time component (e.g. hour) as `i32` value.
-/// The extracted values are built by the given `builder` to be an
`Int32Array`.
-fn as_time_with_op<A: ArrayAccessor<Item = T::Native>, T: ArrowTemporalType,
F>(
- iter: ArrayIter<A>,
- mut builder: PrimitiveBuilder<Int32Type>,
- op: F,
-) -> Int32Array
-where
- F: Fn(NaiveTime) -> i32,
- i64: From<T::Native>,
-{
- iter.into_iter().for_each(|value| {
- if let Some(value) = value {
- match as_time::<T>(i64::from(value)) {
- Some(dt) => builder.append_value(op(dt)),
- None => builder.append_null(),
- }
- } else {
- builder.append_null();
- }
- });
+use arrow_schema::{ArrowError, DataType, TimeUnit};
+
+/// Valid parts to extract from date/timestamp arrays.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DatePart {
+ /// Quarter of the year, in range `1..=4`
+ Quarter,
+ /// Calendar year
+ Year,
+ /// Month in the year, in range `1..=12`
+ Month,
+ /// ISO week of the year, in range `1..=53`
+ Week,
+ /// Day of the month, in range `1..=31`
+ Day,
+ /// Day of the week, in range `0..=6`, where Sunday is 0
+ DayOfWeekSunday0,
+ /// Day of the week, in range `0..=6`, where Monday is 0
+ DayOfWeekMonday0,
+ /// Day of year, in range `1..=366`
+ DayOfYear,
+ /// Hour of the day, in range `0..=23`
+ Hour,
+ /// Minute of the hour, in range `0..=59`
+ Minute,
+ /// Second of the minute, in range `0..=59`
+ Second,
+ /// Millisecond of the second
+ Millisecond,
+ /// Microsecond of the second
+ Microsecond,
+ /// Nanosecond of the second
+ Nanosecond,
+}
- builder.finish()
+impl std::fmt::Display for DatePart {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{:?}", self)
+ }
}
-/// This function takes an `ArrayIter` of input array and an extractor `op`
which takes
-/// an input `NaiveDateTime` and returns data time component (e.g. hour) as
`i32` value.
-/// The extracted values are built by the given `builder` to be an
`Int32Array`.
-fn as_datetime_with_op<A: ArrayAccessor<Item = T::Native>, T:
ArrowTemporalType, F>(
- iter: ArrayIter<A>,
- mut builder: PrimitiveBuilder<Int32Type>,
- op: F,
-) -> Int32Array
-where
- F: Fn(NaiveDateTime) -> i32,
- i64: From<T::Native>,
-{
- iter.into_iter().for_each(|value| {
- if let Some(value) = value {
- match as_datetime::<T>(i64::from(value)) {
- Some(dt) => builder.append_value(op(dt)),
- None => builder.append_null(),
- }
- } else {
- builder.append_null();
- }
- });
+/// Returns function to extract relevant [`DatePart`] from a [`NaiveDateTime`].
+fn get_naive_date_time_part_extract_fn(part: DatePart) -> fn(NaiveDateTime) ->
i32 {
+ match part {
+ DatePart::Quarter => |d| d.quarter() as i32,
+ DatePart::Year => |d| d.year(),
+ DatePart::Month => |d| d.month() as i32,
+ DatePart::Week => |d| d.iso_week().week() as i32,
+ DatePart::Day => |d| d.day() as i32,
+ DatePart::DayOfWeekSunday0 => |d| d.num_days_from_sunday(),
+ DatePart::DayOfWeekMonday0 => |d| d.num_days_from_monday(),
+ DatePart::DayOfYear => |d| d.ordinal() as i32,
+ DatePart::Hour => |d| d.hour() as i32,
+ DatePart::Minute => |d| d.minute() as i32,
+ DatePart::Second => |d| d.second() as i32,
+ DatePart::Millisecond => |d| (d.nanosecond() / 1_000_000) as i32,
+ DatePart::Microsecond => |d| (d.nanosecond() / 1_000) as i32,
+ DatePart::Nanosecond => |d| (d.nanosecond()) as i32,
+ }
+}
- builder.finish()
+/// Returns function to extract relevant [`DatePart`] from a [`DateTime`].
+fn get_date_time_tz_part_extract_fn(part: DatePart) -> fn(DateTime<Tz>) -> i32
{
+ match part {
+ DatePart::Quarter => |d| d.quarter() as i32,
+ DatePart::Year => |d| d.year(),
+ DatePart::Month => |d| d.month() as i32,
+ DatePart::Week => |d| d.iso_week().week() as i32,
+ DatePart::Day => |d| d.day() as i32,
+ DatePart::DayOfWeekSunday0 => |d| d.num_days_from_sunday(),
+ DatePart::DayOfWeekMonday0 => |d| d.num_days_from_monday(),
+ DatePart::DayOfYear => |d| d.ordinal() as i32,
+ DatePart::Hour => |d| d.hour() as i32,
+ DatePart::Minute => |d| d.minute() as i32,
+ DatePart::Second => |d| d.second() as i32,
+ DatePart::Millisecond => |d| (d.nanosecond() / 1_000_000) as i32,
+ DatePart::Microsecond => |d| (d.nanosecond() / 1_000) as i32,
+ DatePart::Nanosecond => |d| (d.nanosecond()) as i32,
+ }
}
-/// This function extracts date time component (e.g. hour) from an array of
datatime.
-/// `iter` is the `ArrayIter` of input datatime array. `builder` is used to
build the
-/// returned `Int32Array` containing the extracted components. `tz` is
timezone string
-/// which will be added to datetime values in the input array. `parsed` is a
`Parsed`
-/// object used to parse timezone string. `op` is the extractor closure which
takes
-/// data time object of `NaiveDateTime` type and returns `i32` value of
extracted
-/// component.
-fn extract_component_from_datetime_array<
- A: ArrayAccessor<Item = T::Native>,
- T: ArrowTemporalType,
- F,
->(
- iter: ArrayIter<A>,
- mut builder: PrimitiveBuilder<Int32Type>,
- tz: &str,
- op: F,
-) -> Result<Int32Array, ArrowError>
-where
- F: Fn(DateTime<Tz>) -> i32,
- i64: From<T::Native>,
-{
- let tz: Tz = tz.parse()?;
- for value in iter {
- match value {
- Some(value) => match as_datetime_with_timezone::<T>(value.into(),
tz) {
- Some(time) => builder.append_value(op(time)),
- _ => {
- return Err(ArrowError::ComputeError(
- "Unable to read value as datetime".to_string(),
- ))
- }
- },
- None => builder.append_null(),
+fn date_part_time32_s(
+ array: &PrimitiveArray<Time32SecondType>,
+ part: DatePart,
+) -> Result<Int32Array, ArrowError> {
+ match part {
+ DatePart::Hour => Ok(array.unary_opt(|d| time32s_to_time(d).map(|c|
c.hour() as i32))),
+ // TODO expand support for Time types, see:
https://github.com/apache/arrow-rs/issues/5261
+ _ => return_compute_error_with!(format!("{part} does not support"),
array.data_type()),
+ }
+}
+
+fn date_part_time32_ms(
+ array: &PrimitiveArray<Time32MillisecondType>,
+ part: DatePart,
+) -> Result<Int32Array, ArrowError> {
+ match part {
+ DatePart::Hour => Ok(array.unary_opt(|d| time32ms_to_time(d).map(|c|
c.hour() as i32))),
+ // TODO expand support for Time types, see:
https://github.com/apache/arrow-rs/issues/5261
+ _ => return_compute_error_with!(format!("{part} does not support"),
array.data_type()),
+ }
+}
+
+fn date_part_time64_us(
+ array: &PrimitiveArray<Time64MicrosecondType>,
+ part: DatePart,
+) -> Result<Int32Array, ArrowError> {
+ match part {
+ DatePart::Hour => Ok(array.unary_opt(|d| time64us_to_time(d).map(|c|
c.hour() as i32))),
+ // TODO expand support for Time types, see:
https://github.com/apache/arrow-rs/issues/5261
+ _ => return_compute_error_with!(format!("{part} does not support"),
array.data_type()),
+ }
+}
+
+fn date_part_time64_ns(
+ array: &PrimitiveArray<Time64NanosecondType>,
+ part: DatePart,
+) -> Result<Int32Array, ArrowError> {
+ match part {
+ DatePart::Hour => Ok(array.unary_opt(|d| time64ns_to_time(d).map(|c|
c.hour() as i32))),
+ // TODO expand support for Time types, see:
https://github.com/apache/arrow-rs/issues/5261
+ _ => return_compute_error_with!(format!("{part} does not support"),
array.data_type()),
+ }
+}
+
+fn date_part_date32(array: &PrimitiveArray<Date32Type>, part: DatePart) ->
Int32Array {
+ // Date32 only encodes number of days, so these will always be 0
+ if let DatePart::Hour
+ | DatePart::Minute
+ | DatePart::Second
+ | DatePart::Millisecond
+ | DatePart::Microsecond
+ | DatePart::Nanosecond = part
+ {
+ array.unary(|_| 0)
+ } else {
+ let map_func = get_naive_date_time_part_extract_fn(part);
+ array.unary_opt(|d| date32_to_datetime(d).map(map_func))
+ }
+}
+
+fn date_part_date64(array: &PrimitiveArray<Date64Type>, part: DatePart) ->
Int32Array {
+ let map_func = get_naive_date_time_part_extract_fn(part);
+ array.unary_opt(|d| date64_to_datetime(d).map(map_func))
+}
+
+fn date_part_timestamp_s(
+ array: &PrimitiveArray<TimestampSecondType>,
+ part: DatePart,
+) -> Int32Array {
+ // TimestampSecond only encodes number of seconds, so these will always be 0
+ if let DatePart::Millisecond | DatePart::Microsecond |
DatePart::Nanosecond = part {
+ array.unary(|_| 0)
+ } else {
+ let map_func = get_naive_date_time_part_extract_fn(part);
+ array.unary_opt(|d| timestamp_s_to_datetime(d).map(map_func))
+ }
+}
+
+fn date_part_timestamp_s_tz(
+ array: &PrimitiveArray<TimestampSecondType>,
+ part: DatePart,
+ tz: Tz,
+) -> Int32Array {
+ // TimestampSecond only encodes number of seconds, so these will always be 0
+ if let DatePart::Millisecond | DatePart::Microsecond |
DatePart::Nanosecond = part {
+ array.unary(|_| 0)
+ } else {
+ let map_func = get_date_time_tz_part_extract_fn(part);
+ array.unary_opt(|d| {
+ timestamp_s_to_datetime(d)
+ .map(|c| Utc.from_utc_datetime(&c).with_timezone(&tz))
+ .map(map_func)
+ })
+ }
+}
+
+fn date_part_timestamp_ms(
+ array: &PrimitiveArray<TimestampMillisecondType>,
+ part: DatePart,
+) -> Int32Array {
+ let map_func = get_naive_date_time_part_extract_fn(part);
+ array.unary_opt(|d| timestamp_ms_to_datetime(d).map(map_func))
+}
+
+fn date_part_timestamp_ms_tz(
+ array: &PrimitiveArray<TimestampMillisecondType>,
+ part: DatePart,
+ tz: Tz,
+) -> Int32Array {
+ let map_func = get_date_time_tz_part_extract_fn(part);
+ array.unary_opt(|d| {
+ timestamp_ms_to_datetime(d)
+ .map(|c| Utc.from_utc_datetime(&c).with_timezone(&tz))
+ .map(map_func)
+ })
+}
+
+fn date_part_timestamp_us(
+ array: &PrimitiveArray<TimestampMicrosecondType>,
+ part: DatePart,
+) -> Int32Array {
+ let map_func = get_naive_date_time_part_extract_fn(part);
+ array.unary_opt(|d| timestamp_us_to_datetime(d).map(map_func))
+}
+
+fn date_part_timestamp_us_tz(
+ array: &PrimitiveArray<TimestampMicrosecondType>,
+ part: DatePart,
+ tz: Tz,
+) -> Int32Array {
+ let map_func = get_date_time_tz_part_extract_fn(part);
+ array.unary_opt(|d| {
+ timestamp_us_to_datetime(d)
+ .map(|c| Utc.from_utc_datetime(&c).with_timezone(&tz))
+ .map(map_func)
+ })
+}
+
+fn date_part_timestamp_ns(
+ array: &PrimitiveArray<TimestampNanosecondType>,
+ part: DatePart,
+) -> Int32Array {
+ let map_func = get_naive_date_time_part_extract_fn(part);
+ array.unary_opt(|d| timestamp_ns_to_datetime(d).map(map_func))
+}
+
+fn date_part_timestamp_ns_tz(
+ array: &PrimitiveArray<TimestampNanosecondType>,
+ part: DatePart,
+ tz: Tz,
+) -> Int32Array {
Review Comment:
That's a good point, will look into it
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]