This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new dc373a3550 Support for `extract(x from time)` / `date_part` from time
types (#8693)
dc373a3550 is described below
commit dc373a3550610ce041fd73a1eabe08b096d6ed27
Author: Jeffrey Vo <[email protected]>
AuthorDate: Fri Mar 22 01:13:44 2024 +1100
Support for `extract(x from time)` / `date_part` from time types (#8693)
* Initial support for `extract(x from time)`
* Update function docs
* Add extract tests
---
datafusion/common/src/cast.rs | 37 +++-
datafusion/functions/src/datetime/date_part.rs | 31 ++-
datafusion/sqllogictest/test_files/expr.slt | 287 +++++++++++++++++++++++++
docs/source/user-guide/sql/scalar_functions.md | 27 +--
4 files changed, 345 insertions(+), 37 deletions(-)
diff --git a/datafusion/common/src/cast.rs b/datafusion/common/src/cast.rs
index 088f03e002..0dc0532bbb 100644
--- a/datafusion/common/src/cast.rs
+++ b/datafusion/common/src/cast.rs
@@ -24,17 +24,18 @@ use crate::{downcast_value, DataFusionError, Result};
use arrow::{
array::{
Array, BinaryArray, BooleanArray, Date32Array, Date64Array,
Decimal128Array,
- DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray,
Float32Array,
- Float64Array, GenericBinaryArray, GenericListArray, GenericStringArray,
- Int32Array, Int64Array, IntervalDayTimeArray,
IntervalMonthDayNanoArray,
- IntervalYearMonthArray, LargeListArray, ListArray, MapArray, NullArray,
- OffsetSizeTrait, PrimitiveArray, StringArray, StructArray,
- TimestampMicrosecondArray, TimestampMillisecondArray,
TimestampNanosecondArray,
- TimestampSecondArray, UInt32Array, UInt64Array, UInt8Array, UnionArray,
+ Decimal256Array, DictionaryArray, FixedSizeBinaryArray,
FixedSizeListArray,
+ Float32Array, Float64Array, GenericBinaryArray, GenericListArray,
+ GenericStringArray, Int32Array, Int64Array, IntervalDayTimeArray,
+ IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeListArray,
ListArray,
+ MapArray, NullArray, OffsetSizeTrait, PrimitiveArray, StringArray,
StructArray,
+ Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
+ Time64NanosecondArray, TimestampMicrosecondArray,
TimestampMillisecondArray,
+ TimestampNanosecondArray, TimestampSecondArray, UInt32Array,
UInt64Array,
+ UInt8Array, UnionArray,
},
datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType},
};
-use arrow_array::Decimal256Array;
// Downcast ArrayRef to Date32Array
pub fn as_date32_array(array: &dyn Array) -> Result<&Date32Array> {
@@ -154,6 +155,26 @@ pub fn as_union_array(array: &dyn Array) ->
Result<&UnionArray> {
Ok(downcast_value!(array, UnionArray))
}
+// Downcast ArrayRef to Time32SecondArray
+pub fn as_time32_second_array(array: &dyn Array) -> Result<&Time32SecondArray>
{
+ Ok(downcast_value!(array, Time32SecondArray))
+}
+
+// Downcast ArrayRef to Time32MillisecondArray
+pub fn as_time32_millisecond_array(array: &dyn Array) ->
Result<&Time32MillisecondArray> {
+ Ok(downcast_value!(array, Time32MillisecondArray))
+}
+
+// Downcast ArrayRef to Time64MicrosecondArray
+pub fn as_time64_microsecond_array(array: &dyn Array) ->
Result<&Time64MicrosecondArray> {
+ Ok(downcast_value!(array, Time64MicrosecondArray))
+}
+
+// Downcast ArrayRef to Time64NanosecondArray
+pub fn as_time64_nanosecond_array(array: &dyn Array) ->
Result<&Time64NanosecondArray> {
+ Ok(downcast_value!(array, Time64NanosecondArray))
+}
+
// Downcast ArrayRef to TimestampNanosecondArray
pub fn as_timestamp_nanosecond_array(
array: &dyn Array,
diff --git a/datafusion/functions/src/datetime/date_part.rs
b/datafusion/functions/src/datetime/date_part.rs
index 5d2719bf03..b41f7e13cf 100644
--- a/datafusion/functions/src/datetime/date_part.rs
+++ b/datafusion/functions/src/datetime/date_part.rs
@@ -20,14 +20,17 @@ use std::sync::Arc;
use arrow::array::{Array, ArrayRef, Float64Array};
use arrow::compute::{binary, cast, date_part, DatePart};
-use arrow::datatypes::DataType::{Date32, Date64, Float64, Timestamp, Utf8};
+use arrow::datatypes::DataType::{
+ Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8,
+};
use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
use arrow::datatypes::{DataType, TimeUnit};
use datafusion_common::cast::{
- as_date32_array, as_date64_array, as_int32_array,
as_timestamp_microsecond_array,
- as_timestamp_millisecond_array, as_timestamp_nanosecond_array,
- as_timestamp_second_array,
+ as_date32_array, as_date64_array, as_int32_array,
as_time32_millisecond_array,
+ as_time32_second_array, as_time64_microsecond_array,
as_time64_nanosecond_array,
+ as_timestamp_microsecond_array, as_timestamp_millisecond_array,
+ as_timestamp_nanosecond_array, as_timestamp_second_array,
};
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::TypeSignature::Exact;
@@ -68,6 +71,10 @@ impl DatePartFunc {
]),
Exact(vec![Utf8, Date64]),
Exact(vec![Utf8, Date32]),
+ Exact(vec![Utf8, Time32(Second)]),
+ Exact(vec![Utf8, Time32(Millisecond)]),
+ Exact(vec![Utf8, Time64(Microsecond)]),
+ Exact(vec![Utf8, Time64(Nanosecond)]),
],
Volatility::Immutable,
),
@@ -149,12 +156,9 @@ fn date_part_f64(array: &dyn Array, part: DatePart) ->
Result<ArrayRef> {
Ok(cast(date_part(array, part)?.as_ref(), &Float64)?)
}
-/// invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the
+/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the
/// result to a total number of seconds, milliseconds, microseconds or
/// nanoseconds
-///
-/// # Panics
-/// If `array` is not a temporal type such as Timestamp or Date32
fn seconds(array: &dyn Array, unit: TimeUnit) -> Result<ArrayRef> {
let sf = match unit {
Second => 1_f64,
@@ -163,6 +167,7 @@ fn seconds(array: &dyn Array, unit: TimeUnit) ->
Result<ArrayRef> {
Nanosecond => 1_000_000_000_f64,
};
let secs = date_part(array, DatePart::Second)?;
+ // This assumes array is primitive and not a dictionary
let secs = as_int32_array(secs.as_ref())?;
let subsecs = date_part(array, DatePart::Nanosecond)?;
let subsecs = as_int32_array(subsecs.as_ref())?;
@@ -189,6 +194,16 @@ fn epoch(array: &dyn Array) -> Result<ArrayRef> {
}
Date32 => as_date32_array(array)?.unary(|x| x as f64 *
SECONDS_IN_A_DAY),
Date64 => as_date64_array(array)?.unary(|x| x as f64 / 1_000_f64),
+ Time32(Second) => as_time32_second_array(array)?.unary(|x| x as f64),
+ Time32(Millisecond) => {
+ as_time32_millisecond_array(array)?.unary(|x| x as f64 / 1_000_f64)
+ }
+ Time64(Microsecond) => {
+ as_time64_microsecond_array(array)?.unary(|x| x as f64 /
1_000_000_f64)
+ }
+ Time64(Nanosecond) => {
+ as_time64_nanosecond_array(array)?.unary(|x| x as f64 /
1_000_000_000_f64)
+ }
d => return exec_err!("Can not convert {d:?} to epoch"),
};
Ok(Arc::new(f))
diff --git a/datafusion/sqllogictest/test_files/expr.slt
b/datafusion/sqllogictest/test_files/expr.slt
index 73fb5eec97..d6343f9a3f 100644
--- a/datafusion/sqllogictest/test_files/expr.slt
+++ b/datafusion/sqllogictest/test_files/expr.slt
@@ -939,6 +939,293 @@ SELECT date_part('nanosecond',
'2020-09-08T12:00:12.12345678+00:00')
----
12123456780
+# test_date_part_time
+
+## time32 seconds
+query R
+SELECT date_part('hour', arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+23
+
+query R
+SELECT extract(hour from arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+23
+
+query R
+SELECT date_part('minute', arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+32
+
+query R
+SELECT extract(minute from arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+32
+
+query R
+SELECT date_part('second', arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+50
+
+query R
+SELECT extract(second from arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+50
+
+query R
+SELECT date_part('millisecond', arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+50000
+
+query R
+SELECT extract(millisecond from arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+50000
+
+query R
+SELECT date_part('microsecond', arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+50000000
+
+query R
+SELECT extract(microsecond from arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+50000000
+
+query R
+SELECT date_part('nanosecond', arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+50000000000
+
+query R
+SELECT extract(nanosecond from arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+50000000000
+
+query R
+SELECT date_part('epoch', arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+84770
+
+query R
+SELECT extract(epoch from arrow_cast('23:32:50'::time, 'Time32(Second)'))
+----
+84770
+
+## time32 milliseconds
+query R
+SELECT date_part('hour', arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+23
+
+query R
+SELECT extract(hour from arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+23
+
+query R
+SELECT date_part('minute', arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+32
+
+query R
+SELECT extract(minute from arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+32
+
+query R
+SELECT date_part('second', arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+50.123
+
+query R
+SELECT extract(second from arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+50.123
+
+query R
+SELECT date_part('millisecond', arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+50123
+
+query R
+SELECT extract(millisecond from arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+50123
+
+query R
+SELECT date_part('microsecond', arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+50123000
+
+query R
+SELECT extract(microsecond from arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+50123000
+
+query R
+SELECT date_part('nanosecond', arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+50123000000
+
+query R
+SELECT extract(nanosecond from arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+50123000000
+
+query R
+SELECT date_part('epoch', arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+84770.123
+
+query R
+SELECT extract(epoch from arrow_cast('23:32:50.123'::time,
'Time32(Millisecond)'))
+----
+84770.123
+
+## time64 microseconds
+query R
+SELECT date_part('hour', arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+23
+
+query R
+SELECT extract(hour from arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+23
+
+query R
+SELECT date_part('minute', arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+32
+
+query R
+SELECT extract(minute from arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+32
+
+query R
+SELECT date_part('second', arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+50.123456
+
+query R
+SELECT extract(second from arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+50.123456
+
+query R
+SELECT date_part('millisecond', arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+50123.456
+
+query R
+SELECT extract(millisecond from arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+50123.456
+
+query R
+SELECT date_part('microsecond', arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+50123456
+
+query R
+SELECT extract(microsecond from arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+50123456
+
+query R
+SELECT date_part('nanosecond', arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+50123456000
+
+query R
+SELECT extract(nanosecond from arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+50123456000
+
+query R
+SELECT date_part('epoch', arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+84770.123456
+
+query R
+SELECT extract(epoch from arrow_cast('23:32:50.123456'::time,
'Time64(Microsecond)'))
+----
+84770.123456
+
+## time64 nanoseconds
+query R
+SELECT date_part('hour', arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+23
+
+query R
+SELECT extract(hour from arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+23
+
+query R
+SELECT date_part('minute', arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+32
+
+query R
+SELECT extract(minute from arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+32
+
+query R
+SELECT date_part('second', arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+50.123456789
+
+query R
+SELECT extract(second from arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+50.123456789
+
+query R
+SELECT date_part('millisecond', arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+50123.456789
+
+query R
+SELECT extract(millisecond from arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+50123.456789
+
+# just some floating point stuff happening in the result here
+query R
+SELECT date_part('microsecond', arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+50123456.789000005
+
+query R
+SELECT extract(microsecond from arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+50123456.789000005
+
+query R
+SELECT date_part('nanosecond', arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+50123456789
+
+query R
+SELECT extract(nanosecond from arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+50123456789
+
+query R
+SELECT date_part('epoch', arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+84770.123456789
+
+query R
+SELECT extract(epoch from arrow_cast('23:32:50.123456789'::time,
'Time64(Nanosecond)'))
+----
+84770.123456789
+
# test_extract_epoch
query R
diff --git a/docs/source/user-guide/sql/scalar_functions.md
b/docs/source/user-guide/sql/scalar_functions.md
index b63fa9950a..d4570dbc35 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -1624,34 +1624,19 @@ _Alias of [date_part](#date_part)._
### `extract`
Returns a sub-field from a time value as an integer.
-Similar to `date_part`, but with different arguments.
```
extract(field FROM source)
```
-#### Arguments
-
-- **field**: Part or field of the date to return.
- The following date fields are supported:
+Equivalent to calling `date_part('field', source)`. For example, these are
equivalent:
- - year
- - quarter _(emits value in inclusive range [1, 4] based on which quartile of
the year the date is in)_
- - month
- - week _(week of the year)_
- - day _(day of the month)_
- - hour
- - minute
- - second
- - millisecond
- - microsecond
- - nanosecond
- - dow _(day of the week)_
- - doy _(day of the year)_
- - epoch _(seconds since Unix epoch)_
+```sql
+extract(day FROM '2024-04-13'::date)
+date_part('day', '2024-04-13'::date)
+```
-- **source**: Source time expression to operate on.
- Can be a constant, column, or function.
+See [date_part](#date_part).
### `make_date`