This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 3e247958b1 feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units (#5698)
3e247958b1 is described below
commit 3e247958b1dc30b67b309f712be75f3c725d427e
Author: Stuart Carnie <[email protected]>
AuthorDate: Fri Mar 24 01:31:19 2023 +1100
feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units (#5698)
---
datafusion/common/src/parsers.rs | 37 ++++++++++----
.../tests/sqllogictests/test_files/timestamps.slt | 22 ++++++++
datafusion/expr/src/function.rs | 9 ++++
.../physical-expr/src/datetime_expressions.rs | 58 ++++++++++++++++++++--
4 files changed, 111 insertions(+), 15 deletions(-)
diff --git a/datafusion/common/src/parsers.rs b/datafusion/common/src/parsers.rs
index 6a61da970d..fbc663e1aa 100644
--- a/datafusion/common/src/parsers.rs
+++ b/datafusion/common/src/parsers.rs
@@ -77,19 +77,22 @@ impl CompressionTypeVariant {
}
}
+#[rustfmt::skip]
#[derive(Clone, Copy)]
#[repr(u16)]
enum IntervalType {
- Century = 0b_00_0000_0001,
- Decade = 0b_00_0000_0010,
- Year = 0b_00_0000_0100,
- Month = 0b_00_0000_1000,
- Week = 0b_00_0001_0000,
- Day = 0b_00_0010_0000,
- Hour = 0b_00_0100_0000,
- Minute = 0b_00_1000_0000,
- Second = 0b_01_0000_0000,
- Millisecond = 0b_10_0000_0000,
+ Century = 0b_0000_0000_0001,
+ Decade = 0b_0000_0000_0010,
+ Year = 0b_0000_0000_0100,
+ Month = 0b_0000_0000_1000,
+ Week = 0b_0000_0001_0000,
+ Day = 0b_0000_0010_0000,
+ Hour = 0b_0000_0100_0000,
+ Minute = 0b_0000_1000_0000,
+ Second = 0b_0001_0000_0000,
+ Millisecond = 0b_0010_0000_0000,
+ Microsecond = 0b_0100_0000_0000,
+ Nanosecond = 0b_1000_0000_0000,
}
impl FromStr for IntervalType {
@@ -107,6 +110,8 @@ impl FromStr for IntervalType {
"minute" | "minutes" => Ok(Self::Minute),
"second" | "seconds" => Ok(Self::Second),
"millisecond" | "milliseconds" => Ok(Self::Millisecond),
+ "microsecond" | "microseconds" => Ok(Self::Microsecond),
+ "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
_ => Err(DataFusionError::NotImplemented(format!(
"Unknown interval type: {s}"
))),
@@ -194,6 +199,8 @@ pub fn parse_interval(leading_field: &str, value: &str) -> Result<ScalarValue> {
}
IntervalType::Second => Ok((0, 0, interval_period * NANOS_PER_SECOND)),
IntervalType::Millisecond => Ok((0, 0, interval_period * 1_000_000f64)),
+ IntervalType::Microsecond => Ok((0, 0, interval_period * 1_000f64)),
+ IntervalType::Nanosecond => Ok((0, 0, interval_period)),
}
};
@@ -372,6 +379,16 @@ mod test {
ScalarValue::new_interval_mdn(12, 1, 1_00 * 1_000)
);
+ assert_eq!(
+ parse_interval("months", "1 year 1 day 1 microsecond").unwrap(),
+ ScalarValue::new_interval_mdn(12, 1, 1_000)
+ );
+
+ assert_eq!(
+ parse_interval("months", "1 year 1 day 5 nanoseconds").unwrap(),
+ ScalarValue::new_interval_mdn(12, 1, 5)
+ );
+
assert_eq!(
parse_interval("months", "1 month -1 second").unwrap(),
ScalarValue::new_interval_mdn(1, 0, -1_000_000_000)
diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
index 7ca513b99b..40acda2d8b 100644
--- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
@@ -76,6 +76,28 @@ SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03
14:38:50Z', TIMESTA
----
2022-08-03T14:30:00
+# Supports Month-Day-Nano nanosecond interval
+query P
+SELECT DATE_BIN(INTERVAL '10 nanoseconds', TIMESTAMP '2022-08-03 14:38:50.000000016Z', TIMESTAMP '1970-01-01T00:00:00Z')
+----
+2022-08-03T14:38:50.000000010
+
+# Supports Month-Day-Nano nanosecond interval via fractions
+query P
+SELECT DATE_BIN(INTERVAL '0.000000010 seconds', TIMESTAMP '2022-08-03 14:38:50.000000016Z', TIMESTAMP '1970-01-01T00:00:00Z')
+----
+2022-08-03T14:38:50.000000010
+
+# Supports Month-Day-Nano microsecond interval
+query P
+SELECT DATE_BIN(INTERVAL '5 microseconds', TIMESTAMP '2022-08-03 14:38:50.000006Z', TIMESTAMP '1970-01-01T00:00:00Z')
+----
+2022-08-03T14:38:50.000005
+
+# Does not support months for Month-Day-Nano interval
+statement error This feature is not implemented: DATE_BIN stride does not support month intervals
+SELECT DATE_BIN(INTERVAL '1 month 5 nanoseconds', TIMESTAMP '2022-08-03 14:38:50.000000006Z', TIMESTAMP '1970-01-01T00:00:00Z')
+
# Can coerce string interval arguments
query P
SELECT DATE_BIN('15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP
'1970-01-01T00:00:00Z')
diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs
index defb41d452..03932ccc4d 100644
--- a/datafusion/expr/src/function.rs
+++ b/datafusion/expr/src/function.rs
@@ -455,10 +455,19 @@ pub fn signature(fun: &BuiltinScalarFunction) ->
Signature {
DataType::Timestamp(TimeUnit::Nanosecond, None),
DataType::Timestamp(TimeUnit::Nanosecond, None),
]),
+ TypeSignature::Exact(vec![
+ DataType::Interval(IntervalUnit::MonthDayNano),
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ ]),
TypeSignature::Exact(vec![
DataType::Interval(IntervalUnit::DayTime),
DataType::Timestamp(TimeUnit::Nanosecond, None),
]),
+ TypeSignature::Exact(vec![
+ DataType::Interval(IntervalUnit::MonthDayNano),
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ ]),
],
fun.volatility(),
),
diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs
index 2763ff981c..c0df41df61 100644
--- a/datafusion/physical-expr/src/datetime_expressions.rs
+++ b/datafusion/physical-expr/src/datetime_expressions.rs
@@ -28,8 +28,8 @@ use arrow::{
compute::kernels::cast_utils::string_to_timestamp_nanos,
datatypes::{
ArrowNumericType, ArrowPrimitiveType, ArrowTemporalType, DataType,
- IntervalDayTimeType, TimestampMicrosecondType,
TimestampMillisecondType,
- TimestampNanosecondType, TimestampSecondType,
+ IntervalDayTimeType, IntervalMonthDayNanoType,
TimestampMicrosecondType,
+ TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
},
};
use chrono::prelude::*;
@@ -354,6 +354,24 @@ fn date_bin_impl(
}
}
}
+ ColumnarValue::Scalar(ScalarValue::IntervalMonthDayNano(Some(v))) => {
+ let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
+ if months != 0 {
+ return Err(DataFusionError::NotImplemented(
+ "DATE_BIN stride does not support month intervals".to_string(),
+ ));
+ }
+ let nanos = (Duration::days(days as i64) + Duration::nanoseconds(nanos))
+ .num_nanoseconds();
+ match nanos {
+ Some(v) => v,
+ _ => {
+ return Err(DataFusionError::Execution(
+ "DATE_BIN stride argument is too large".to_string(),
+ ))
+ }
+ }
+ }
ColumnarValue::Scalar(v) => {
return Err(DataFusionError::Execution(format!(
"DATE_BIN expects stride argument to be an INTERVAL but got {}",
@@ -802,6 +820,14 @@ mod tests {
]);
assert!(res.is_ok());
+ // stride supports month-day-nano
+ let res = date_bin(&[
+ ColumnarValue::Scalar(ScalarValue::IntervalMonthDayNano(Some(1))),
+ ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1),
None)),
+ ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1),
None)),
+ ]);
+ assert!(res.is_ok());
+
//
// Fallible test cases
//
@@ -816,16 +842,16 @@ mod tests {
// stride: invalid type
let res = date_bin(&[
- ColumnarValue::Scalar(ScalarValue::IntervalMonthDayNano(Some(1))),
+ ColumnarValue::Scalar(ScalarValue::IntervalYearMonth(Some(1))),
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1),
None)),
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1),
None)),
]);
assert_eq!(
res.err().unwrap().to_string(),
- "Execution error: DATE_BIN expects stride argument to be an INTERVAL but got Interval(MonthDayNano)"
+ "Execution error: DATE_BIN expects stride argument to be an INTERVAL but got Interval(YearMonth)"
);
- // stride: overflow
+ // stride: overflow of day-time interval
let res = date_bin(&[
ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some(i64::MAX))),
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1),
None)),
@@ -836,6 +862,28 @@ mod tests {
"Execution error: DATE_BIN stride argument is too large"
);
+ // stride: overflow of month-day-nano interval
+ let res = date_bin(&[
+ ColumnarValue::Scalar(ScalarValue::new_interval_mdn(0, i32::MAX,
1)),
+ ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1),
None)),
+ ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1),
None)),
+ ]);
+ assert_eq!(
+ res.err().unwrap().to_string(),
+ "Execution error: DATE_BIN stride argument is too large"
+ );
+
+ // stride: month intervals
+ let res = date_bin(&[
+ ColumnarValue::Scalar(ScalarValue::new_interval_mdn(1, 1, 1)),
+ ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1),
None)),
+ ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1),
None)),
+ ]);
+ assert_eq!(
+ res.err().unwrap().to_string(),
+ "This feature is not implemented: DATE_BIN stride does not support month intervals"
+ );
+
// origin: invalid type
let res = date_bin(&[
ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some(1))),