This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 3e247958b1 feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units (#5698)
3e247958b1 is described below

commit 3e247958b1dc30b67b309f712be75f3c725d427e
Author: Stuart Carnie <[email protected]>
AuthorDate: Fri Mar 24 01:31:19 2023 +1100

    feat: `date_bin` supports MonthDayNano, microsecond and nanosecond units (#5698)
---
 datafusion/common/src/parsers.rs                   | 37 ++++++++++----
 .../tests/sqllogictests/test_files/timestamps.slt  | 22 ++++++++
 datafusion/expr/src/function.rs                    |  9 ++++
 .../physical-expr/src/datetime_expressions.rs      | 58 ++++++++++++++++++++--
 4 files changed, 111 insertions(+), 15 deletions(-)

diff --git a/datafusion/common/src/parsers.rs b/datafusion/common/src/parsers.rs
index 6a61da970d..fbc663e1aa 100644
--- a/datafusion/common/src/parsers.rs
+++ b/datafusion/common/src/parsers.rs
@@ -77,19 +77,22 @@ impl CompressionTypeVariant {
     }
 }
 
+#[rustfmt::skip]
 #[derive(Clone, Copy)]
 #[repr(u16)]
 enum IntervalType {
-    Century = 0b_00_0000_0001,
-    Decade = 0b_00_0000_0010,
-    Year = 0b_00_0000_0100,
-    Month = 0b_00_0000_1000,
-    Week = 0b_00_0001_0000,
-    Day = 0b_00_0010_0000,
-    Hour = 0b_00_0100_0000,
-    Minute = 0b_00_1000_0000,
-    Second = 0b_01_0000_0000,
-    Millisecond = 0b_10_0000_0000,
+    Century     = 0b_0000_0000_0001,
+    Decade      = 0b_0000_0000_0010,
+    Year        = 0b_0000_0000_0100,
+    Month       = 0b_0000_0000_1000,
+    Week        = 0b_0000_0001_0000,
+    Day         = 0b_0000_0010_0000,
+    Hour        = 0b_0000_0100_0000,
+    Minute      = 0b_0000_1000_0000,
+    Second      = 0b_0001_0000_0000,
+    Millisecond = 0b_0010_0000_0000,
+    Microsecond = 0b_0100_0000_0000,
+    Nanosecond  = 0b_1000_0000_0000,
 }
 
 impl FromStr for IntervalType {
@@ -107,6 +110,8 @@ impl FromStr for IntervalType {
             "minute" | "minutes" => Ok(Self::Minute),
             "second" | "seconds" => Ok(Self::Second),
             "millisecond" | "milliseconds" => Ok(Self::Millisecond),
+            "microsecond" | "microseconds" => Ok(Self::Microsecond),
+            "nanosecond" | "nanoseconds" => Ok(Self::Nanosecond),
             _ => Err(DataFusionError::NotImplemented(format!(
                 "Unknown interval type: {s}"
             ))),
@@ -194,6 +199,8 @@ pub fn parse_interval(leading_field: &str, value: &str) -> 
Result<ScalarValue> {
             }
             IntervalType::Second => Ok((0, 0, interval_period * 
NANOS_PER_SECOND)),
             IntervalType::Millisecond => Ok((0, 0, interval_period * 
1_000_000f64)),
+            IntervalType::Microsecond => Ok((0, 0, interval_period * 
1_000f64)),
+            IntervalType::Nanosecond => Ok((0, 0, interval_period)),
         }
     };
 
@@ -372,6 +379,16 @@ mod test {
             ScalarValue::new_interval_mdn(12, 1, 1_00 * 1_000)
         );
 
+        assert_eq!(
+            parse_interval("months", "1 year 1 day 1 microsecond").unwrap(),
+            ScalarValue::new_interval_mdn(12, 1, 1_000)
+        );
+
+        assert_eq!(
+            parse_interval("months", "1 year 1 day 5 nanoseconds").unwrap(),
+            ScalarValue::new_interval_mdn(12, 1, 5)
+        );
+
         assert_eq!(
             parse_interval("months", "1 month -1 second").unwrap(),
             ScalarValue::new_interval_mdn(1, 0, -1_000_000_000)
diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
index 7ca513b99b..40acda2d8b 100644
--- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt
@@ -76,6 +76,28 @@ SELECT DATE_BIN(INTERVAL '15 minutes', TIMESTAMP '2022-08-03 
14:38:50Z', TIMESTA
 ----
 2022-08-03T14:30:00
 
+# Supports Month-Day-Nano nanosecond interval
+query P
+SELECT DATE_BIN(INTERVAL '10 nanoseconds', TIMESTAMP '2022-08-03 
14:38:50.000000016Z', TIMESTAMP '1970-01-01T00:00:00Z')
+----
+2022-08-03T14:38:50.000000010
+
+# Supports Month-Day-Nano nanosecond interval via fractions
+query P
+SELECT DATE_BIN(INTERVAL '0.000000010 seconds', TIMESTAMP '2022-08-03 
14:38:50.000000016Z', TIMESTAMP '1970-01-01T00:00:00Z')
+----
+2022-08-03T14:38:50.000000010
+
+# Supports Month-Day-Nano microsecond interval
+query P
+SELECT DATE_BIN(INTERVAL '5 microseconds', TIMESTAMP '2022-08-03 
14:38:50.000006Z', TIMESTAMP '1970-01-01T00:00:00Z')
+----
+2022-08-03T14:38:50.000005
+
+# Does not support months for Month-Day-Nano interval
+statement error This feature is not implemented: DATE_BIN stride does not 
support month intervals
+SELECT DATE_BIN(INTERVAL '1 month 5 nanoseconds', TIMESTAMP '2022-08-03 
14:38:50.000000006Z', TIMESTAMP '1970-01-01T00:00:00Z')
+
 # Can coerce string interval arguments
 query P
 SELECT DATE_BIN('15 minutes', TIMESTAMP '2022-08-03 14:38:50Z', TIMESTAMP 
'1970-01-01T00:00:00Z')
diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs
index defb41d452..03932ccc4d 100644
--- a/datafusion/expr/src/function.rs
+++ b/datafusion/expr/src/function.rs
@@ -455,10 +455,19 @@ pub fn signature(fun: &BuiltinScalarFunction) -> 
Signature {
                     DataType::Timestamp(TimeUnit::Nanosecond, None),
                     DataType::Timestamp(TimeUnit::Nanosecond, None),
                 ]),
+                TypeSignature::Exact(vec![
+                    DataType::Interval(IntervalUnit::MonthDayNano),
+                    DataType::Timestamp(TimeUnit::Nanosecond, None),
+                    DataType::Timestamp(TimeUnit::Nanosecond, None),
+                ]),
                 TypeSignature::Exact(vec![
                     DataType::Interval(IntervalUnit::DayTime),
                     DataType::Timestamp(TimeUnit::Nanosecond, None),
                 ]),
+                TypeSignature::Exact(vec![
+                    DataType::Interval(IntervalUnit::MonthDayNano),
+                    DataType::Timestamp(TimeUnit::Nanosecond, None),
+                ]),
             ],
             fun.volatility(),
         ),
diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs
index 2763ff981c..c0df41df61 100644
--- a/datafusion/physical-expr/src/datetime_expressions.rs
+++ b/datafusion/physical-expr/src/datetime_expressions.rs
@@ -28,8 +28,8 @@ use arrow::{
     compute::kernels::cast_utils::string_to_timestamp_nanos,
     datatypes::{
         ArrowNumericType, ArrowPrimitiveType, ArrowTemporalType, DataType,
-        IntervalDayTimeType, TimestampMicrosecondType, 
TimestampMillisecondType,
-        TimestampNanosecondType, TimestampSecondType,
+        IntervalDayTimeType, IntervalMonthDayNanoType, 
TimestampMicrosecondType,
+        TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
     },
 };
 use chrono::prelude::*;
@@ -354,6 +354,24 @@ fn date_bin_impl(
                 }
             }
         }
+        ColumnarValue::Scalar(ScalarValue::IntervalMonthDayNano(Some(v))) => {
+            let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(*v);
+            if months != 0 {
+                return Err(DataFusionError::NotImplemented(
+                    "DATE_BIN stride does not support month 
intervals".to_string(),
+                ));
+            }
+            let nanos = (Duration::days(days as i64) + 
Duration::nanoseconds(nanos))
+                .num_nanoseconds();
+            match nanos {
+                Some(v) => v,
+                _ => {
+                    return Err(DataFusionError::Execution(
+                        "DATE_BIN stride argument is too large".to_string(),
+                    ))
+                }
+            }
+        }
         ColumnarValue::Scalar(v) => {
             return Err(DataFusionError::Execution(format!(
                 "DATE_BIN expects stride argument to be an INTERVAL but got 
{}",
@@ -802,6 +820,14 @@ mod tests {
         ]);
         assert!(res.is_ok());
 
+        // stride supports month-day-nano
+        let res = date_bin(&[
+            ColumnarValue::Scalar(ScalarValue::IntervalMonthDayNano(Some(1))),
+            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), 
None)),
+            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), 
None)),
+        ]);
+        assert!(res.is_ok());
+
         //
         // Fallible test cases
         //
@@ -816,16 +842,16 @@ mod tests {
 
         // stride: invalid type
         let res = date_bin(&[
-            ColumnarValue::Scalar(ScalarValue::IntervalMonthDayNano(Some(1))),
+            ColumnarValue::Scalar(ScalarValue::IntervalYearMonth(Some(1))),
             ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), 
None)),
             ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), 
None)),
         ]);
         assert_eq!(
             res.err().unwrap().to_string(),
-            "Execution error: DATE_BIN expects stride argument to be an 
INTERVAL but got Interval(MonthDayNano)"
+            "Execution error: DATE_BIN expects stride argument to be an 
INTERVAL but got Interval(YearMonth)"
         );
 
-        // stride: overflow
+        // stride: overflow of day-time interval
         let res = date_bin(&[
             
ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some(i64::MAX))),
             ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), 
None)),
@@ -836,6 +862,28 @@ mod tests {
             "Execution error: DATE_BIN stride argument is too large"
         );
 
+        // stride: overflow of month-day-nano interval
+        let res = date_bin(&[
+            ColumnarValue::Scalar(ScalarValue::new_interval_mdn(0, i32::MAX, 
1)),
+            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), 
None)),
+            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), 
None)),
+        ]);
+        assert_eq!(
+            res.err().unwrap().to_string(),
+            "Execution error: DATE_BIN stride argument is too large"
+        );
+
+        // stride: month intervals
+        let res = date_bin(&[
+            ColumnarValue::Scalar(ScalarValue::new_interval_mdn(1, 1, 1)),
+            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), 
None)),
+            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), 
None)),
+        ]);
+        assert_eq!(
+            res.err().unwrap().to_string(),
+            "This feature is not implemented: DATE_BIN stride does not support 
month intervals"
+        );
+
         // origin: invalid type
         let res = date_bin(&[
             ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some(1))),

Reply via email to