This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new dd2de91bf4 Add support for Arrow Duration type in Substrait (#16503)
dd2de91bf4 is described below

commit dd2de91bf4dcf43c32306aae67d49c650a3846a3
Author: Joseph Koshakow <kosh...@gmail.com>
AuthorDate: Wed Jun 25 15:33:30 2025 -0400

    Add support for Arrow Duration type in Substrait (#16503)
    
    * Add support for Arrow Duration type in Substrait
    
    This commit adds support for Arrow Duration types in Substrait plans.
    Substrait has no equivalent to the Duration type, which only includes
    time-based information (i.e. some multiple of nanoseconds). However,
    the Substrait Interval Day type is very similar: it stores day and
    time-based information. This commit converts Arrow Duration types into
    Substrait Interval Day types, with a Duration-specific type variation
    reference, so that it can round-trip back to a Duration.
    
    An alternative approach would be to use a new Substrait user defined
    type.
    
    Resolves #16285
    
    * Respond to comments
    
    * Fix docs
---
 .../substrait/src/logical_plan/consumer/types.rs   | 26 ++++++++++++++++++++--
 .../substrait/src/logical_plan/producer/types.rs   | 25 +++++++++++++++++++--
 datafusion/substrait/src/variation_const.rs        |  9 ++++++++
 3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/datafusion/substrait/src/logical_plan/consumer/types.rs b/datafusion/substrait/src/logical_plan/consumer/types.rs
index 7bc30e433d..4ea479e7cc 100644
--- a/datafusion/substrait/src/logical_plan/consumer/types.rs
+++ b/datafusion/substrait/src/logical_plan/consumer/types.rs
@@ -21,7 +21,8 @@ use super::SubstraitConsumer;
 use crate::variation_const::{
     DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
     DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
-    DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
+    DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF,
+    DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
     INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
     INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
     LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
@@ -213,7 +214,28 @@ pub fn from_substrait_type(
             r#type::Kind::IntervalYear(_) => {
                 Ok(DataType::Interval(IntervalUnit::YearMonth))
             }
-            r#type::Kind::IntervalDay(_) => Ok(DataType::Interval(IntervalUnit::DayTime)),
+            r#type::Kind::IntervalDay(i) => match i.type_variation_reference {
+                DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF => {
+                    Ok(DataType::Interval(IntervalUnit::DayTime))
+                }
+                DURATION_INTERVAL_DAY_TYPE_VARIATION_REF => {
+                    let duration_unit = match i.precision {
+                        Some(0) => Ok(TimeUnit::Second),
+                        Some(3) => Ok(TimeUnit::Millisecond),
+                        Some(6) => Ok(TimeUnit::Microsecond),
+                        Some(9) => Ok(TimeUnit::Nanosecond),
+                        p => {
+                            not_impl_err!(
+                                "Unsupported Substrait precision {p:?} for Duration"
+                            )
+                        }
+                    }?;
+                    Ok(DataType::Duration(duration_unit))
+                }
+                v => not_impl_err!(
+                    "Unsupported Substrait type variation {v} of type {s_kind:?}"
+                ),
+            },
             r#type::Kind::IntervalCompound(_) => {
                 Ok(DataType::Interval(IntervalUnit::MonthDayNano))
             }
diff --git a/datafusion/substrait/src/logical_plan/producer/types.rs b/datafusion/substrait/src/logical_plan/producer/types.rs
index 61b7a79095..6a63bbef5d 100644
--- a/datafusion/substrait/src/logical_plan/producer/types.rs
+++ b/datafusion/substrait/src/logical_plan/producer/types.rs
@@ -19,7 +19,8 @@ use crate::logical_plan::producer::utils::flatten_names;
 use crate::variation_const::{
     DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
     DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
-    DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
+    DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF,
+    DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
     LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
     VIEW_CONTAINER_TYPE_VARIATION_REF,
 };
@@ -153,7 +154,7 @@ pub(crate) fn to_substrait_type(
                 }),
                 IntervalUnit::DayTime => Ok(substrait::proto::Type {
                     kind: Some(r#type::Kind::IntervalDay(r#type::IntervalDay {
-                        type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
+                        type_variation_reference: DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF,
                         nullability,
                         precision: Some(3), // DayTime precision is always milliseconds
                     })),
@@ -171,6 +172,21 @@ pub(crate) fn to_substrait_type(
                 }
             }
         }
+        DataType::Duration(duration_unit) => {
+            let precision = match duration_unit {
+                TimeUnit::Second => 0,
+                TimeUnit::Millisecond => 3,
+                TimeUnit::Microsecond => 6,
+                TimeUnit::Nanosecond => 9,
+            };
+            Ok(substrait::proto::Type {
+                kind: Some(r#type::Kind::IntervalDay(r#type::IntervalDay {
+                    type_variation_reference: DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
+                    nullability,
+                    precision: Some(precision),
+                })),
+            })
+        }
         DataType::Binary => Ok(substrait::proto::Type {
             kind: Some(r#type::Kind::Binary(r#type::Binary {
                 type_variation_reference: DEFAULT_CONTAINER_TYPE_VARIATION_REF,
@@ -388,6 +404,11 @@ mod tests {
         round_trip_type(DataType::Interval(IntervalUnit::MonthDayNano))?;
         round_trip_type(DataType::Interval(IntervalUnit::DayTime))?;
 
+        round_trip_type(DataType::Duration(TimeUnit::Second))?;
+        round_trip_type(DataType::Duration(TimeUnit::Millisecond))?;
+        round_trip_type(DataType::Duration(TimeUnit::Microsecond))?;
+        round_trip_type(DataType::Duration(TimeUnit::Nanosecond))?;
+
         Ok(())
     }
 
diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs
index e5bebf8e11..efde8efe50 100644
--- a/datafusion/substrait/src/variation_const.rs
+++ b/datafusion/substrait/src/variation_const.rs
@@ -55,6 +55,15 @@ pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1;
 pub const VIEW_CONTAINER_TYPE_VARIATION_REF: u32 = 2;
 pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0;
 pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1;
+/// Used for the arrow type [`DataType::Interval`] with [`IntervalUnit::DayTime`].
+///
+/// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval
+/// [`IntervalUnit::DayTime`]: datafusion::arrow::datatypes::IntervalUnit::DayTime
+pub const DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF: u32 = 0;
+/// Used for the arrow type [`DataType::Duration`].
+///
+/// [`DataType::Duration`]: datafusion::arrow::datatypes::DataType::Duration
+pub const DURATION_INTERVAL_DAY_TYPE_VARIATION_REF: u32 = 1;
 
 // For [user-defined types](https://substrait.io/types/type_classes/#user-defined-types).
 /// For [`DataType::Interval`] with [`IntervalUnit::YearMonth`].


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to