This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new dd2de91bf4 Add support for Arrow Duration type in Substrait (#16503) dd2de91bf4 is described below commit dd2de91bf4dcf43c32306aae67d49c650a3846a3 Author: Joseph Koshakow <kosh...@gmail.com> AuthorDate: Wed Jun 25 15:33:30 2025 -0400 Add support for Arrow Duration type in Substrait (#16503) * Add support for Arrow Duration type in Substrait This commit adds support for Arrow Duration types in Substrait plans. Substrait has no equivalent to the Duration type, which only includes time-based information (i.e. some multiple of nanoseconds). However, the Substrait Interval Day type is very similar: it stores day and time-based information. This commit converts Arrow Duration types into Substrait Interval Day types, with a Duration-specific type variation reference, so that it can round trip back to a Duration. An alternative approach would be to use a new Substrait user defined type. Resolves #16285 * Respond to comments * Fix docs --- .../substrait/src/logical_plan/consumer/types.rs | 26 ++++++++++++++++++++-- .../substrait/src/logical_plan/producer/types.rs | 25 +++++++++++++++++++-- datafusion/substrait/src/variation_const.rs | 9 ++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer/types.rs b/datafusion/substrait/src/logical_plan/consumer/types.rs index 7bc30e433d..4ea479e7cc 100644 --- a/datafusion/substrait/src/logical_plan/consumer/types.rs +++ b/datafusion/substrait/src/logical_plan/consumer/types.rs @@ -21,7 +21,8 @@ use super::SubstraitConsumer; use crate::variation_const::{ DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, - DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, + DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, + DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, 
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, @@ -213,7 +214,28 @@ pub fn from_substrait_type( r#type::Kind::IntervalYear(_) => { Ok(DataType::Interval(IntervalUnit::YearMonth)) } - r#type::Kind::IntervalDay(_) => Ok(DataType::Interval(IntervalUnit::DayTime)), + r#type::Kind::IntervalDay(i) => match i.type_variation_reference { + DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF => { + Ok(DataType::Interval(IntervalUnit::DayTime)) + } + DURATION_INTERVAL_DAY_TYPE_VARIATION_REF => { + let duration_unit = match i.precision { + Some(0) => Ok(TimeUnit::Second), + Some(3) => Ok(TimeUnit::Millisecond), + Some(6) => Ok(TimeUnit::Microsecond), + Some(9) => Ok(TimeUnit::Nanosecond), + p => { + not_impl_err!( + "Unsupported Substrait precision {p:?} for Duration" + ) + } + }?; + Ok(DataType::Duration(duration_unit)) + } + v => not_impl_err!( + "Unsupported Substrait type variation {v} of type {s_kind:?}" + ), + }, r#type::Kind::IntervalCompound(_) => { Ok(DataType::Interval(IntervalUnit::MonthDayNano)) } diff --git a/datafusion/substrait/src/logical_plan/producer/types.rs b/datafusion/substrait/src/logical_plan/producer/types.rs index 61b7a79095..6a63bbef5d 100644 --- a/datafusion/substrait/src/logical_plan/producer/types.rs +++ b/datafusion/substrait/src/logical_plan/producer/types.rs @@ -19,7 +19,8 @@ use crate::logical_plan::producer::utils::flatten_names; use crate::variation_const::{ DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF, - DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, + DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, + DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, }; @@ 
-153,7 +154,7 @@ pub(crate) fn to_substrait_type( }), IntervalUnit::DayTime => Ok(substrait::proto::Type { kind: Some(r#type::Kind::IntervalDay(r#type::IntervalDay { - type_variation_reference: DEFAULT_TYPE_VARIATION_REF, + type_variation_reference: DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, nullability, precision: Some(3), // DayTime precision is always milliseconds })), @@ -171,6 +172,21 @@ pub(crate) fn to_substrait_type( } } } + DataType::Duration(duration_unit) => { + let precision = match duration_unit { + TimeUnit::Second => 0, + TimeUnit::Millisecond => 3, + TimeUnit::Microsecond => 6, + TimeUnit::Nanosecond => 9, + }; + Ok(substrait::proto::Type { + kind: Some(r#type::Kind::IntervalDay(r#type::IntervalDay { + type_variation_reference: DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, + nullability, + precision: Some(precision), + })), + }) + } DataType::Binary => Ok(substrait::proto::Type { kind: Some(r#type::Kind::Binary(r#type::Binary { type_variation_reference: DEFAULT_CONTAINER_TYPE_VARIATION_REF, @@ -388,6 +404,11 @@ mod tests { round_trip_type(DataType::Interval(IntervalUnit::MonthDayNano))?; round_trip_type(DataType::Interval(IntervalUnit::DayTime))?; + round_trip_type(DataType::Duration(TimeUnit::Second))?; + round_trip_type(DataType::Duration(TimeUnit::Millisecond))?; + round_trip_type(DataType::Duration(TimeUnit::Microsecond))?; + round_trip_type(DataType::Duration(TimeUnit::Nanosecond))?; + Ok(()) } diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index e5bebf8e11..efde8efe50 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -55,6 +55,15 @@ pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1; pub const VIEW_CONTAINER_TYPE_VARIATION_REF: u32 = 2; pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0; pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1; +/// Used for the arrow type [`DataType::Interval`] with [`IntervalUnit::DayTime`]. 
+/// +/// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval +/// [`IntervalUnit::DayTime`]: datafusion::arrow::datatypes::IntervalUnit::DayTime +pub const DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF: u32 = 0; +/// Used for the arrow type [`DataType::Duration`]. +/// +/// [`DataType::Duration`]: datafusion::arrow::datatypes::DataType::Duration +pub const DURATION_INTERVAL_DAY_TYPE_VARIATION_REF: u32 = 1; // For [user-defined types](https://substrait.io/types/type_classes/#user-defined-types). /// For [`DataType::Interval`] with [`IntervalUnit::YearMonth`]. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org