This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 169a0d338c chore: switch to using proper Substrait types for 
IntervalYearMonth and IntervalDayTime (#11471)
169a0d338c is described below

commit 169a0d338cd1b3247da199a03add2119b5289d61
Author: Arttu <[email protected]>
AuthorDate: Tue Jul 16 22:33:28 2024 +0200

    chore: switch to using proper Substrait types for IntervalYearMonth and 
IntervalDayTime (#11471)
    
    also clean up IntervalMonthDayNano type - the type itself needs no 
parameters
---
 datafusion/substrait/src/logical_plan/consumer.rs |  46 +++++--
 datafusion/substrait/src/logical_plan/producer.rs | 140 +++++++---------------
 2 files changed, 79 insertions(+), 107 deletions(-)

diff --git a/datafusion/substrait/src/logical_plan/consumer.rs 
b/datafusion/substrait/src/logical_plan/consumer.rs
index a4f7242024..991aa61fbf 100644
--- a/datafusion/substrait/src/logical_plan/consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer.rs
@@ -65,7 +65,7 @@ use std::str::FromStr;
 use std::sync::Arc;
 use substrait::proto::exchange_rel::ExchangeKind;
 use substrait::proto::expression::literal::user_defined::Val;
-use substrait::proto::expression::literal::IntervalDayToSecond;
+use substrait::proto::expression::literal::{IntervalDayToSecond, 
IntervalYearToMonth};
 use substrait::proto::expression::subquery::SubqueryType;
 use substrait::proto::expression::{self, FieldReference, Literal, 
ScalarFunction};
 use substrait::proto::read_rel::local_files::file_or_files::PathType::UriFile;
@@ -1414,7 +1414,7 @@ fn from_substrait_type(
                 })?;
                 let field = Arc::new(Field::new_list_field(
                     from_substrait_type(inner_type, dfs_names, name_idx)?,
-                    // We ignore Substrait's nullability here to match 
to_substrait_literal 
+                    // We ignore Substrait's nullability here to match 
to_substrait_literal
                     // which always creates nullable lists
                     true,
                 ));
@@ -1445,12 +1445,15 @@ fn from_substrait_type(
                 ));
                 match map.type_variation_reference {
                     DEFAULT_CONTAINER_TYPE_VARIATION_REF => {
-                        Ok(DataType::Map(Arc::new(Field::new_struct(
-                            "entries",
-                            [key_field, value_field],
-                            false, // The inner map field is always 
non-nullable (Arrow #1697),
-                        )), false))
-                    },
+                        Ok(DataType::Map(
+                            Arc::new(Field::new_struct(
+                                "entries",
+                                [key_field, value_field],
+                                false, // The inner map field is always 
non-nullable (Arrow #1697),
+                            )),
+                            false,
+                        ))
+                    }
                     v => not_impl_err!(
                         "Unsupported Substrait type variation {v} of type 
{s_kind:?}"
                     )?,
@@ -1467,14 +1470,33 @@ fn from_substrait_type(
                     "Unsupported Substrait type variation {v} of type 
{s_kind:?}"
                 ),
             },
+            r#type::Kind::IntervalYear(i) => match i.type_variation_reference {
+                DEFAULT_TYPE_VARIATION_REF => {
+                    Ok(DataType::Interval(IntervalUnit::YearMonth))
+                }
+                v => not_impl_err!(
+                    "Unsupported Substrait type variation {v} of type 
{s_kind:?}"
+                ),
+            },
+            r#type::Kind::IntervalDay(i) => match i.type_variation_reference {
+                DEFAULT_TYPE_VARIATION_REF => {
+                    Ok(DataType::Interval(IntervalUnit::DayTime))
+                }
+                v => not_impl_err!(
+                    "Unsupported Substrait type variation {v} of type 
{s_kind:?}"
+                ),
+            },
             r#type::Kind::UserDefined(u) => {
                 match u.type_reference {
+                    // Kept for backwards compatibility, use IntervalYear 
instead
                     INTERVAL_YEAR_MONTH_TYPE_REF => {
                         Ok(DataType::Interval(IntervalUnit::YearMonth))
                     }
+                    // Kept for backwards compatibility, use IntervalDay 
instead
                     INTERVAL_DAY_TIME_TYPE_REF => {
                         Ok(DataType::Interval(IntervalUnit::DayTime))
                     }
+                    // Not supported yet by Substrait
                     INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
                         Ok(DataType::Interval(IntervalUnit::MonthDayNano))
                     }
@@ -1484,7 +1506,7 @@ fn from_substrait_type(
                         u.type_variation_reference
                     ),
                 }
-            },
+            }
             r#type::Kind::Struct(s) => 
Ok(DataType::Struct(from_substrait_struct_type(
                 s, dfs_names, name_idx,
             )?)),
@@ -1753,11 +1775,16 @@ fn from_substrait_literal(
             seconds,
             microseconds,
         })) => {
+            // DF only supports millisecond precision, so we lose the micros 
here
             ScalarValue::new_interval_dt(*days, (seconds * 1000) + 
(microseconds / 1000))
         }
+        Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years, 
months })) => {
+            ScalarValue::new_interval_ym(*years, *months)
+        }
         Some(LiteralType::FixedChar(c)) => ScalarValue::Utf8(Some(c.clone())),
         Some(LiteralType::UserDefined(user_defined)) => {
             match user_defined.type_reference {
+                // Kept for backwards compatibility, use IntervalYearToMonth 
instead
                 INTERVAL_YEAR_MONTH_TYPE_REF => {
                     let Some(Val::Value(raw_val)) = user_defined.val.as_ref() 
else {
                         return substrait_err!("Interval year month value is 
empty");
@@ -1770,6 +1797,7 @@ fn from_substrait_literal(
                         })?;
                     
ScalarValue::IntervalYearMonth(Some(i32::from_le_bytes(value_slice)))
                 }
+                // Kept for backwards compatibility, use IntervalDayToSecond 
instead
                 INTERVAL_DAY_TIME_TYPE_REF => {
                     let Some(Val::Value(raw_val)) = user_defined.val.as_ref() 
else {
                         return substrait_err!("Interval day time value is 
empty");
diff --git a/datafusion/substrait/src/logical_plan/producer.rs 
b/datafusion/substrait/src/logical_plan/producer.rs
index 8d039a0502..7849d0bd43 100644
--- a/datafusion/substrait/src/logical_plan/producer.rs
+++ b/datafusion/substrait/src/logical_plan/producer.rs
@@ -48,12 +48,11 @@ use datafusion::logical_expr::{expr, Between, 
JoinConstraint, LogicalPlan, Opera
 use datafusion::prelude::Expr;
 use pbjson_types::Any as ProtoAny;
 use substrait::proto::exchange_rel::{ExchangeKind, RoundRobin, ScatterFields};
-use substrait::proto::expression::literal::user_defined::Val;
-use substrait::proto::expression::literal::UserDefined;
-use substrait::proto::expression::literal::{List, Struct};
+use substrait::proto::expression::literal::{
+    user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Struct, 
UserDefined,
+};
 use substrait::proto::expression::subquery::InPredicate;
 use substrait::proto::expression::window_function::BoundsType;
-use substrait::proto::r#type::{parameter, Parameter};
 use substrait::proto::read_rel::VirtualTable;
 use substrait::proto::{CrossRel, ExchangeRel};
 use substrait::{
@@ -95,9 +94,7 @@ use crate::variation_const::{
     DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
     DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
     DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
-    INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_DAY_TIME_TYPE_URL,
     INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_URL,
-    INTERVAL_YEAR_MONTH_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_URL,
     LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
     TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF,
     TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
@@ -1534,47 +1531,31 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> 
Result<substrait::proto::
             })),
         }),
         DataType::Interval(interval_unit) => {
-            // define two type parameters for convenience
-            let i32_param = Parameter {
-                parameter: 
Some(parameter::Parameter::DataType(substrait::proto::Type {
-                    kind: Some(r#type::Kind::I32(r#type::I32 {
+            match interval_unit {
+                IntervalUnit::YearMonth => Ok(substrait::proto::Type {
+                    kind: Some(r#type::Kind::IntervalYear(r#type::IntervalYear 
{
                         type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
-                        nullability: r#type::Nullability::Unspecified as i32,
+                        nullability,
                     })),
-                })),
-            };
-            let i64_param = Parameter {
-                parameter: 
Some(parameter::Parameter::DataType(substrait::proto::Type {
-                    kind: Some(r#type::Kind::I64(r#type::I64 {
+                }),
+                IntervalUnit::DayTime => Ok(substrait::proto::Type {
+                    kind: Some(r#type::Kind::IntervalDay(r#type::IntervalDay {
                         type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
-                        nullability: r#type::Nullability::Unspecified as i32,
+                        nullability,
                     })),
-                })),
-            };
-
-            let (type_parameters, type_reference) = match interval_unit {
-                IntervalUnit::YearMonth => {
-                    let type_parameters = vec![i32_param];
-                    (type_parameters, INTERVAL_YEAR_MONTH_TYPE_REF)
-                }
-                IntervalUnit::DayTime => {
-                    let type_parameters = vec![i64_param];
-                    (type_parameters, INTERVAL_DAY_TIME_TYPE_REF)
-                }
+                }),
                 IntervalUnit::MonthDayNano => {
-                    // use 2 `i64` as `i128`
-                    let type_parameters = vec![i64_param.clone(), i64_param];
-                    (type_parameters, INTERVAL_MONTH_DAY_NANO_TYPE_REF)
+                    // Substrait doesn't currently support this type, so we 
represent it as a UDT
+                    Ok(substrait::proto::Type {
+                        kind: 
Some(r#type::Kind::UserDefined(r#type::UserDefined {
+                            type_reference: INTERVAL_MONTH_DAY_NANO_TYPE_REF,
+                            type_variation_reference: 
DEFAULT_TYPE_VARIATION_REF,
+                            nullability,
+                            type_parameters: vec![],
+                        })),
+                    })
                 }
-            };
-            Ok(substrait::proto::Type {
-                kind: Some(r#type::Kind::UserDefined(r#type::UserDefined {
-                    type_reference,
-                    type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
-                    nullability,
-                    type_parameters,
-                })),
-            })
+            }
         }
         DataType::Binary => Ok(substrait::proto::Type {
             kind: Some(r#type::Kind::Binary(r#type::Binary {
@@ -1954,45 +1935,23 @@ fn to_substrait_literal(value: &ScalarValue) -> 
Result<Literal> {
             (LiteralType::Date(*d), DATE_32_TYPE_VARIATION_REF)
         }
         // Date64 literal is not supported in Substrait
-        ScalarValue::IntervalYearMonth(Some(i)) => {
-            let bytes = i.to_le_bytes();
-            (
-                LiteralType::UserDefined(UserDefined {
-                    type_reference: INTERVAL_YEAR_MONTH_TYPE_REF,
-                    type_parameters: vec![Parameter {
-                        parameter: Some(parameter::Parameter::DataType(
-                            substrait::proto::Type {
-                                kind: Some(r#type::Kind::I32(r#type::I32 {
-                                    type_variation_reference: 
DEFAULT_TYPE_VARIATION_REF,
-                                    nullability: r#type::Nullability::Required 
as i32,
-                                })),
-                            },
-                        )),
-                    }],
-                    val: Some(Val::Value(ProtoAny {
-                        type_url: INTERVAL_YEAR_MONTH_TYPE_URL.to_string(),
-                        value: bytes.to_vec().into(),
-                    })),
-                }),
-                INTERVAL_YEAR_MONTH_TYPE_REF,
-            )
-        }
+        ScalarValue::IntervalYearMonth(Some(i)) => (
+            LiteralType::IntervalYearToMonth(IntervalYearToMonth {
+                // DF only tracks total months, but there should always be 12 
months in a year
+                years: *i / 12,
+                months: *i % 12,
+            }),
+            DEFAULT_TYPE_VARIATION_REF,
+        ),
         ScalarValue::IntervalMonthDayNano(Some(i)) => {
-            // treat `i128` as two contiguous `i64`
+            // IntervalMonthDayNano is internally represented as a 128-bit 
integer, containing
+            // months (32bit), days (32bit), and nanoseconds (64bit)
             let bytes = i.to_byte_slice();
-            let i64_param = Parameter {
-                parameter: 
Some(parameter::Parameter::DataType(substrait::proto::Type {
-                    kind: Some(r#type::Kind::I64(r#type::I64 {
-                        type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
-                        nullability: r#type::Nullability::Required as i32,
-                    })),
-                })),
-            };
             (
                 LiteralType::UserDefined(UserDefined {
                     type_reference: INTERVAL_MONTH_DAY_NANO_TYPE_REF,
-                    type_parameters: vec![i64_param.clone(), i64_param],
-                    val: Some(Val::Value(ProtoAny {
+                    type_parameters: vec![],
+                    val: Some(user_defined::Val::Value(ProtoAny {
                         type_url: INTERVAL_MONTH_DAY_NANO_TYPE_URL.to_string(),
                         value: bytes.to_vec().into(),
                     })),
@@ -2000,29 +1959,14 @@ fn to_substrait_literal(value: &ScalarValue) -> 
Result<Literal> {
                 INTERVAL_MONTH_DAY_NANO_TYPE_REF,
             )
         }
-        ScalarValue::IntervalDayTime(Some(i)) => {
-            let bytes = i.to_byte_slice();
-            (
-                LiteralType::UserDefined(UserDefined {
-                    type_reference: INTERVAL_DAY_TIME_TYPE_REF,
-                    type_parameters: vec![Parameter {
-                        parameter: Some(parameter::Parameter::DataType(
-                            substrait::proto::Type {
-                                kind: Some(r#type::Kind::I64(r#type::I64 {
-                                    type_variation_reference: 
DEFAULT_TYPE_VARIATION_REF,
-                                    nullability: r#type::Nullability::Required 
as i32,
-                                })),
-                            },
-                        )),
-                    }],
-                    val: Some(Val::Value(ProtoAny {
-                        type_url: INTERVAL_DAY_TIME_TYPE_URL.to_string(),
-                        value: bytes.to_vec().into(),
-                    })),
-                }),
-                INTERVAL_DAY_TIME_TYPE_REF,
-            )
-        }
+        ScalarValue::IntervalDayTime(Some(i)) => (
+            LiteralType::IntervalDayToSecond(IntervalDayToSecond {
+                days: i.days,
+                seconds: i.milliseconds / 1000,
+                microseconds: (i.milliseconds % 1000) * 1000,
+            }),
+            DEFAULT_TYPE_VARIATION_REF,
+        ),
         ScalarValue::Binary(Some(b)) => (
             LiteralType::Binary(b.clone()),
             DEFAULT_CONTAINER_TYPE_VARIATION_REF,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to