This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 444a673682 feat: Support Substrait's IntervalCompound type/literal
instead of interval-month-day-nano UDT (#12112)
444a673682 is described below
commit 444a673682a4823b98695568ecdab1087b5b6a60
Author: Arttu <[email protected]>
AuthorDate: Tue Oct 29 10:22:41 2024 -0400
feat: Support Substrait's IntervalCompound type/literal instead of
interval-month-day-nano UDT (#12112)
* feat(substrait): use IntervalCompound instead of interval-month-day-nano
UDT
* clippy
* more clippy
* even more clippy
* fix precision exponent
* add a test
* update deprecation version
* update deprecation comments
---
datafusion/substrait/src/logical_plan/consumer.rs | 116 +++++++++++--
datafusion/substrait/src/logical_plan/producer.rs | 180 ++++++---------------
datafusion/substrait/src/variation_const.rs | 6 +-
.../tests/cases/roundtrip_logical_plan.rs | 22 +--
4 files changed, 153 insertions(+), 171 deletions(-)
diff --git a/datafusion/substrait/src/logical_plan/consumer.rs
b/datafusion/substrait/src/logical_plan/consumer.rs
index 43263196bb..2aaf8ec0aa 100644
--- a/datafusion/substrait/src/logical_plan/consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer.rs
@@ -42,17 +42,18 @@ use crate::variation_const::{
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
- INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF,
- UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
+ LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
+ VIEW_CONTAINER_TYPE_VARIATION_REF,
};
#[allow(deprecated)]
use crate::variation_const::{
- INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_REF,
- INTERVAL_YEAR_MONTH_TYPE_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
- TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF,
- TIMESTAMP_SECOND_TYPE_VARIATION_REF,
+ INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
+ INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
+ TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF,
+ TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF,
};
use datafusion::arrow::array::{new_empty_array, AsArray};
+use datafusion::arrow::temporal_conversions::NANOSECONDS;
use datafusion::common::scalar::ScalarStructBuilder;
use datafusion::dataframe::DataFrame;
use datafusion::logical_expr::expr::InList;
@@ -71,10 +72,10 @@ use datafusion::{
use std::collections::HashSet;
use std::sync::Arc;
use substrait::proto::exchange_rel::ExchangeKind;
-use
substrait::proto::expression::literal::interval_day_to_second::PrecisionMode;
use substrait::proto::expression::literal::user_defined::Val;
use substrait::proto::expression::literal::{
- IntervalDayToSecond, IntervalYearToMonth, UserDefined,
+ interval_day_to_second, IntervalCompound, IntervalDayToSecond,
IntervalYearToMonth,
+ UserDefined,
};
use substrait::proto::expression::subquery::SubqueryType;
use substrait::proto::expression::{FieldReference, Literal, ScalarFunction};
@@ -1845,9 +1846,14 @@ fn from_substrait_type(
Ok(DataType::Interval(IntervalUnit::YearMonth))
}
r#type::Kind::IntervalDay(_) =>
Ok(DataType::Interval(IntervalUnit::DayTime)),
+ r#type::Kind::IntervalCompound(_) => {
+ Ok(DataType::Interval(IntervalUnit::MonthDayNano))
+ }
r#type::Kind::UserDefined(u) => {
if let Some(name) = extensions.types.get(&u.type_reference) {
+ #[allow(deprecated)]
match name.as_ref() {
+ // Kept for backwards compatibility, producers should
use IntervalCompound instead
INTERVAL_MONTH_DAY_NANO_TYPE_NAME =>
Ok(DataType::Interval(IntervalUnit::MonthDayNano)),
_ => not_impl_err!(
"Unsupported Substrait user defined type with
ref {} and variation {}",
@@ -1856,18 +1862,17 @@ fn from_substrait_type(
),
}
} else {
- // Kept for backwards compatibility, new plans should
include the extension instead
#[allow(deprecated)]
match u.type_reference {
- // Kept for backwards compatibility, use IntervalYear
instead
+ // Kept for backwards compatibility, producers should
use IntervalYear instead
INTERVAL_YEAR_MONTH_TYPE_REF => {
Ok(DataType::Interval(IntervalUnit::YearMonth))
}
- // Kept for backwards compatibility, use IntervalDay
instead
+ // Kept for backwards compatibility, producers should
use IntervalDay instead
INTERVAL_DAY_TIME_TYPE_REF => {
Ok(DataType::Interval(IntervalUnit::DayTime))
}
- // Not supported yet by Substrait
+ // Kept for backwards compatibility, producers should
use IntervalCompound instead
INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
Ok(DataType::Interval(IntervalUnit::MonthDayNano))
}
@@ -2285,6 +2290,7 @@ fn from_substrait_literal(
subseconds,
precision_mode,
})) => {
+ use interval_day_to_second::PrecisionMode;
// DF only supports millisecond precision, so for any more
granular type we lose precision
let milliseconds = match precision_mode {
Some(PrecisionMode::Microseconds(ms)) => ms / 1000,
@@ -2309,6 +2315,35 @@ fn from_substrait_literal(
Some(LiteralType::IntervalYearToMonth(IntervalYearToMonth { years,
months })) => {
ScalarValue::new_interval_ym(*years, *months)
}
+ Some(LiteralType::IntervalCompound(IntervalCompound {
+ interval_year_to_month,
+ interval_day_to_second,
+ })) => match (interval_year_to_month, interval_day_to_second) {
+ (
+ Some(IntervalYearToMonth { years, months }),
+ Some(IntervalDayToSecond {
+ days,
+ seconds,
+ subseconds,
+ precision_mode:
+
Some(interval_day_to_second::PrecisionMode::Precision(p)),
+ }),
+ ) => {
+ if *p < 0 || *p > 9 {
+ return plan_err!(
+ "Unsupported Substrait interval day to second
precision: {}",
+ p
+ );
+ }
+ let nanos = *subseconds * i64::pow(10, (9 - p) as u32);
+ ScalarValue::new_interval_mdn(
+ *years * 12 + months,
+ *days,
+ *seconds as i64 * NANOSECONDS + nanos,
+ )
+ }
+ _ => return plan_err!("Substrait compound interval missing
components"),
+ },
Some(LiteralType::FixedChar(c)) => ScalarValue::Utf8(Some(c.clone())),
Some(LiteralType::UserDefined(user_defined)) => {
// Helper function to prevent duplicating this code - can be
inlined once the non-extension path is removed
@@ -2339,6 +2374,8 @@ fn from_substrait_literal(
if let Some(name) =
extensions.types.get(&user_defined.type_reference) {
match name.as_ref() {
+ // Kept for backwards compatibility - producers should use
IntervalCompound instead
+ #[allow(deprecated)]
INTERVAL_MONTH_DAY_NANO_TYPE_NAME => {
interval_month_day_nano(user_defined)?
}
@@ -2351,10 +2388,9 @@ fn from_substrait_literal(
}
}
} else {
- // Kept for backwards compatibility - new plans should include
extension instead
#[allow(deprecated)]
match user_defined.type_reference {
- // Kept for backwards compatibility, use
IntervalYearToMonth instead
+ // Kept for backwards compatibility, producers should
useIntervalYearToMonth instead
INTERVAL_YEAR_MONTH_TYPE_REF => {
let Some(Val::Value(raw_val)) =
user_defined.val.as_ref() else {
return substrait_err!("Interval year month value
is empty");
@@ -2369,7 +2405,7 @@ fn from_substrait_literal(
value_slice,
)))
}
- // Kept for backwards compatibility, use
IntervalDayToSecond instead
+ // Kept for backwards compatibility, producers should
useIntervalDayToSecond instead
INTERVAL_DAY_TIME_TYPE_REF => {
let Some(Val::Value(raw_val)) =
user_defined.val.as_ref() else {
return substrait_err!("Interval day time value is
empty");
@@ -2389,6 +2425,7 @@ fn from_substrait_literal(
milliseconds,
}))
}
+ // Kept for backwards compatibility, producers should
useIntervalCompound instead
INTERVAL_MONTH_DAY_NANO_TYPE_REF => {
interval_month_day_nano(user_defined)?
}
@@ -2738,3 +2775,52 @@ impl BuiltinExprBuilder {
}))
}
}
+
+#[cfg(test)]
+mod test {
+ use crate::extensions::Extensions;
+ use crate::logical_plan::consumer::from_substrait_literal_without_names;
+ use arrow_buffer::IntervalMonthDayNano;
+ use datafusion::error::Result;
+ use datafusion::scalar::ScalarValue;
+ use substrait::proto::expression::literal::{
+ interval_day_to_second, IntervalCompound, IntervalDayToSecond,
+ IntervalYearToMonth, LiteralType,
+ };
+ use substrait::proto::expression::Literal;
+
+ #[test]
+ fn interval_compound_different_precision() -> Result<()> {
+ // DF producer (and thus roundtrip) always uses precision = 9,
+ // this test exists to test with some other value.
+ let substrait = Literal {
+ nullable: false,
+ type_variation_reference: 0,
+ literal_type: Some(LiteralType::IntervalCompound(IntervalCompound {
+ interval_year_to_month: Some(IntervalYearToMonth {
+ years: 1,
+ months: 2,
+ }),
+ interval_day_to_second: Some(IntervalDayToSecond {
+ days: 3,
+ seconds: 4,
+ subseconds: 5,
+ precision_mode: Some(
+ interval_day_to_second::PrecisionMode::Precision(6),
+ ),
+ }),
+ })),
+ };
+
+ assert_eq!(
+ from_substrait_literal_without_names(&substrait,
&Extensions::default())?,
+ ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano {
+ months: 14,
+ days: 3,
+ nanoseconds: 4_000_005_000
+ }))
+ );
+
+ Ok(())
+ }
+}
diff --git a/datafusion/substrait/src/logical_plan/producer.rs
b/datafusion/substrait/src/logical_plan/producer.rs
index 17ed41f016..408885f706 100644
--- a/datafusion/substrait/src/logical_plan/producer.rs
+++ b/datafusion/substrait/src/logical_plan/producer.rs
@@ -21,7 +21,6 @@ use datafusion::optimizer::AnalyzerRule;
use std::sync::Arc;
use substrait::proto::expression_reference::ExprType;
-use arrow_buffer::ToByteSlice;
use datafusion::arrow::datatypes::{Field, IntervalUnit};
use datafusion::logical_expr::{
Distinct, FetchType, Like, Partitioning, SkipType, WindowFrameUnits,
@@ -39,10 +38,11 @@ use crate::variation_const::{
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
- INTERVAL_MONTH_DAY_NANO_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF,
- UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF,
+ LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
+ VIEW_CONTAINER_TYPE_VARIATION_REF,
};
use datafusion::arrow::array::{Array, GenericListArray, OffsetSizeTrait};
+use datafusion::arrow::temporal_conversions::NANOSECONDS;
use datafusion::common::{
exec_err, internal_err, not_impl_err, plan_err, substrait_datafusion_err,
substrait_err, DFSchemaRef, ToDFSchema,
@@ -58,8 +58,8 @@ use substrait::proto::exchange_rel::{ExchangeKind,
RoundRobin, ScatterFields};
use
substrait::proto::expression::literal::interval_day_to_second::PrecisionMode;
use substrait::proto::expression::literal::map::KeyValue;
use substrait::proto::expression::literal::{
- user_defined, IntervalDayToSecond, IntervalYearToMonth, List, Map,
- PrecisionTimestamp, Struct, UserDefined,
+ IntervalCompound, IntervalDayToSecond, IntervalYearToMonth, List, Map,
+ PrecisionTimestamp, Struct,
};
use substrait::proto::expression::subquery::InPredicate;
use substrait::proto::expression::window_function::BoundsType;
@@ -114,7 +114,7 @@ pub fn to_substrait_plan(plan: &LogicalPlan, ctx:
&SessionContext) -> Result<Box
let plan_rels = vec![PlanRel {
rel_type: Some(plan_rel::RelType::Root(RelRoot {
input: Some(*to_substrait_rel(&plan, ctx, &mut extensions)?),
- names: to_substrait_named_struct(plan.schema(), &mut
extensions)?.names,
+ names: to_substrait_named_struct(plan.schema())?.names,
})),
}];
@@ -166,7 +166,7 @@ pub fn to_substrait_extended_expr(
})
})
.collect::<Result<Vec<_>>>()?;
- let substrait_schema = to_substrait_named_struct(schema, &mut extensions)?;
+ let substrait_schema = to_substrait_named_struct(schema)?;
Ok(Box::new(ExtendedExpression {
advanced_extensions: None,
@@ -203,7 +203,7 @@ pub fn to_substrait_rel(
});
let table_schema = scan.source.schema().to_dfschema_ref()?;
- let base_schema = to_substrait_named_struct(&table_schema,
extensions)?;
+ let base_schema = to_substrait_named_struct(&table_schema)?;
Ok(Box::new(Rel {
rel_type: Some(RelType::Read(Box::new(ReadRel {
@@ -229,7 +229,7 @@ pub fn to_substrait_rel(
Ok(Box::new(Rel {
rel_type: Some(RelType::Read(Box::new(ReadRel {
common: None,
- base_schema: Some(to_substrait_named_struct(&e.schema,
extensions)?),
+ base_schema: Some(to_substrait_named_struct(&e.schema)?),
filter: None,
best_effort_filter: None,
projection: None,
@@ -268,7 +268,7 @@ pub fn to_substrait_rel(
Ok(Box::new(Rel {
rel_type: Some(RelType::Read(Box::new(ReadRel {
common: None,
- base_schema: Some(to_substrait_named_struct(&v.schema,
extensions)?),
+ base_schema: Some(to_substrait_named_struct(&v.schema)?),
filter: None,
best_effort_filter: None,
projection: None,
@@ -664,10 +664,7 @@ fn flatten_names(field: &Field, skip_self: bool, names:
&mut Vec<String>) -> Res
Ok(())
}
-fn to_substrait_named_struct(
- schema: &DFSchemaRef,
- extensions: &mut Extensions,
-) -> Result<NamedStruct> {
+fn to_substrait_named_struct(schema: &DFSchemaRef) -> Result<NamedStruct> {
let mut names = Vec::with_capacity(schema.fields().len());
for field in schema.fields() {
flatten_names(field, false, &mut names)?;
@@ -677,7 +674,7 @@ fn to_substrait_named_struct(
types: schema
.fields()
.iter()
- .map(|f| to_substrait_type(f.data_type(), f.is_nullable(),
extensions))
+ .map(|f| to_substrait_type(f.data_type(), f.is_nullable()))
.collect::<Result<_>>()?,
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
nullability: r#type::Nullability::Unspecified as i32,
@@ -1150,7 +1147,7 @@ pub fn to_substrait_rex(
Ok(Expression {
rex_type: Some(RexType::Cast(Box::new(
substrait::proto::expression::Cast {
- r#type: Some(to_substrait_type(data_type, true,
extensions)?),
+ r#type: Some(to_substrait_type(data_type, true)?),
input: Some(Box::new(to_substrait_rex(
ctx,
expr,
@@ -1356,11 +1353,7 @@ pub fn to_substrait_rex(
}
}
-fn to_substrait_type(
- dt: &DataType,
- nullable: bool,
- extensions: &mut Extensions,
-) -> Result<substrait::proto::Type> {
+fn to_substrait_type(dt: &DataType, nullable: bool) ->
Result<substrait::proto::Type> {
let nullability = if nullable {
r#type::Nullability::Nullable as i32
} else {
@@ -1489,16 +1482,14 @@ fn to_substrait_type(
})),
}),
IntervalUnit::MonthDayNano => {
- // Substrait doesn't currently support this type, so we
represent it as a UDT
Ok(substrait::proto::Type {
- kind:
Some(r#type::Kind::UserDefined(r#type::UserDefined {
- type_reference: extensions.register_type(
- INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(),
- ),
- type_variation_reference:
DEFAULT_TYPE_VARIATION_REF,
- nullability,
- type_parameters: vec![],
- })),
+ kind: Some(r#type::Kind::IntervalCompound(
+ r#type::IntervalCompound {
+ type_variation_reference:
DEFAULT_TYPE_VARIATION_REF,
+ nullability,
+ precision: 9, // nanos
+ },
+ )),
})
}
}
@@ -1547,8 +1538,7 @@ fn to_substrait_type(
})),
}),
DataType::List(inner) => {
- let inner_type =
- to_substrait_type(inner.data_type(), inner.is_nullable(),
extensions)?;
+ let inner_type = to_substrait_type(inner.data_type(),
inner.is_nullable())?;
Ok(substrait::proto::Type {
kind: Some(r#type::Kind::List(Box::new(r#type::List {
r#type: Some(Box::new(inner_type)),
@@ -1558,8 +1548,7 @@ fn to_substrait_type(
})
}
DataType::LargeList(inner) => {
- let inner_type =
- to_substrait_type(inner.data_type(), inner.is_nullable(),
extensions)?;
+ let inner_type = to_substrait_type(inner.data_type(),
inner.is_nullable())?;
Ok(substrait::proto::Type {
kind: Some(r#type::Kind::List(Box::new(r#type::List {
r#type: Some(Box::new(inner_type)),
@@ -1573,12 +1562,10 @@ fn to_substrait_type(
let key_type = to_substrait_type(
key_and_value[0].data_type(),
key_and_value[0].is_nullable(),
- extensions,
)?;
let value_type = to_substrait_type(
key_and_value[1].data_type(),
key_and_value[1].is_nullable(),
- extensions,
)?;
Ok(substrait::proto::Type {
kind: Some(r#type::Kind::Map(Box::new(r#type::Map {
@@ -1594,9 +1581,7 @@ fn to_substrait_type(
DataType::Struct(fields) => {
let field_types = fields
.iter()
- .map(|field| {
- to_substrait_type(field.data_type(), field.is_nullable(),
extensions)
- })
+ .map(|field| to_substrait_type(field.data_type(),
field.is_nullable()))
.collect::<Result<Vec<_>>>()?;
Ok(substrait::proto::Type {
kind: Some(r#type::Kind::Struct(r#type::Struct {
@@ -1783,7 +1768,6 @@ fn to_substrait_literal(
literal_type: Some(LiteralType::Null(to_substrait_type(
&value.data_type(),
true,
- extensions,
)?)),
});
}
@@ -1892,23 +1876,21 @@ fn to_substrait_literal(
}),
DEFAULT_TYPE_VARIATION_REF,
),
- ScalarValue::IntervalMonthDayNano(Some(i)) => {
- // IntervalMonthDayNano is internally represented as a 128-bit
integer, containing
- // months (32bit), days (32bit), and nanoseconds (64bit)
- let bytes = i.to_byte_slice();
- (
- LiteralType::UserDefined(UserDefined {
- type_reference: extensions
-
.register_type(INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()),
- type_parameters: vec![],
- val: Some(user_defined::Val::Value(ProtoAny {
- type_url:
INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string(),
- value: bytes.to_vec().into(),
- })),
+ ScalarValue::IntervalMonthDayNano(Some(i)) => (
+ LiteralType::IntervalCompound(IntervalCompound {
+ interval_year_to_month: Some(IntervalYearToMonth {
+ years: i.months / 12,
+ months: i.months % 12,
}),
- DEFAULT_TYPE_VARIATION_REF,
- )
- }
+ interval_day_to_second: Some(IntervalDayToSecond {
+ days: i.days,
+ seconds: (i.nanoseconds / NANOSECONDS) as i32,
+ subseconds: i.nanoseconds % NANOSECONDS,
+ precision_mode: Some(PrecisionMode::Precision(9)), //
nanoseconds
+ }),
+ }),
+ DEFAULT_TYPE_VARIATION_REF,
+ ),
ScalarValue::IntervalDayTime(Some(i)) => (
LiteralType::IntervalDayToSecond(IntervalDayToSecond {
days: i.days,
@@ -1964,7 +1946,7 @@ fn to_substrait_literal(
),
ScalarValue::Map(m) => {
let map = if m.is_empty() || m.value(0).is_empty() {
- let mt = to_substrait_type(m.data_type(), m.is_nullable(),
extensions)?;
+ let mt = to_substrait_type(m.data_type(), m.is_nullable())?;
let mt = match mt {
substrait::proto::Type {
kind: Some(r#type::Kind::Map(mt)),
@@ -2049,11 +2031,7 @@ fn convert_array_to_literal_list<T: OffsetSizeTrait>(
.collect::<Result<Vec<_>>>()?;
if values.is_empty() {
- let lt = match to_substrait_type(
- array.data_type(),
- array.is_nullable(),
- extensions,
- )? {
+ let lt = match to_substrait_type(array.data_type(),
array.is_nullable())? {
substrait::proto::Type {
kind: Some(r#type::Kind::List(lt)),
} => lt.as_ref().to_owned(),
@@ -2179,7 +2157,6 @@ mod test {
use datafusion::arrow::datatypes::{Field, Fields, Schema};
use datafusion::common::scalar::ScalarStructBuilder;
use datafusion::common::DFSchema;
- use std::collections::HashMap;
#[test]
fn round_trip_literals() -> Result<()> {
@@ -2310,39 +2287,6 @@ mod test {
Ok(())
}
- #[test]
- fn custom_type_literal_extensions() -> Result<()> {
- let mut extensions = Extensions::default();
- // IntervalMonthDayNano is represented as a custom type in Substrait
- let scalar =
ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::new(
- 17, 25, 1234567890,
- )));
- let substrait_literal = to_substrait_literal(&scalar, &mut
extensions)?;
- let roundtrip_scalar =
- from_substrait_literal_without_names(&substrait_literal,
&extensions)?;
- assert_eq!(scalar, roundtrip_scalar);
-
- assert_eq!(
- extensions,
- Extensions {
- functions: HashMap::new(),
- types: HashMap::from([(
- 0,
- INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()
- )]),
- type_variations: HashMap::new(),
- }
- );
-
- // Check we fail if we don't propagate extensions
- assert!(from_substrait_literal_without_names(
- &substrait_literal,
- &Extensions::default()
- )
- .is_err());
- Ok(())
- }
-
#[test]
fn round_trip_types() -> Result<()> {
round_trip_type(DataType::Boolean)?;
@@ -2414,50 +2358,17 @@ mod test {
fn round_trip_type(dt: DataType) -> Result<()> {
println!("Checking round trip of {dt:?}");
- let mut extensions = Extensions::default();
-
// As DataFusion doesn't consider nullability as a property of the
type, but field,
// it doesn't matter if we set nullability to true or false here.
- let substrait = to_substrait_type(&dt, true, &mut extensions)?;
- let roundtrip_dt = from_substrait_type_without_names(&substrait,
&extensions)?;
+ let substrait = to_substrait_type(&dt, true)?;
+ let roundtrip_dt =
+ from_substrait_type_without_names(&substrait,
&Extensions::default())?;
assert_eq!(dt, roundtrip_dt);
Ok(())
}
- #[test]
- fn custom_type_extensions() -> Result<()> {
- let mut extensions = Extensions::default();
- // IntervalMonthDayNano is represented as a custom type in Substrait
- let dt = DataType::Interval(IntervalUnit::MonthDayNano);
-
- let substrait = to_substrait_type(&dt, true, &mut extensions)?;
- let roundtrip_dt = from_substrait_type_without_names(&substrait,
&extensions)?;
- assert_eq!(dt, roundtrip_dt);
-
- assert_eq!(
- extensions,
- Extensions {
- functions: HashMap::new(),
- types: HashMap::from([(
- 0,
- INTERVAL_MONTH_DAY_NANO_TYPE_NAME.to_string()
- )]),
- type_variations: HashMap::new(),
- }
- );
-
- // Check we fail if we don't propagate extensions
- assert!(
- from_substrait_type_without_names(&substrait,
&Extensions::default())
- .is_err()
- );
-
- Ok(())
- }
-
#[test]
fn named_struct_names() -> Result<()> {
- let mut extensions = Extensions::default();
let schema = DFSchemaRef::new(DFSchema::try_from(Schema::new(vec![
Field::new("int", DataType::Int32, true),
Field::new(
@@ -2472,7 +2383,7 @@ mod test {
Field::new("trailer", DataType::Float64, true),
]))?);
- let named_struct = to_substrait_named_struct(&schema, &mut
extensions)?;
+ let named_struct = to_substrait_named_struct(&schema)?;
// Struct field names should be flattened DFS style
// List field names should be omitted
@@ -2481,7 +2392,8 @@ mod test {
vec!["int", "struct", "inner", "trailer"]
);
- let roundtrip_schema = from_substrait_named_struct(&named_struct,
&extensions)?;
+ let roundtrip_schema =
+ from_substrait_named_struct(&named_struct,
&Extensions::default())?;
assert_eq!(schema.as_ref(), &roundtrip_schema);
Ok(())
}
diff --git a/datafusion/substrait/src/variation_const.rs
b/datafusion/substrait/src/variation_const.rs
index a3e76389d5..58774db424 100644
--- a/datafusion/substrait/src/variation_const.rs
+++ b/datafusion/substrait/src/variation_const.rs
@@ -96,7 +96,7 @@ pub const INTERVAL_DAY_TIME_TYPE_REF: u32 = 2;
/// [`ScalarValue::IntervalMonthDayNano`]:
datafusion::common::ScalarValue::IntervalMonthDayNano
#[deprecated(
since = "41.0.0",
- note = "Use Substrait `UserDefinedType` with name
`INTERVAL_MONTH_DAY_NANO_TYPE_NAME` instead"
+ note = "Use Substrait `IntervalCompund` type instead"
)]
pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3;
@@ -104,4 +104,8 @@ pub const INTERVAL_MONTH_DAY_NANO_TYPE_REF: u32 = 3;
///
/// [`DataType::Interval`]: datafusion::arrow::datatypes::DataType::Interval
/// [`IntervalUnit::MonthDayNano`]:
datafusion::arrow::datatypes::IntervalUnit::MonthDayNano
+#[deprecated(
+ since = "43.0.0",
+ note = "Use Substrait `IntervalCompund` type instead"
+)]
pub const INTERVAL_MONTH_DAY_NANO_TYPE_NAME: &str = "interval-month-day-nano";
diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
index 8108b9ad67..04530dd34d 100644
--- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
@@ -39,10 +39,7 @@ use std::hash::Hash;
use std::sync::Arc;
use datafusion::execution::session_state::SessionStateBuilder;
-use substrait::proto::extensions::simple_extension_declaration::{
- ExtensionType, MappingType,
-};
-use substrait::proto::extensions::SimpleExtensionDeclaration;
+use substrait::proto::extensions::simple_extension_declaration::MappingType;
use substrait::proto::rel::RelType;
use substrait::proto::{plan_rel, Plan, Rel};
@@ -230,23 +227,6 @@ async fn select_with_reused_functions() -> Result<()> {
Ok(())
}
-#[tokio::test]
-async fn roundtrip_udt_extensions() -> Result<()> {
- let ctx = create_context().await?;
- let proto =
- roundtrip_with_ctx("SELECT INTERVAL '1 YEAR 1 DAY 1 SECOND' FROM
data", ctx)
- .await?;
- let expected_type = SimpleExtensionDeclaration {
- mapping_type: Some(MappingType::ExtensionType(ExtensionType {
- extension_uri_reference: u32::MAX,
- type_anchor: 0,
- name: "interval-month-day-nano".to_string(),
- })),
- };
- assert_eq!(proto.extensions, vec![expected_type]);
- Ok(())
-}
-
#[tokio::test]
async fn select_with_filter_date() -> Result<()> {
roundtrip("SELECT * FROM data WHERE c > CAST('2020-01-01' AS DATE)").await
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]