This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 4010a55398 Add support for nested lists in substrait consumer (#20953)
4010a55398 is described below
commit 4010a553984ca4b9d49c9b1cef463c3eb413d9fa
Author: alexanderbianchi <[email protected]>
AuthorDate: Thu Mar 19 08:57:29 2026 -0400
Add support for nested lists in substrait consumer (#20953)
## Rationale for this change
Adds support for nested array expressions to the substrait consumer.
Defined in
[algebra.proto.](https://github.com/substrait-io/substrait/blob/main/proto/substrait/algebra.proto#L1162)
## What changes are included in this PR?
Implements the previously unimplemented `consume_nested` for
`NestedType::List`.
## Are these changes tested?
Yes, unit tests match the testing pattern for substrait literals in
`consumer/expr/literal.rs`. Snapshot test is added for `make_array()`
path.
## Are there any user-facing changes?
User's will now be able to send nested list expressions. This change is
purely additive all previous consumable Substrait plans will continue to
work.
---
.../src/logical_plan/consumer/expr/mod.rs | 2 +
.../src/logical_plan/consumer/expr/nested.rs | 151 +++++++++++++++++++++
.../logical_plan/consumer/substrait_consumer.rs | 8 +-
datafusion/substrait/tests/cases/logical_plans.rs | 23 ++++
.../nested_list_expressions.substrait.json | 77 +++++++++++
5 files changed, 257 insertions(+), 4 deletions(-)
diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs
b/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs
index a0468dbd45..295456e95f 100644
--- a/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs
+++ b/datafusion/substrait/src/logical_plan/consumer/expr/mod.rs
@@ -21,6 +21,7 @@ mod field_reference;
mod function_arguments;
mod if_then;
mod literal;
+mod nested;
mod scalar_function;
mod singular_or_list;
mod subquery;
@@ -32,6 +33,7 @@ pub use field_reference::*;
pub use function_arguments::*;
pub use if_then::*;
pub use literal::*;
+pub use nested::*;
pub use scalar_function::*;
pub use singular_or_list::*;
pub use subquery::*;
diff --git a/datafusion/substrait/src/logical_plan/consumer/expr/nested.rs
b/datafusion/substrait/src/logical_plan/consumer/expr/nested.rs
new file mode 100644
index 0000000000..f94a701342
--- /dev/null
+++ b/datafusion/substrait/src/logical_plan/consumer/expr/nested.rs
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::logical_plan::consumer::SubstraitConsumer;
+use datafusion::common::{DFSchema, not_impl_err, substrait_err};
+use datafusion::execution::FunctionRegistry;
+use datafusion::logical_expr::Expr;
+use substrait::proto::expression::Nested;
+use substrait::proto::expression::nested::NestedType;
+
+/// Converts a Substrait [Nested] expression into a DataFusion [Expr].
+///
+/// Substrait Nested expressions represent complex type constructors (list,
struct, map)
+/// where elements are full expressions rather than just literals. This is
used by
+/// producers that emit `Nested { list: ... }` for array construction, as
opposed to
+/// `Literal { list: ... }` which only supports scalar values.
+pub async fn from_nested(
+ consumer: &impl SubstraitConsumer,
+ nested: &Nested,
+ input_schema: &DFSchema,
+) -> datafusion::common::Result<Expr> {
+ let Some(nested_type) = &nested.nested_type else {
+ return substrait_err!("Nested expression requires a nested_type");
+ };
+
+ match nested_type {
+ NestedType::List(list) => {
+ if list.values.is_empty() {
+ return substrait_err!(
+ "Empty Nested lists are not supported; use
Literal.empty_list instead"
+ );
+ }
+
+ let mut args = Vec::with_capacity(list.values.len());
+ for value in &list.values {
+ args.push(consumer.consume_expression(value,
input_schema).await?);
+ }
+
+ let make_array_udf =
consumer.get_function_registry().udf("make_array")?;
+ Ok(Expr::ScalarFunction(
+ datafusion::logical_expr::expr::ScalarFunction::new_udf(
+ make_array_udf,
+ args,
+ ),
+ ))
+ }
+ NestedType::Struct(_) => {
+ not_impl_err!("Nested struct expressions are not yet supported")
+ }
+ NestedType::Map(_) => {
+ not_impl_err!("Nested map expressions are not yet supported")
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::logical_plan::consumer::utils::tests::test_consumer;
+ use substrait::proto::expression::Literal;
+ use substrait::proto::expression::nested::List;
+ use substrait::proto::{self, Expression};
+
+ fn make_i64_literal(value: i64) -> Expression {
+ Expression {
+ rex_type: Some(proto::expression::RexType::Literal(Literal {
+ nullable: false,
+ type_variation_reference: 0,
+ literal_type:
Some(proto::expression::literal::LiteralType::I64(value)),
+ })),
+ }
+ }
+
+ #[tokio::test]
+ async fn nested_list_with_literals() -> datafusion::common::Result<()> {
+ let consumer = test_consumer();
+ let schema = DFSchema::empty();
+ let nested = Nested {
+ nullable: false,
+ type_variation_reference: 0,
+ nested_type: Some(NestedType::List(List {
+ values: vec![
+ make_i64_literal(1),
+ make_i64_literal(2),
+ make_i64_literal(3),
+ ],
+ })),
+ };
+
+ let expr = from_nested(&consumer, &nested, &schema).await?;
+ assert_eq!(
+ format!("{expr}"),
+ "make_array(Int64(1), Int64(2), Int64(3))"
+ );
+
+ Ok(())
+ }
+
+ #[tokio::test]
+ async fn nested_list_empty_rejected() -> datafusion::common::Result<()> {
+ let consumer = test_consumer();
+ let schema = DFSchema::empty();
+ let nested = Nested {
+ nullable: true,
+ type_variation_reference: 0,
+ nested_type: Some(NestedType::List(List { values: vec![] })),
+ };
+
+ let result = from_nested(&consumer, &nested, &schema).await;
+ assert!(result.is_err());
+ assert!(
+ result
+ .unwrap_err()
+ .to_string()
+ .contains("Empty Nested lists are not supported")
+ );
+
+ Ok(())
+ }
+
+ #[tokio::test]
+ async fn nested_missing_type() -> datafusion::common::Result<()> {
+ let consumer = test_consumer();
+ let schema = DFSchema::empty();
+ let nested = Nested {
+ nullable: false,
+ type_variation_reference: 0,
+ nested_type: None,
+ };
+
+ let result = from_nested(&consumer, &nested, &schema).await;
+ assert!(result.is_err());
+ assert!(result.unwrap_err().to_string().contains("nested_type"));
+
+ Ok(())
+ }
+}
diff --git
a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
index 730ceab8cc..14385888a8 100644
--- a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
@@ -18,7 +18,7 @@
use super::{
from_aggregate_rel, from_cast, from_cross_rel, from_exchange_rel,
from_fetch_rel,
from_field_reference, from_filter_rel, from_if_then, from_join_rel,
from_literal,
- from_project_rel, from_read_rel, from_scalar_function, from_set_rel,
+ from_nested, from_project_rel, from_read_rel, from_scalar_function,
from_set_rel,
from_singular_or_list, from_sort_rel, from_subquery, from_substrait_rel,
from_substrait_rex, from_window_function,
};
@@ -350,10 +350,10 @@ pub trait SubstraitConsumer: Send + Sync + Sized {
async fn consume_nested(
&self,
- _expr: &Nested,
- _input_schema: &DFSchema,
+ expr: &Nested,
+ input_schema: &DFSchema,
) -> datafusion::common::Result<Expr> {
- not_impl_err!("Nested expression not supported")
+ from_nested(self, expr, input_schema).await
}
async fn consume_enum(
diff --git a/datafusion/substrait/tests/cases/logical_plans.rs
b/datafusion/substrait/tests/cases/logical_plans.rs
index 9de7cb8f38..663a372fe2 100644
--- a/datafusion/substrait/tests/cases/logical_plans.rs
+++ b/datafusion/substrait/tests/cases/logical_plans.rs
@@ -270,4 +270,27 @@ mod tests {
Ok(())
}
+
+ #[tokio::test]
+ async fn nested_list_expressions() -> Result<()> {
+ // Tests that a Substrait Nested list expression containing non-literal
+ // expressions (column references) uses the make_array UDF.
+ let proto_plan =
+
read_json("tests/testdata/test_plans/nested_list_expressions.substrait.json");
+ let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
+ let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
+
+ assert_snapshot!(
+ plan,
+ @r"
+ Projection: make_array(DATA.a, DATA.b) AS my_list
+ TableScan: DATA
+ "
+ );
+
+ // Trigger execution to ensure plan validity
+ DataFrame::new(ctx.state(), plan).show().await?;
+
+ Ok(())
+ }
}
diff --git
a/datafusion/substrait/tests/testdata/test_plans/nested_list_expressions.substrait.json
b/datafusion/substrait/tests/testdata/test_plans/nested_list_expressions.substrait.json
new file mode 100644
index 0000000000..85a69c41c5
--- /dev/null
+++
b/datafusion/substrait/tests/testdata/test_plans/nested_list_expressions.substrait.json
@@ -0,0 +1,77 @@
+{
+ "relations": [
+ {
+ "root": {
+ "input": {
+ "project": {
+ "common": {
+ "emit": {
+ "outputMapping": [2]
+ }
+ },
+ "input": {
+ "read": {
+ "common": {
+ "direct": {}
+ },
+ "baseSchema": {
+ "names": ["a", "b"],
+ "struct": {
+ "types": [
+ {
+ "i32": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "i32": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ }
+ ],
+ "nullability": "NULLABILITY_REQUIRED"
+ }
+ },
+ "namedTable": {
+ "names": ["DATA"]
+ }
+ }
+ },
+ "expressions": [
+ {
+ "nested": {
+ "nullable": false,
+ "list": {
+ "values": [
+ {
+ "selection": {
+ "directReference": {
+ "structField": {
+ "field": 0
+ }
+ },
+ "rootReference": {}
+ }
+ },
+ {
+ "selection": {
+ "directReference": {
+ "structField": {
+ "field": 1
+ }
+ },
+ "rootReference": {}
+ }
+ }
+ ]
+ }
+ }
+ }
+ ]
+ }
+ },
+ "names": ["my_list"]
+ }
+ }
+ ]
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]