vbarua commented on code in PR #20953:
URL: https://github.com/apache/datafusion/pull/20953#discussion_r2940599696


##########
datafusion/substrait/src/logical_plan/consumer/expr/nested.rs:
##########
@@ -0,0 +1,139 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::logical_plan::consumer::SubstraitConsumer;
+use datafusion::common::{DFSchema, not_impl_err, substrait_err};
+use datafusion::execution::FunctionRegistry;
+use datafusion::logical_expr::Expr;
+use substrait::proto::expression::Nested;
+use substrait::proto::expression::nested::NestedType;
+
+/// Converts a Substrait [Nested] expression into a DataFusion [Expr].
+///
+/// Substrait Nested expressions represent complex type constructors (list, 
struct, map)
+/// where elements are full expressions rather than just literals. This is 
used by
+/// producers that emit `Nested { list: ... }` for array construction, as 
opposed to
+/// `Literal { list: ... }` which only supports scalar values.
+pub async fn from_nested(
+    consumer: &impl SubstraitConsumer,
+    nested: &Nested,
+    input_schema: &DFSchema,
+) -> datafusion::common::Result<Expr> {
+    let Some(nested_type) = &nested.nested_type else {
+        return substrait_err!("Nested expression requires a nested_type");
+    };
+
+    match nested_type {
+        NestedType::List(list) => {
+            let mut args = Vec::with_capacity(list.values.len());
+            for value in &list.values {
+                args.push(consumer.consume_expression(value, 
input_schema).await?);
+            }
+
+            let make_array_udf = 
consumer.get_function_registry().udf("make_array")?;
+            Ok(Expr::ScalarFunction(
+                datafusion::logical_expr::expr::ScalarFunction::new_udf(
+                    make_array_udf,
+                    args,
+                ),
+            ))
+        }
+        NestedType::Struct(_) => {
+            not_impl_err!("Nested struct expressions are not yet supported")
+        }
+        NestedType::Map(_) => {
+            not_impl_err!("Nested map expressions are not yet supported")
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::logical_plan::consumer::utils::tests::test_consumer;
+    use substrait::proto::expression::Literal;
+    use substrait::proto::expression::nested::List;
+    use substrait::proto::{self, Expression};
+
+    fn make_i64_literal(value: i64) -> Expression {
+        Expression {
+            rex_type: Some(proto::expression::RexType::Literal(Literal {
+                nullable: false,
+                type_variation_reference: 0,
+                literal_type: 
Some(proto::expression::literal::LiteralType::I64(value)),
+            })),
+        }
+    }
+
+    #[tokio::test]
+    async fn nested_list_with_literals() -> datafusion::common::Result<()> {
+        let consumer = test_consumer();
+        let schema = DFSchema::empty();
+        let nested = Nested {
+            nullable: false,
+            type_variation_reference: 0,
+            nested_type: Some(NestedType::List(List {
+                values: vec![
+                    make_i64_literal(1),
+                    make_i64_literal(2),
+                    make_i64_literal(3),
+                ],
+            })),
+        };
+
+        let expr = from_nested(&consumer, &nested, &schema).await?;
+        assert_eq!(
+            format!("{expr}"),
+            "make_array(Int64(1), Int64(2), Int64(3))"
+        );
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn nested_list_empty() -> datafusion::common::Result<()> {
+        let consumer = test_consumer();
+        let schema = DFSchema::empty();
+        let nested = Nested {
+            nullable: true,
+            type_variation_reference: 0,
+            nested_type: Some(NestedType::List(List { values: vec![] })),
+        };
+
+        let expr = from_nested(&consumer, &nested, &schema).await?;
+        assert_eq!(format!("{expr}"), "make_array()");
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn nested_list_missing_type() -> datafusion::common::Result<()> {

Review Comment:
   minor: Your naming in this test made me think it was about the _list_ 
missing a type, when actually it's about the `Nested` message not having 
`nested_type` set.
   
   ```suggestion
       async fn nested_missing_type() -> datafusion::common::Result<()> {
   ```



##########
datafusion/substrait/src/logical_plan/consumer/expr/nested.rs:
##########
@@ -0,0 +1,139 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::logical_plan::consumer::SubstraitConsumer;
+use datafusion::common::{DFSchema, not_impl_err, substrait_err};
+use datafusion::execution::FunctionRegistry;
+use datafusion::logical_expr::Expr;
+use substrait::proto::expression::Nested;
+use substrait::proto::expression::nested::NestedType;
+
+/// Converts a Substrait [Nested] expression into a DataFusion [Expr].
+///
+/// Substrait Nested expressions represent complex type constructors (list, 
struct, map)
+/// where elements are full expressions rather than just literals. This is 
used by
+/// producers that emit `Nested { list: ... }` for array construction, as 
opposed to
+/// `Literal { list: ... }` which only supports scalar values.
+pub async fn from_nested(
+    consumer: &impl SubstraitConsumer,
+    nested: &Nested,
+    input_schema: &DFSchema,
+) -> datafusion::common::Result<Expr> {
+    let Some(nested_type) = &nested.nested_type else {
+        return substrait_err!("Nested expression requires a nested_type");
+    };
+
+    match nested_type {
+        NestedType::List(list) => {
+            let mut args = Vec::with_capacity(list.values.len());
+            for value in &list.values {
+                args.push(consumer.consume_expression(value, 
input_schema).await?);
+            }
+
+            let make_array_udf = 
consumer.get_function_registry().udf("make_array")?;
+            Ok(Expr::ScalarFunction(
+                datafusion::logical_expr::expr::ScalarFunction::new_udf(
+                    make_array_udf,
+                    args,
+                ),
+            ))
+        }
+        NestedType::Struct(_) => {
+            not_impl_err!("Nested struct expressions are not yet supported")
+        }
+        NestedType::Map(_) => {
+            not_impl_err!("Nested map expressions are not yet supported")
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::logical_plan::consumer::utils::tests::test_consumer;
+    use substrait::proto::expression::Literal;
+    use substrait::proto::expression::nested::List;
+    use substrait::proto::{self, Expression};
+
+    fn make_i64_literal(value: i64) -> Expression {
+        Expression {
+            rex_type: Some(proto::expression::RexType::Literal(Literal {
+                nullable: false,
+                type_variation_reference: 0,
+                literal_type: 
Some(proto::expression::literal::LiteralType::I64(value)),
+            })),
+        }
+    }
+
+    #[tokio::test]
+    async fn nested_list_with_literals() -> datafusion::common::Result<()> {
+        let consumer = test_consumer();
+        let schema = DFSchema::empty();
+        let nested = Nested {
+            nullable: false,
+            type_variation_reference: 0,
+            nested_type: Some(NestedType::List(List {
+                values: vec![
+                    make_i64_literal(1),
+                    make_i64_literal(2),
+                    make_i64_literal(3),
+                ],
+            })),
+        };
+
+        let expr = from_nested(&consumer, &nested, &schema).await?;
+        assert_eq!(
+            format!("{expr}"),
+            "make_array(Int64(1), Int64(2), Int64(3))"
+        );
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn nested_list_empty() -> datafusion::common::Result<()> {
+        let consumer = test_consumer();
+        let schema = DFSchema::empty();
+        let nested = Nested {
+            nullable: true,
+            type_variation_reference: 0,
+            nested_type: Some(NestedType::List(List { values: vec![] })),
+        };
+
+        let expr = from_nested(&consumer, &nested, &schema).await?;

Review Comment:
   I think we might actually want to reject empty nested lists:
   >    // A homogeneously-typed list of one or more expressions that form the
   >    // list entries. To specify an empty list, use Literal.empty_list
   >    // (otherwise type information would be missing).
   
[from](https://github.com/substrait-io/substrait/blob/150d86d10ce10384685513715c78d63a9a6c1e37/proto/substrait/algebra.proto#L1185-L1189)
   
   If the list is empty, we don't have a type for its elements. I don't know 
how that works for Arrow, but that type is malformed in Substrait.
   



##########
datafusion/substrait/tests/testdata/test_plans/nested_list_expressions.substrait.json:
##########
@@ -0,0 +1,92 @@
+{
+  "extensionUris": [
+    {
+      "extensionUriAnchor": 1,
+      "uri": 
"https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml";
+    }
+  ],
+  "extensions": [
+    {
+      "extensionFunction": {
+        "extensionUriReference": 1,
+        "functionAnchor": 0,
+        "name": "add:i32_i32"
+      }

Review Comment:
   You included a function in this plan, but it's never used. Did you intend to 
use it when constructing the list below?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to