alamb commented on code in PR #10075:
URL: 
https://github.com/apache/arrow-datafusion/pull/10075#discussion_r1564773570


##########
datafusion/proto/src/physical_plan/from_proto.rs:
##########
@@ -76,9 +76,10 @@ impl From<&protobuf::PhysicalColumn> for Column {
 /// # Arguments
 ///
 /// * `proto` - Input proto with physical sort expression node
-/// * `registry` - A registry knows how to build logical expressions out of 
user-defined function' names
+/// * `registry` - A registry knows how to build logical expressions out of 
user-defined function names
 /// * `input_schema` - The Arrow schema for the input, used for determining 
expression data types
 ///                    when performing type coercion.
+/// * `codec` - An extension codec used to decode custom UDFs.

Review Comment:
   thank you for improving the comments here as well ❤️ 



##########
datafusion/proto/tests/cases/roundtrip_physical_plan.rs:
##########
@@ -747,32 +737,52 @@ fn roundtrip_scalar_udf_extension_codec() {
         }
     }
 
+    let field_text = Field::new("text", DataType::Utf8, true);
+    let field_published = Field::new("published", DataType::Boolean, false);
+    let field_author = Field::new("author", DataType::Utf8, false);
+    let schema = Arc::new(Schema::new(vec![field_text, field_published, 
field_author]));
+    let input = Arc::new(EmptyExec::new(schema.clone()));
+
     let pattern = ".*";
     let udf = ScalarUDF::from(MyRegexUdf::new(pattern.to_string()));
-    let test_expr = ScalarFunctionExpr::new(
+    let udf_expr = Arc::new(ScalarFunctionExpr::new(
         udf.name(),
         ScalarFunctionDefinition::UDF(Arc::new(udf.clone())),
-        vec![],
-        DataType::Int32,
+        vec![col("text", &schema).expect("text")],
+        DataType::Boolean,
         None,
         false,
+    ));
+
+    let filter = Arc::new(
+        FilterExec::try_new(
+            Arc::new(BinaryExpr::new(
+                col("published", &schema).expect("published"),
+                Operator::And,
+                udf_expr.clone(),
+            )),
+            input,
+        )
+        .expect("filter"),
     );
-    let fmt_expr = format!("{test_expr:?}");
-    let ctx = SessionContext::new();
 
-    ctx.register_udf(udf.clone());
-    let extension_codec = ScalarUDFExtensionCodec {};
-    let proto: protobuf::PhysicalExprNode =
-        match serialize_physical_expr(Arc::new(test_expr), &extension_codec) {
-            Ok(proto) => proto,
-            Err(e) => panic!("failed to serialize expr: {e:?}"),
-        };
-    let field_a = Field::new("a", DataType::Int32, false);
-    let schema = Arc::new(Schema::new(vec![field_a]));
-    let round_trip =
-        parse_physical_expr(&proto, &ctx, &schema, &extension_codec).unwrap();
-    assert_eq!(fmt_expr, format!("{round_trip:?}"));
+    let aggregate = Arc::new(
+        AggregateExec::try_new(
+            AggregateMode::Final,
+            PhysicalGroupBy::new(vec![], vec![], vec![]),
+            vec![Arc::new(Count::new(udf_expr, "count", DataType::Boolean))],
+            vec![None],
+            filter,
+            schema.clone(),
+        )
+        .expect("aggregate"),
+    );
+
+    let ctx = SessionContext::new();
+    let codec = ScalarUDFExtensionCodec {};
+    roundtrip_test_and_return(aggregate, &ctx, &codec).unwrap();
 }
+
 #[test]
 fn roundtrip_distinct_count() -> Result<()> {

Review Comment:
   it might be worth a test for round tripping window functions with a user 
defined function too



##########
datafusion/proto/src/physical_plan/from_proto.rs:
##########
@@ -123,25 +125,39 @@ pub fn parse_physical_sort_exprs(
 ///
 /// # Arguments
 ///
-/// * `proto` - Input proto with physical window exprression node.
+/// * `proto` - Input proto with physical window expression node.
 /// * `name` - Name of the window expression.
-/// * `registry` - A registry knows how to build logical expressions out of 
user-defined function' names
+/// * `registry` - A registry knows how to build logical expressions out of 
user-defined function names
 /// * `input_schema` - The Arrow schema for the input, used for determining 
expression data types
 ///                    when performing type coercion.
 pub fn parse_physical_window_expr(
     proto: &protobuf::PhysicalWindowExprNode,
     registry: &dyn FunctionRegistry,
     input_schema: &Schema,
 ) -> Result<Arc<dyn WindowExpr>> {
-    let codec = DefaultPhysicalExtensionCodec {};
+    parse_physical_window_expr_ext(
+        proto,
+        registry,
+        input_schema,
+        &DefaultPhysicalExtensionCodec {},
+    )
+}
+
+// TODO: Make this the public function on next major release.

Review Comment:
   I think what is on main will effectively become the next major release 
(`38.0.0`) so there is no reason to avoid API changes here.
   
   If we want to backport this to a stable `37.1.0` release (e.g. #9904 ) then 
we should avoid API changes



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to