Copilot commented on code in PR #22705:
URL: https://github.com/apache/datafusion/pull/22705#discussion_r3338905908


##########
datafusion/ffi/src/udwf/expression_args.rs:
##########
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow::ffi::FFI_ArrowSchema;
+use arrow_schema::FieldRef;
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::function::ExpressionArgs;
+use datafusion_physical_expr::PhysicalExpr;
+use stabby::vec::Vec as SVec;
+
+use crate::arrow_wrappers::WrappedSchema;
+use crate::physical_expr::FFI_PhysicalExpr;
+use crate::util::rvec_wrapped_to_vec_fieldref;
+
+/// A stable struct for sharing [`ExpressionArgs`] across FFI boundaries.
+#[repr(C)]
+#[derive(Debug)]
+pub struct FFI_ExpressionArgs {
+    input_exprs: SVec<FFI_PhysicalExpr>,
+    input_fields: SVec<WrappedSchema>,
+}
+
+impl TryFrom<ExpressionArgs<'_>> for FFI_ExpressionArgs {
+    type Error = DataFusionError;
+
+    fn try_from(args: ExpressionArgs) -> Result<Self, DataFusionError> {

Review Comment:
   `datafusion_common::Result` is a type alias that takes a single generic 
parameter (the `Ok` type). `Result<Self, DataFusionError>` will not compile. 
Use `fn try_from(...) -> Result<Self>` (or `std::result::Result<Self, 
DataFusionError>` if you want the explicit form).



##########
datafusion/ffi/src/udwf/mod.rs:
##########
@@ -358,6 +473,75 @@ impl WindowUDFImpl for ForeignWindowUDF {
         }
     }
 
+    fn documentation(&self) -> Option<&Documentation> {
+        unsafe {
+            let ptr = (self.udf.documentation)(&self.udf);
+            ptr.as_ref()
+        }
+    }
+
+    fn expressions(
+        &self,
+        expr_args: datafusion_expr::function::ExpressionArgs,
+    ) -> Vec<Arc<dyn PhysicalExpr>> {
+        unsafe {
+            let args = FFI_ExpressionArgs::try_from(expr_args).unwrap();
+            (self.udf.expressions)(&self.udf, args)
+                .into_iter()
+                .map(|e| Arc::<dyn PhysicalExpr>::from(&e))
+                .collect()
+        }
+    }

Review Comment:
   This `unwrap()` can also panic, and this method can be reached in normal 
query planning/execution (not just tests). Even though this is Rust-to-Rust in 
many deployments, panicking here makes the FFI wrapper brittle. Prefer handling 
the conversion error explicitly (e.g., fall back to returning 
`expr_args.input_exprs().to_vec()` or an empty vec), or redesign the FFI 
`expressions` callback to return `FFI_Result`.



##########
datafusion/ffi/src/udwf/mod.rs:
##########
@@ -358,6 +473,75 @@ impl WindowUDFImpl for ForeignWindowUDF {
         }
     }
 
+    fn documentation(&self) -> Option<&Documentation> {
+        unsafe {
+            let ptr = (self.udf.documentation)(&self.udf);
+            ptr.as_ref()
+        }
+    }
+
+    fn expressions(
+        &self,
+        expr_args: datafusion_expr::function::ExpressionArgs,
+    ) -> Vec<Arc<dyn PhysicalExpr>> {
+        unsafe {
+            let args = FFI_ExpressionArgs::try_from(expr_args).unwrap();
+            (self.udf.expressions)(&self.udf, args)
+                .into_iter()
+                .map(|e| Arc::<dyn PhysicalExpr>::from(&e))
+                .collect()
+        }
+    }
+
+    fn simplify(&self) -> Option<WindowFunctionSimplification> {
+        let udf = self.udf.clone();
+        Some(Box::new(move |wf, info| {

Review Comment:
   `ForeignWindowUDF::simplify` always returns `Some(...)`, even when the 
underlying UDWF has no simplifier (it then roundtrips through FFI and returns 
the original expr). This introduces avoidable serialization/FFI overhead for 
every simplify call. Consider adding an explicit capability signal to the FFI 
struct (e.g., `has_simplify: bool` or making the function pointer optional via 
an `FFI_Option<fn(...)>`) so `simplify()` can return `None` when unsupported.



##########
datafusion/ffi/src/udwf/mod.rs:
##########
@@ -68,17 +76,29 @@ pub struct FFI_WindowUDF {
     )
         -> FFI_Result<FFI_PartitionEvaluator>,
 
-    pub field: unsafe extern "C" fn(
+pub field: unsafe extern "C" fn(
         udwf: &Self,
         input_types: SVec<WrappedSchema>,
         display_name: SString,
     ) -> FFI_Result<WrappedSchema>,
 
-    /// Performs type coercion. To simply this interface, all UDFs are treated 
as having
-    /// user defined signatures, which will in turn call coerce_types to be 
called. This
-    /// call should be transparent to most users as the internal function 
performs the
-    /// appropriate calls on the underlying [`WindowUDF`]
+    pub documentation: unsafe extern "C" fn(udwf: &Self) -> *const 
Documentation,
+
+    pub expressions: unsafe extern "C" fn(
+        udwf: &Self,
+        args: FFI_ExpressionArgs,
+    ) -> SVec<FFI_PhysicalExpr>,
+
+    pub simplify: unsafe extern "C" fn(
+        udwf: &Self,
+        window_function: SVec<u8>,
+        schema: WrappedSchema,
+    ) -> FFI_Result<FFI_Option<SVec<u8>>>,
+
+    pub reverse_expr: unsafe extern "C" fn(udwf: &Self) -> FFI_ReversedUDWF,

Review Comment:
   The new FFI callbacks are public ABI surface but don’t document key 
contracts (pointer ownership/lifetime for `documentation`, serialization 
format/codec expectations for `simplify` bytes, and whether `expressions` may 
fail / what failure behavior is expected). Adding doc comments describing these 
contracts will make the ABI safer to implement correctly in foreign libraries.



##########
datafusion/ffi/src/udwf/mod.rs:
##########
@@ -68,17 +76,29 @@ pub struct FFI_WindowUDF {
     )
         -> FFI_Result<FFI_PartitionEvaluator>,
 
-    pub field: unsafe extern "C" fn(
+pub field: unsafe extern "C" fn(
         udwf: &Self,
         input_types: SVec<WrappedSchema>,
         display_name: SString,
     ) -> FFI_Result<WrappedSchema>,
 
-    /// Performs type coercion. To simply this interface, all UDFs are treated 
as having
-    /// user defined signatures, which will in turn call coerce_types to be 
called. This
-    /// call should be transparent to most users as the internal function 
performs the
-    /// appropriate calls on the underlying [`WindowUDF`]
+    pub documentation: unsafe extern "C" fn(udwf: &Self) -> *const 
Documentation,

Review Comment:
   Returning `*const Documentation` across FFI has two concrete problems: (1) 
`Documentation` is a Rust type that is not guaranteed ABI-stable across 
crates/versions, and (2) ownership/lifetime of the returned pointer is 
undefined (who allocates/frees it; how long it remains valid). Prefer returning 
an FFI-stable representation (e.g., serialize to bytes / protobuf / JSON in an 
`SVec<u8>` with clear ownership), or introduce an FFI-safe `FFI_Documentation` 
struct and a matching release function.



##########
datafusion/ffi/src/udwf/mod.rs:
##########
@@ -155,6 +175,93 @@ unsafe extern "C" fn field_fn_wrapper(
     }
 }
 
+unsafe extern "C" fn documentation_fn_wrapper(udwf: &FFI_WindowUDF) -> *const 
Documentation {
+    unsafe {
+        let inner = udwf.inner();
+        match inner.documentation() {
+            Some(doc) => doc as *const Documentation,
+            None => std::ptr::null(),
+        }
+    }
+}
+
+unsafe extern "C" fn expressions_fn_wrapper(
+    udwf: &FFI_WindowUDF,
+    args: FFI_ExpressionArgs,
+) -> SVec<FFI_PhysicalExpr> {
+    unsafe {
+        let inner = udwf.inner();
+        let args = ForeignExpressionArgs::try_from(args).unwrap();
+        let expressions = inner.expressions((&args).into());
+        expressions.into_iter().map(FFI_PhysicalExpr::from).collect()
+    }
+}

Review Comment:
   `unwrap()` inside an `extern "C"` boundary is dangerous (a panic can unwind 
across FFI, which is UB unless the build is configured to abort on panic). 
Replace the `unwrap()` with explicit error handling (e.g., return an empty 
vector, or change the ABI to return an `FFI_Result<SVec<...>>` so failures can 
be propagated safely).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to