alamb commented on code in PR #8985:
URL: https://github.com/apache/arrow-datafusion/pull/8985#discussion_r1468154114


##########
datafusion-examples/examples/return_types_udf.rs:
##########
@@ -0,0 +1,170 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_schema::{Field, Schema};
+use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility};
+
+use datafusion::error::Result;
+use datafusion::prelude::*;
+use datafusion_common::{
+    internal_err, DFSchema, DataFusionError, ScalarValue, ToDFSchema,
+};
+use datafusion_expr::{
+    expr::ScalarFunction, ColumnarValue, ExprSchemable, ScalarUDF, 
ScalarUDFImpl,
+    Signature,
+};
+
+#[derive(Debug)]
+struct UDFWithExprReturn {
+    signature: Signature,
+}
+
+impl UDFWithExprReturn {
+    fn new() -> Self {
+        Self {
+            signature: Signature::any(3, Volatility::Immutable),
+        }
+    }
+}
+
+//Implement the ScalarUDFImpl trait for UDFWithExprReturn
+impl ScalarUDFImpl for UDFWithExprReturn {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn name(&self) -> &str {
+        "udf_with_expr_return"
+    }
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
+        Ok(DataType::Int32)

Review Comment:
   This is pretty confusing, I think -- it seems inconsistent with
`return_type_from_exprs`.



##########
datafusion-examples/examples/return_types_udf.rs:
##########
@@ -0,0 +1,170 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_schema::{Field, Schema};
+use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility};
+
+use datafusion::error::Result;
+use datafusion::prelude::*;
+use datafusion_common::{
+    internal_err, DFSchema, DataFusionError, ScalarValue, ToDFSchema,
+};
+use datafusion_expr::{
+    expr::ScalarFunction, ColumnarValue, ExprSchemable, ScalarUDF, 
ScalarUDFImpl,
+    Signature,
+};
+
+#[derive(Debug)]
+struct UDFWithExprReturn {
+    signature: Signature,
+}
+
+impl UDFWithExprReturn {
+    fn new() -> Self {
+        Self {
+            signature: Signature::any(3, Volatility::Immutable),
+        }
+    }
+}
+
+//Implement the ScalarUDFImpl trait for UDFWithExprReturn
+impl ScalarUDFImpl for UDFWithExprReturn {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn name(&self) -> &str {
+        "udf_with_expr_return"
+    }
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
+        Ok(DataType::Int32)
+    }
+    // An example of how to use the exprs to determine the return type
+    // If the third argument is '0', return the type of the first argument
+    // If the third argument is '1', return the type of the second argument
+    fn return_type_from_exprs(
+        &self,
+        arg_exprs: &[Expr],
+        schema: &DFSchema,
+    ) -> Result<DataType> {
+        if arg_exprs.len() != 3 {
+            return internal_err!("The size of the args must be 3.");
+        }
+        let take_idx = match arg_exprs.get(2).unwrap() {
+            Expr::Literal(ScalarValue::Int64(Some(idx))) if (idx == &0 || idx 
== &1) => {
+                *idx as usize
+            }
+            _ => unreachable!(),
+        };
+        arg_exprs.get(take_idx).unwrap().get_type(schema)
+    }
+    // The actual implementation would add one to the argument
+    fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        unimplemented!()
+    }
+}
+
+#[derive(Debug)]
+struct UDFDefault {
+    signature: Signature,
+}
+
+impl UDFDefault {
+    fn new() -> Self {
+        Self {
+            signature: Signature::any(3, Volatility::Immutable),
+        }
+    }
+}
+
+// Implement the ScalarUDFImpl trait for UDFDefault
+// This is the same as UDFWithExprReturn, except without return_type_from_exprs
+impl ScalarUDFImpl for UDFDefault {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn name(&self) -> &str {
+        "udf_default"
+    }
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+    fn return_type(&self, _: &[DataType]) -> Result<DataType> {
+        Ok(DataType::Boolean)
+    }
+    // The actual implementation would add one to the argument
+    fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        unimplemented!()
+    }
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {

Review Comment:
    I think this example is missing an actual use of the function in a query /
dataframe. As @Weijun-H pointed out, the logic added to `ScalarUDFImpl` doesn't
seem to be connected anywhere else 🤔
   
   What I think the example needs to do is something like
   1. Create a ScalarUDF
   2. register the function with a `SessionContext`
   3. Run a query that uses that function (ideally both with SQL and dataframe 
APIs)
   
   So, for example, a good example function might be one that takes a
string argument, `select my_cast(<arg>, 'string')`, and converts the argument
based on the value of the string.
   
   Then, for example, run queries like:
   ```sql
   select my_cast(c1, 'i32'), arrow_typeof(my_cast(c1, 'i32')); -- returns value and DataType::Int32
   select my_cast(c1, 'i64'), arrow_typeof(my_cast(c1, 'i64')); -- returns value and DataType::Int64
   ```
   
   
   
   Does that make sense?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to