This is an automated email from the ASF dual-hosted git repository.

marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 43d0ff987 [VL] Allow udf type conversion (#6660)
43d0ff987 is described below

commit 43d0ff987fb526a885264f0efc252654c450a09f
Author: Rong Ma <[email protected]>
AuthorDate: Tue Aug 6 15:45:35 2024 +0800

    [VL] Allow udf type conversion (#6660)
---
 .../gluten/backendsapi/velox/VeloxBackend.scala    |   1 +
 .../apache/spark/sql/expression/UDFResolver.scala  | 135 ++++++++++++++++-----
 .../apache/gluten/expression/VeloxUdfSuite.scala   |  27 +++++
 cpp/velox/jni/JniUdf.cc                            |  22 +++-
 cpp/velox/udf/Udaf.h                               |   1 +
 cpp/velox/udf/Udf.h                                |   1 +
 cpp/velox/udf/UdfLoader.cc                         |   7 +-
 cpp/velox/udf/UdfLoader.h                          |  20 ++-
 cpp/velox/udf/examples/MyUDF.cc                    |  25 ++++
 docs/developers/VeloxUDF.md                        |  26 ++--
 10 files changed, 216 insertions(+), 49 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
index 0eb612687..33efdbc5e 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
@@ -66,6 +66,7 @@ object VeloxBackendSettings extends BackendSettingsApi {
   val GLUTEN_VELOX_UDF_LIB_PATHS = getBackendConfigPrefix() + 
".udfLibraryPaths"
   val GLUTEN_VELOX_DRIVER_UDF_LIB_PATHS = getBackendConfigPrefix() + 
".driver.udfLibraryPaths"
   val GLUTEN_VELOX_INTERNAL_UDF_LIB_PATHS = getBackendConfigPrefix() + 
".internal.udfLibraryPaths"
+  val GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION = getBackendConfigPrefix() + 
".udfAllowTypeConversion"
 
   val MAXIMUM_BATCH_SIZE: Int = 32768
 
diff --git 
a/backends-velox/src/main/scala/org/apache/spark/sql/expression/UDFResolver.scala
 
b/backends-velox/src/main/scala/org/apache/spark/sql/expression/UDFResolver.scala
index 99f9faf99..a2b6d5259 100644
--- 
a/backends-velox/src/main/scala/org/apache/spark/sql/expression/UDFResolver.scala
+++ 
b/backends-velox/src/main/scala/org/apache/spark/sql/expression/UDFResolver.scala
@@ -27,11 +27,12 @@ import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, 
Expression, ExpressionInfo, Unevaluable}
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast, 
Expression, ExpressionInfo, Unevaluable}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
ExprCode}
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{DataType, StructField, StructType}
 import org.apache.spark.util.Utils
 
@@ -74,18 +75,21 @@ trait UDFSignatureBase {
   val expressionType: ExpressionType
   val children: Seq[DataType]
   val variableArity: Boolean
+  val allowTypeConversion: Boolean
 }
 
 case class UDFSignature(
     expressionType: ExpressionType,
     children: Seq[DataType],
-    variableArity: Boolean)
+    variableArity: Boolean,
+    allowTypeConversion: Boolean)
   extends UDFSignatureBase
 
 case class UDAFSignature(
     expressionType: ExpressionType,
     children: Seq[DataType],
     variableArity: Boolean,
+    allowTypeConversion: Boolean,
     intermediateAttrs: Seq[AttributeReference])
   extends UDFSignatureBase
 
@@ -130,26 +134,30 @@ object UDFResolver extends Logging {
       name: String,
       returnType: Array[Byte],
       argTypes: Array[Byte],
-      variableArity: Boolean): Unit = {
+      variableArity: Boolean,
+      allowTypeConversion: Boolean): Unit = {
     registerUDF(
       name,
       ConverterUtils.parseFromBytes(returnType),
       ConverterUtils.parseFromBytes(argTypes),
-      variableArity)
+      variableArity,
+      allowTypeConversion)
   }
 
   private def registerUDF(
       name: String,
       returnType: ExpressionType,
       argTypes: ExpressionType,
-      variableArity: Boolean): Unit = {
+      variableArity: Boolean,
+      allowTypeConversion: Boolean): Unit = {
     assert(argTypes.dataType.isInstanceOf[StructType])
     val v =
       UDFMap.getOrElseUpdate(name, mutable.MutableList[UDFSignature]())
     v += UDFSignature(
       returnType,
       argTypes.dataType.asInstanceOf[StructType].fields.map(_.dataType),
-      variableArity)
+      variableArity,
+      allowTypeConversion)
     UDFNames += name
     logInfo(s"Registered UDF: $name($argTypes) -> $returnType")
   }
@@ -159,13 +167,15 @@ object UDFResolver extends Logging {
       returnType: Array[Byte],
       argTypes: Array[Byte],
       intermediateTypes: Array[Byte],
-      variableArity: Boolean): Unit = {
+      variableArity: Boolean,
+      enableTypeConversion: Boolean): Unit = {
     registerUDAF(
       name,
       ConverterUtils.parseFromBytes(returnType),
       ConverterUtils.parseFromBytes(argTypes),
       ConverterUtils.parseFromBytes(intermediateTypes),
-      variableArity
+      variableArity,
+      enableTypeConversion
     )
   }
 
@@ -174,7 +184,8 @@ object UDFResolver extends Logging {
       returnType: ExpressionType,
       argTypes: ExpressionType,
       intermediateTypes: ExpressionType,
-      variableArity: Boolean): Unit = {
+      variableArity: Boolean,
+      allowTypeConversion: Boolean): Unit = {
     assert(argTypes.dataType.isInstanceOf[StructType])
 
     val aggBufferAttributes: Seq[AttributeReference] =
@@ -194,6 +205,7 @@ object UDFResolver extends Logging {
       returnType,
       argTypes.dataType.asInstanceOf[StructType].fields.map(_.dataType),
       variableArity,
+      allowTypeConversion,
       aggBufferAttributes)
     UDAFNames += name
     logInfo(s"Registered UDAF: $name($argTypes) -> $returnType")
@@ -346,16 +358,27 @@ object UDFResolver extends Logging {
     }
   }
 
+  private def checkAllowTypeConversion: Boolean = {
+    SQLConf.get
+      
.getConfString(VeloxBackendSettings.GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION, 
"false")
+      .toBoolean
+  }
+
   private def getUdfExpression(name: String)(children: Seq[Expression]) = {
     def errorMessage: String =
       s"UDF $name -> ${children.map(_.dataType.simpleString).mkString(", ")} 
is not registered."
 
+    val allowTypeConversion = checkAllowTypeConversion
     val signatures =
       UDFMap.getOrElse(name, throw new 
UnsupportedOperationException(errorMessage));
-
-    signatures.find(sig => tryBind(sig, children.map(_.dataType))) match {
+    signatures.find(sig => tryBind(sig, children.map(_.dataType), 
allowTypeConversion)) match {
       case Some(sig) =>
-        UDFExpression(name, sig.expressionType.dataType, 
sig.expressionType.nullable, children)
+        UDFExpression(
+          name,
+          sig.expressionType.dataType,
+          sig.expressionType.nullable,
+          if (!allowTypeConversion && !sig.allowTypeConversion) children
+          else applyCast(children, sig))
       case None =>
         throw new UnsupportedOperationException(errorMessage)
     }
@@ -365,50 +388,77 @@ object UDFResolver extends Logging {
     def errorMessage: String =
       s"UDAF $name -> ${children.map(_.dataType.simpleString).mkString(", ")} 
is not registered."
 
+    val allowTypeConversion = checkAllowTypeConversion
     val signatures =
       UDAFMap.getOrElse(
         name,
         throw new UnsupportedOperationException(errorMessage)
       )
-
-    signatures.find(sig => tryBind(sig, children.map(_.dataType))) match {
+    signatures.find(sig => tryBind(sig, children.map(_.dataType), 
allowTypeConversion)) match {
       case Some(sig) =>
         UserDefinedAggregateFunction(
           name,
           sig.expressionType.dataType,
           sig.expressionType.nullable,
-          children,
-          sig.intermediateAttrs)
+          if (!allowTypeConversion && !sig.allowTypeConversion) children
+          else applyCast(children, sig),
+          sig.intermediateAttrs
+        )
       case None =>
         throw new UnsupportedOperationException(errorMessage)
     }
   }
 
+  private def tryBind(
+      sig: UDFSignatureBase,
+      requiredDataTypes: Seq[DataType],
+      allowTypeConversion: Boolean): Boolean = {
+    if (
+      !tryBindStrict(sig, requiredDataTypes) && (allowTypeConversion || 
sig.allowTypeConversion)
+    ) {
+      tryBindWithTypeConversion(sig, requiredDataTypes)
+    } else {
+      true
+    }
+  }
+
   // Returns true if required data types match the function signature.
   // If the function signature is variable arity, the number of the last 
argument can be zero
   // or more.
-  private def tryBind(sig: UDFSignatureBase, requiredDataTypes: 
Seq[DataType]): Boolean = {
+  private def tryBindWithTypeConversion(
+      sig: UDFSignatureBase,
+      requiredDataTypes: Seq[DataType]): Boolean = {
+    tryBind0(sig, requiredDataTypes, Cast.canCast)
+  }
+
+  private def tryBindStrict(sig: UDFSignatureBase, requiredDataTypes: 
Seq[DataType]): Boolean = {
+    tryBind0(sig, requiredDataTypes, DataTypeUtils.sameType)
+  }
+
+  private def tryBind0(
+      sig: UDFSignatureBase,
+      requiredDataTypes: Seq[DataType],
+      checkType: (DataType, DataType) => Boolean): Boolean = {
     if (!sig.variableArity) {
       sig.children.size == requiredDataTypes.size &&
-      sig.children
-        .zip(requiredDataTypes)
-        .forall { case (candidate, required) => 
DataTypeUtils.sameType(candidate, required) }
+      requiredDataTypes
+        .zip(sig.children)
+        .forall { case (required, candidate) => checkType(required, candidate) 
}
     } else {
       // If variableArity is true, there must be at least one argument in the 
signature.
       if (requiredDataTypes.size < sig.children.size - 1) {
         false
       } else if (requiredDataTypes.size == sig.children.size - 1) {
-        sig.children
-          .dropRight(1)
-          .zip(requiredDataTypes)
-          .forall { case (candidate, required) => 
DataTypeUtils.sameType(candidate, required) }
+        requiredDataTypes
+          .zip(sig.children.dropRight(1))
+          .forall { case (required, candidate) => checkType(required, 
candidate) }
       } else {
         val varArgStartIndex = sig.children.size - 1
         // First check all var args has the same type with the last argument 
of the signature.
         if (
           !requiredDataTypes
             .drop(varArgStartIndex)
-            .forall(argType => DataTypeUtils.sameType(sig.children.last, 
argType))
+            .forall(argType => checkType(argType, sig.children.last))
         ) {
           false
         } else if (varArgStartIndex == 0) {
@@ -416,11 +466,38 @@ object UDFResolver extends Logging {
           true
         } else {
           // Whether fixed args matches.
-          sig.children
-            .dropRight(1)
-            .zip(requiredDataTypes.dropRight(1 + requiredDataTypes.size - 
sig.children.size))
-            .forall { case (candidate, required) => 
DataTypeUtils.sameType(candidate, required) }
+          requiredDataTypes
+            .dropRight(1 + requiredDataTypes.size - sig.children.size)
+            .zip(sig.children.dropRight(1))
+            .forall { case (required, candidate) => checkType(required, 
candidate) }
+        }
+      }
+    }
+  }
+
+  private def applyCast(children: Seq[Expression], sig: UDFSignatureBase): 
Seq[Expression] = {
+    def maybeCast(expr: Expression, toType: DataType): Expression = {
+      if (!expr.dataType.sameType(toType)) {
+        Cast(expr, toType)
+      } else {
+        expr
+      }
+    }
+
+    if (!sig.variableArity) {
+      children.zip(sig.children).map { case (expr, toType) => maybeCast(expr, 
toType) }
+    } else {
+      val fixedArgs = Math.min(children.size, sig.children.size)
+      val newChildren = 
children.take(fixedArgs).zip(sig.children.take(fixedArgs)).map {
+        case (expr, toType) => maybeCast(expr, toType)
+      }
+      if (children.size > sig.children.size) {
+        val varArgType = sig.children.last
+        newChildren ++ children.takeRight(children.size - 
sig.children.size).map {
+          expr => maybeCast(expr, varArgType)
         }
+      } else {
+        newChildren
       }
     }
   }
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
index 534a8d9f1..008337b94 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
@@ -16,6 +16,7 @@
  */
 package org.apache.gluten.expression
 
+import org.apache.gluten.backendsapi.velox.VeloxBackendSettings
 import org.apache.gluten.tags.{SkipTestTags, UDFTest}
 
 import org.apache.spark.SparkConf
@@ -88,6 +89,23 @@ abstract class VeloxUdfSuite extends GlutenQueryTest with 
SQLHelper {
         .sameElements(Array(Row(105L, 6, 6L, 5, 6, 11, 6L, 11L, 
Date.valueOf("2024-03-30")))))
   }
 
+  test("test udf allow type conversion") {
+    withSQLConf(VeloxBackendSettings.GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION -> 
"true") {
+      val df = spark.sql("""select myudf1("100"), myudf1(1), 
mydate('2024-03-25', 5)""")
+      assert(
+        df.collect()
+          .sameElements(Array(Row(105L, 6L, Date.valueOf("2024-03-30")))))
+    }
+
+    withSQLConf(VeloxBackendSettings.GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION -> 
"false") {
+      assert(
+        spark
+          .sql("select mydate2('2024-03-25', 5)")
+          .collect()
+          .sameElements(Array(Row(Date.valueOf("2024-03-30")))))
+    }
+  }
+
   test("test udaf") {
     val df = spark.sql("""select
                          |  myavg(1),
@@ -101,6 +119,15 @@ abstract class VeloxUdfSuite extends GlutenQueryTest with 
SQLHelper {
       df.collect()
         .sameElements(Array(Row(1.0, 1.0, 1.0, 1.0, 1L))))
   }
+
+  test("test udaf allow type conversion") {
+    withSQLConf(VeloxBackendSettings.GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION -> 
"true") {
+      val df = spark.sql("""select myavg("1"), myavg("1.0"), 
mycount_if("true")""")
+      assert(
+        df.collect()
+          .sameElements(Array(Row(1.0, 1.0, 1L))))
+    }
+  }
 }
 
 @UDFTest
diff --git a/cpp/velox/jni/JniUdf.cc b/cpp/velox/jni/JniUdf.cc
index cab90b325..8230724f1 100644
--- a/cpp/velox/jni/JniUdf.cc
+++ b/cpp/velox/jni/JniUdf.cc
@@ -41,8 +41,8 @@ void gluten::initVeloxJniUDF(JNIEnv* env) {
   udfResolverClass = createGlobalClassReferenceOrError(env, 
kUdfResolverClassPath.c_str());
 
   // methods
-  registerUDFMethod = getMethodIdOrError(env, udfResolverClass, "registerUDF", 
"(Ljava/lang/String;[B[BZ)V");
-  registerUDAFMethod = getMethodIdOrError(env, udfResolverClass, 
"registerUDAF", "(Ljava/lang/String;[B[B[BZ)V");
+  registerUDFMethod = getMethodIdOrError(env, udfResolverClass, "registerUDF", 
"(Ljava/lang/String;[B[BZZ)V");
+  registerUDAFMethod = getMethodIdOrError(env, udfResolverClass, 
"registerUDAF", "(Ljava/lang/String;[B[B[BZZ)V");
 }
 
 void gluten::finalizeVeloxJniUDF(JNIEnv* env) {
@@ -71,9 +71,23 @@ void gluten::jniGetFunctionSignatures(JNIEnv* env) {
           signature->intermediateType.length(),
           reinterpret_cast<const jbyte*>(signature->intermediateType.c_str()));
       env->CallVoidMethod(
-          instance, registerUDAFMethod, name, returnType, argTypes, 
intermediateType, signature->variableArity);
+          instance,
+          registerUDAFMethod,
+          name,
+          returnType,
+          argTypes,
+          intermediateType,
+          signature->variableArity,
+          signature->allowTypeConversion);
     } else {
-      env->CallVoidMethod(instance, registerUDFMethod, name, returnType, 
argTypes, signature->variableArity);
+      env->CallVoidMethod(
+          instance,
+          registerUDFMethod,
+          name,
+          returnType,
+          argTypes,
+          signature->variableArity,
+          signature->allowTypeConversion);
     }
     checkException(env);
   }
diff --git a/cpp/velox/udf/Udaf.h b/cpp/velox/udf/Udaf.h
index 2f292fbc6..4555bdfdf 100644
--- a/cpp/velox/udf/Udaf.h
+++ b/cpp/velox/udf/Udaf.h
@@ -28,6 +28,7 @@ struct UdafEntry {
 
   const char* intermediateType{nullptr};
   bool variableArity{false};
+  bool allowTypeConversion{false};
 };
 
 #define GLUTEN_GET_NUM_UDAF getNumUdaf
diff --git a/cpp/velox/udf/Udf.h b/cpp/velox/udf/Udf.h
index a32bdaefe..e0b3a7000 100644
--- a/cpp/velox/udf/Udf.h
+++ b/cpp/velox/udf/Udf.h
@@ -27,6 +27,7 @@ struct UdfEntry {
   const char** argTypes;
 
   bool variableArity{false};
+  bool allowTypeConversion{false};
 };
 
 #define GLUTEN_GET_NUM_UDF getNumUdf
diff --git a/cpp/velox/udf/UdfLoader.cc b/cpp/velox/udf/UdfLoader.cc
index 02aa410a9..8a9918166 100644
--- a/cpp/velox/udf/UdfLoader.cc
+++ b/cpp/velox/udf/UdfLoader.cc
@@ -86,7 +86,8 @@ std::unordered_set<std::shared_ptr<UdfLoader::UdfSignature>> 
UdfLoader::getRegis
         const auto& entry = udfEntries[i];
         auto dataType = toSubstraitTypeStr(entry.dataType);
         auto argTypes = toSubstraitTypeStr(entry.numArgs, entry.argTypes);
-        signatures_.insert(std::make_shared<UdfSignature>(entry.name, 
dataType, argTypes, entry.variableArity));
+        signatures_.insert(std::make_shared<UdfSignature>(
+            entry.name, dataType, argTypes, entry.variableArity, 
entry.allowTypeConversion));
       }
       free(udfEntries);
     } else {
@@ -110,8 +111,8 @@ 
std::unordered_set<std::shared_ptr<UdfLoader::UdfSignature>> UdfLoader::getRegis
         auto dataType = toSubstraitTypeStr(entry.dataType);
         auto argTypes = toSubstraitTypeStr(entry.numArgs, entry.argTypes);
         auto intermediateType = toSubstraitTypeStr(entry.intermediateType);
-        signatures_.insert(
-            std::make_shared<UdfSignature>(entry.name, dataType, argTypes, 
intermediateType, entry.variableArity));
+        signatures_.insert(std::make_shared<UdfSignature>(
+            entry.name, dataType, argTypes, intermediateType, 
entry.variableArity, entry.allowTypeConversion));
       }
       free(udafEntries);
     } else {
diff --git a/cpp/velox/udf/UdfLoader.h b/cpp/velox/udf/UdfLoader.h
index 2783beb85..51264e67c 100644
--- a/cpp/velox/udf/UdfLoader.h
+++ b/cpp/velox/udf/UdfLoader.h
@@ -37,21 +37,33 @@ class UdfLoader {
     std::string intermediateType{};
 
     bool variableArity;
+    bool allowTypeConversion;
 
-    UdfSignature(std::string name, std::string returnType, std::string 
argTypes, bool variableArity)
-        : name(name), returnType(returnType), argTypes(argTypes), 
variableArity(variableArity) {}
+    UdfSignature(
+        std::string name,
+        std::string returnType,
+        std::string argTypes,
+        bool variableArity,
+        bool allowTypeConversion)
+        : name(name),
+          returnType(returnType),
+          argTypes(argTypes),
+          variableArity(variableArity),
+          allowTypeConversion(allowTypeConversion) {}
 
     UdfSignature(
         std::string name,
         std::string returnType,
         std::string argTypes,
         std::string intermediateType,
-        bool variableArity)
+        bool variableArity,
+        bool allowTypeConversion)
         : name(name),
           returnType(returnType),
           argTypes(argTypes),
           intermediateType(intermediateType),
-          variableArity(variableArity) {}
+          variableArity(variableArity),
+          allowTypeConversion(allowTypeConversion) {}
 
     ~UdfSignature() = default;
   };
diff --git a/cpp/velox/udf/examples/MyUDF.cc b/cpp/velox/udf/examples/MyUDF.cc
index ee20ca39d..db1c5d770 100644
--- a/cpp/velox/udf/examples/MyUDF.cc
+++ b/cpp/velox/udf/examples/MyUDF.cc
@@ -222,6 +222,30 @@ class MyDateRegisterer final : public 
gluten::UdfRegisterer {
   const std::string name_ = "mydate";
   const char* myDateArg_[2] = {kDate, kInteger};
 };
+
+// name: mydate2
+// signatures:
+//    date, integer -> date
+// type: SimpleFunction
+// enable type conversion
+class MyDate2Registerer final : public gluten::UdfRegisterer {
+ public:
+  int getNumUdf() override {
+    return 1;
+  }
+
+  void populateUdfEntries(int& index, gluten::UdfEntry* udfEntries) override {
+    udfEntries[index++] = {name_.c_str(), kDate, 2, myDateArg_, false, true};
+  }
+
+  void registerSignatures() override {
+    facebook::velox::registerFunction<mydate::MyDateSimpleFunction, Date, 
Date, int32_t>({name_});
+  }
+
+ private:
+  const std::string name_ = "mydate2";
+  const char* myDateArg_[2] = {kDate, kInteger};
+};
 } // namespace mydate
 
 std::vector<std::shared_ptr<gluten::UdfRegisterer>>& globalRegisters() {
@@ -239,6 +263,7 @@ void setupRegisterers() {
   registerers.push_back(std::make_shared<myudf::MyUdf2Registerer>());
   registerers.push_back(std::make_shared<myudf::MyUdf3Registerer>());
   registerers.push_back(std::make_shared<mydate::MyDateRegisterer>());
+  registerers.push_back(std::make_shared<mydate::MyDate2Registerer>());
   inited = true;
 }
 } // namespace
diff --git a/docs/developers/VeloxUDF.md b/docs/developers/VeloxUDF.md
index c896fd672..25b896929 100644
--- a/docs/developers/VeloxUDF.md
+++ b/docs/developers/VeloxUDF.md
@@ -21,18 +21,18 @@ The following steps demonstrate how to set up a UDF library 
project:
 - **Implement the Interface Functions:**
   Implement the following interface functions that integrate UDF into Project 
Gluten:
 
-  - `getNumUdf()`:
-    This function should return the number of UDF in the library.
-    This is used to allocating udfEntries array as the argument for the next 
function `getUdfEntries`.
+    - `getNumUdf()`:
+      This function should return the number of UDFs in the library.
+      This is used to allocate the udfEntries array as the argument for the next 
function `getUdfEntries`.
 
-  - `getUdfEntries(gluten::UdfEntry* udfEntries)`:
-    This function should populate the provided udfEntries array with the 
details of the UDF, including function names and signatures.
+    - `getUdfEntries(gluten::UdfEntry* udfEntries)`:
+      This function should populate the provided udfEntries array with the 
details of the UDF, including function names and signatures.
 
-  - `registerUdf()`:
-    This function is called to register the UDF to Velox function registry.
-    This is where users should register functions by calling 
`facebook::velox::exec::registerVecotorFunction` or other Velox APIs.
+    - `registerUdf()`:
+      This function is called to register the UDF to the Velox function registry.
+      This is where users should register functions by calling 
`facebook::velox::exec::registerVectorFunction` or other Velox APIs.
 
-  - The interface functions are mapped to marcos in 
[Udf.h](../../cpp/velox/udf/Udf.h). Here's an example of how to implement these 
functions:
+    - The interface functions are mapped to macros in 
[Udf.h](../../cpp/velox/udf/Udf.h). Here's an example of how to implement these 
functions:
 
   ```
   // Filename MyUDF.cc
@@ -176,6 +176,14 @@ The output from spark-shell will be like
 +------------------+----------------+
 ```
 
+## Configurations
+
+| Parameters                                                     | Description 
                                                                                
                |
+|----------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------|
+| spark.gluten.sql.columnar.backend.velox.udfLibraryPaths        | Path to the 
udf/udaf libraries.                                                             
                |
+| spark.gluten.sql.columnar.backend.velox.driver.udfLibraryPaths | Path to the 
udf/udaf libraries on driver node. Only applicable on yarn-client mode.         
                |
+| spark.gluten.sql.columnar.backend.velox.udfAllowTypeConversion | Whether to 
inject possible `cast` expressions to convert mismatched input data types to one 
of the registered signatures. |
+
 # Pandas UDFs (a.k.a. Vectorized UDFs)
 
 ## Introduction


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to