This is an automated email from the ASF dual-hosted git repository.
marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 43d0ff987 [VL] Allow udf type conversion (#6660)
43d0ff987 is described below
commit 43d0ff987fb526a885264f0efc252654c450a09f
Author: Rong Ma <[email protected]>
AuthorDate: Tue Aug 6 15:45:35 2024 +0800
[VL] Allow udf type conversion (#6660)
---
.../gluten/backendsapi/velox/VeloxBackend.scala | 1 +
.../apache/spark/sql/expression/UDFResolver.scala | 135 ++++++++++++++++-----
.../apache/gluten/expression/VeloxUdfSuite.scala | 27 +++++
cpp/velox/jni/JniUdf.cc | 22 +++-
cpp/velox/udf/Udaf.h | 1 +
cpp/velox/udf/Udf.h | 1 +
cpp/velox/udf/UdfLoader.cc | 7 +-
cpp/velox/udf/UdfLoader.h | 20 ++-
cpp/velox/udf/examples/MyUDF.cc | 25 ++++
docs/developers/VeloxUDF.md | 26 ++--
10 files changed, 216 insertions(+), 49 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
index 0eb612687..33efdbc5e 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
@@ -66,6 +66,7 @@ object VeloxBackendSettings extends BackendSettingsApi {
val GLUTEN_VELOX_UDF_LIB_PATHS = getBackendConfigPrefix() +
".udfLibraryPaths"
val GLUTEN_VELOX_DRIVER_UDF_LIB_PATHS = getBackendConfigPrefix() +
".driver.udfLibraryPaths"
val GLUTEN_VELOX_INTERNAL_UDF_LIB_PATHS = getBackendConfigPrefix() +
".internal.udfLibraryPaths"
+ val GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION = getBackendConfigPrefix() +
".udfAllowTypeConversion"
val MAXIMUM_BATCH_SIZE: Int = 32768
diff --git
a/backends-velox/src/main/scala/org/apache/spark/sql/expression/UDFResolver.scala
b/backends-velox/src/main/scala/org/apache/spark/sql/expression/UDFResolver.scala
index 99f9faf99..a2b6d5259 100644
---
a/backends-velox/src/main/scala/org/apache/spark/sql/expression/UDFResolver.scala
+++
b/backends-velox/src/main/scala/org/apache/spark/sql/expression/UDFResolver.scala
@@ -27,11 +27,12 @@ import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference,
Expression, ExpressionInfo, Unevaluable}
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast,
Expression, ExpressionInfo, Unevaluable}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext,
ExprCode}
import org.apache.spark.sql.catalyst.types.DataTypeUtils
import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DataType, StructField, StructType}
import org.apache.spark.util.Utils
@@ -74,18 +75,21 @@ trait UDFSignatureBase {
val expressionType: ExpressionType
val children: Seq[DataType]
val variableArity: Boolean
+ val allowTypeConversion: Boolean
}
case class UDFSignature(
expressionType: ExpressionType,
children: Seq[DataType],
- variableArity: Boolean)
+ variableArity: Boolean,
+ allowTypeConversion: Boolean)
extends UDFSignatureBase
case class UDAFSignature(
expressionType: ExpressionType,
children: Seq[DataType],
variableArity: Boolean,
+ allowTypeConversion: Boolean,
intermediateAttrs: Seq[AttributeReference])
extends UDFSignatureBase
@@ -130,26 +134,30 @@ object UDFResolver extends Logging {
name: String,
returnType: Array[Byte],
argTypes: Array[Byte],
- variableArity: Boolean): Unit = {
+ variableArity: Boolean,
+ allowTypeConversion: Boolean): Unit = {
registerUDF(
name,
ConverterUtils.parseFromBytes(returnType),
ConverterUtils.parseFromBytes(argTypes),
- variableArity)
+ variableArity,
+ allowTypeConversion)
}
private def registerUDF(
name: String,
returnType: ExpressionType,
argTypes: ExpressionType,
- variableArity: Boolean): Unit = {
+ variableArity: Boolean,
+ allowTypeConversion: Boolean): Unit = {
assert(argTypes.dataType.isInstanceOf[StructType])
val v =
UDFMap.getOrElseUpdate(name, mutable.MutableList[UDFSignature]())
v += UDFSignature(
returnType,
argTypes.dataType.asInstanceOf[StructType].fields.map(_.dataType),
- variableArity)
+ variableArity,
+ allowTypeConversion)
UDFNames += name
logInfo(s"Registered UDF: $name($argTypes) -> $returnType")
}
@@ -159,13 +167,15 @@ object UDFResolver extends Logging {
returnType: Array[Byte],
argTypes: Array[Byte],
intermediateTypes: Array[Byte],
- variableArity: Boolean): Unit = {
+ variableArity: Boolean,
+ enableTypeConversion: Boolean): Unit = {
registerUDAF(
name,
ConverterUtils.parseFromBytes(returnType),
ConverterUtils.parseFromBytes(argTypes),
ConverterUtils.parseFromBytes(intermediateTypes),
- variableArity
+ variableArity,
+ enableTypeConversion
)
}
@@ -174,7 +184,8 @@ object UDFResolver extends Logging {
returnType: ExpressionType,
argTypes: ExpressionType,
intermediateTypes: ExpressionType,
- variableArity: Boolean): Unit = {
+ variableArity: Boolean,
+ allowTypeConversion: Boolean): Unit = {
assert(argTypes.dataType.isInstanceOf[StructType])
val aggBufferAttributes: Seq[AttributeReference] =
@@ -194,6 +205,7 @@ object UDFResolver extends Logging {
returnType,
argTypes.dataType.asInstanceOf[StructType].fields.map(_.dataType),
variableArity,
+ allowTypeConversion,
aggBufferAttributes)
UDAFNames += name
logInfo(s"Registered UDAF: $name($argTypes) -> $returnType")
@@ -346,16 +358,27 @@ object UDFResolver extends Logging {
}
}
+ private def checkAllowTypeConversion: Boolean = {
+ SQLConf.get
+
.getConfString(VeloxBackendSettings.GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION,
"false")
+ .toBoolean
+ }
+
private def getUdfExpression(name: String)(children: Seq[Expression]) = {
def errorMessage: String =
s"UDF $name -> ${children.map(_.dataType.simpleString).mkString(", ")}
is not registered."
+ val allowTypeConversion = checkAllowTypeConversion
val signatures =
UDFMap.getOrElse(name, throw new
UnsupportedOperationException(errorMessage));
-
- signatures.find(sig => tryBind(sig, children.map(_.dataType))) match {
+ signatures.find(sig => tryBind(sig, children.map(_.dataType),
allowTypeConversion)) match {
case Some(sig) =>
- UDFExpression(name, sig.expressionType.dataType,
sig.expressionType.nullable, children)
+ UDFExpression(
+ name,
+ sig.expressionType.dataType,
+ sig.expressionType.nullable,
+ if (!allowTypeConversion && !sig.allowTypeConversion) children
+ else applyCast(children, sig))
case None =>
throw new UnsupportedOperationException(errorMessage)
}
@@ -365,50 +388,77 @@ object UDFResolver extends Logging {
def errorMessage: String =
s"UDAF $name -> ${children.map(_.dataType.simpleString).mkString(", ")}
is not registered."
+ val allowTypeConversion = checkAllowTypeConversion
val signatures =
UDAFMap.getOrElse(
name,
throw new UnsupportedOperationException(errorMessage)
)
-
- signatures.find(sig => tryBind(sig, children.map(_.dataType))) match {
+ signatures.find(sig => tryBind(sig, children.map(_.dataType),
allowTypeConversion)) match {
case Some(sig) =>
UserDefinedAggregateFunction(
name,
sig.expressionType.dataType,
sig.expressionType.nullable,
- children,
- sig.intermediateAttrs)
+ if (!allowTypeConversion && !sig.allowTypeConversion) children
+ else applyCast(children, sig),
+ sig.intermediateAttrs
+ )
case None =>
throw new UnsupportedOperationException(errorMessage)
}
}
+ private def tryBind(
+ sig: UDFSignatureBase,
+ requiredDataTypes: Seq[DataType],
+ allowTypeConversion: Boolean): Boolean = {
+ if (
+ !tryBindStrict(sig, requiredDataTypes) && (allowTypeConversion ||
sig.allowTypeConversion)
+ ) {
+ tryBindWithTypeConversion(sig, requiredDataTypes)
+ } else {
+ true
+ }
+ }
+
// Returns true if required data types match the function signature.
// If the function signature is variable arity, the number of the last
argument can be zero
// or more.
- private def tryBind(sig: UDFSignatureBase, requiredDataTypes:
Seq[DataType]): Boolean = {
+ private def tryBindWithTypeConversion(
+ sig: UDFSignatureBase,
+ requiredDataTypes: Seq[DataType]): Boolean = {
+ tryBind0(sig, requiredDataTypes, Cast.canCast)
+ }
+
+ private def tryBindStrict(sig: UDFSignatureBase, requiredDataTypes:
Seq[DataType]): Boolean = {
+ tryBind0(sig, requiredDataTypes, DataTypeUtils.sameType)
+ }
+
+ private def tryBind0(
+ sig: UDFSignatureBase,
+ requiredDataTypes: Seq[DataType],
+ checkType: (DataType, DataType) => Boolean): Boolean = {
if (!sig.variableArity) {
sig.children.size == requiredDataTypes.size &&
- sig.children
- .zip(requiredDataTypes)
- .forall { case (candidate, required) =>
DataTypeUtils.sameType(candidate, required) }
+ requiredDataTypes
+ .zip(sig.children)
+ .forall { case (required, candidate) => checkType(required, candidate)
}
} else {
// If variableArity is true, there must be at least one argument in the
signature.
if (requiredDataTypes.size < sig.children.size - 1) {
false
} else if (requiredDataTypes.size == sig.children.size - 1) {
- sig.children
- .dropRight(1)
- .zip(requiredDataTypes)
- .forall { case (candidate, required) =>
DataTypeUtils.sameType(candidate, required) }
+ requiredDataTypes
+ .zip(sig.children.dropRight(1))
+ .forall { case (required, candidate) => checkType(required,
candidate) }
} else {
val varArgStartIndex = sig.children.size - 1
// First check all var args has the same type with the last argument
of the signature.
if (
!requiredDataTypes
.drop(varArgStartIndex)
- .forall(argType => DataTypeUtils.sameType(sig.children.last,
argType))
+ .forall(argType => checkType(argType, sig.children.last))
) {
false
} else if (varArgStartIndex == 0) {
@@ -416,11 +466,38 @@ object UDFResolver extends Logging {
true
} else {
// Whether fixed args matches.
- sig.children
- .dropRight(1)
- .zip(requiredDataTypes.dropRight(1 + requiredDataTypes.size -
sig.children.size))
- .forall { case (candidate, required) =>
DataTypeUtils.sameType(candidate, required) }
+ requiredDataTypes
+ .dropRight(1 + requiredDataTypes.size - sig.children.size)
+ .zip(sig.children.dropRight(1))
+ .forall { case (required, candidate) => checkType(required,
candidate) }
+ }
+ }
+ }
+ }
+
+ private def applyCast(children: Seq[Expression], sig: UDFSignatureBase):
Seq[Expression] = {
+ def maybeCast(expr: Expression, toType: DataType): Expression = {
+ if (!expr.dataType.sameType(toType)) {
+ Cast(expr, toType)
+ } else {
+ expr
+ }
+ }
+
+ if (!sig.variableArity) {
+ children.zip(sig.children).map { case (expr, toType) => maybeCast(expr,
toType) }
+ } else {
+ val fixedArgs = Math.min(children.size, sig.children.size)
+ val newChildren =
children.take(fixedArgs).zip(sig.children.take(fixedArgs)).map {
+ case (expr, toType) => maybeCast(expr, toType)
+ }
+ if (children.size > sig.children.size) {
+ val varArgType = sig.children.last
+ newChildren ++ children.takeRight(children.size -
sig.children.size).map {
+ expr => maybeCast(expr, varArgType)
}
+ } else {
+ newChildren
}
}
}
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
index 534a8d9f1..008337b94 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
@@ -16,6 +16,7 @@
*/
package org.apache.gluten.expression
+import org.apache.gluten.backendsapi.velox.VeloxBackendSettings
import org.apache.gluten.tags.{SkipTestTags, UDFTest}
import org.apache.spark.SparkConf
@@ -88,6 +89,23 @@ abstract class VeloxUdfSuite extends GlutenQueryTest with
SQLHelper {
.sameElements(Array(Row(105L, 6, 6L, 5, 6, 11, 6L, 11L,
Date.valueOf("2024-03-30")))))
}
+ test("test udf allow type conversion") {
+ withSQLConf(VeloxBackendSettings.GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION ->
"true") {
+ val df = spark.sql("""select myudf1("100"), myudf1(1),
mydate('2024-03-25', 5)""")
+ assert(
+ df.collect()
+ .sameElements(Array(Row(105L, 6L, Date.valueOf("2024-03-30")))))
+ }
+
+ withSQLConf(VeloxBackendSettings.GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION ->
"false") {
+ assert(
+ spark
+ .sql("select mydate2('2024-03-25', 5)")
+ .collect()
+ .sameElements(Array(Row(Date.valueOf("2024-03-30")))))
+ }
+ }
+
test("test udaf") {
val df = spark.sql("""select
| myavg(1),
@@ -101,6 +119,15 @@ abstract class VeloxUdfSuite extends GlutenQueryTest with
SQLHelper {
df.collect()
.sameElements(Array(Row(1.0, 1.0, 1.0, 1.0, 1L))))
}
+
+ test("test udaf allow type conversion") {
+ withSQLConf(VeloxBackendSettings.GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION ->
"true") {
+ val df = spark.sql("""select myavg("1"), myavg("1.0"),
mycount_if("true")""")
+ assert(
+ df.collect()
+ .sameElements(Array(Row(1.0, 1.0, 1L))))
+ }
+ }
}
@UDFTest
diff --git a/cpp/velox/jni/JniUdf.cc b/cpp/velox/jni/JniUdf.cc
index cab90b325..8230724f1 100644
--- a/cpp/velox/jni/JniUdf.cc
+++ b/cpp/velox/jni/JniUdf.cc
@@ -41,8 +41,8 @@ void gluten::initVeloxJniUDF(JNIEnv* env) {
udfResolverClass = createGlobalClassReferenceOrError(env,
kUdfResolverClassPath.c_str());
// methods
- registerUDFMethod = getMethodIdOrError(env, udfResolverClass, "registerUDF",
"(Ljava/lang/String;[B[BZ)V");
- registerUDAFMethod = getMethodIdOrError(env, udfResolverClass,
"registerUDAF", "(Ljava/lang/String;[B[B[BZ)V");
+ registerUDFMethod = getMethodIdOrError(env, udfResolverClass, "registerUDF",
"(Ljava/lang/String;[B[BZZ)V");
+ registerUDAFMethod = getMethodIdOrError(env, udfResolverClass,
"registerUDAF", "(Ljava/lang/String;[B[B[BZZ)V");
}
void gluten::finalizeVeloxJniUDF(JNIEnv* env) {
@@ -71,9 +71,23 @@ void gluten::jniGetFunctionSignatures(JNIEnv* env) {
signature->intermediateType.length(),
reinterpret_cast<const jbyte*>(signature->intermediateType.c_str()));
env->CallVoidMethod(
- instance, registerUDAFMethod, name, returnType, argTypes,
intermediateType, signature->variableArity);
+ instance,
+ registerUDAFMethod,
+ name,
+ returnType,
+ argTypes,
+ intermediateType,
+ signature->variableArity,
+ signature->allowTypeConversion);
} else {
- env->CallVoidMethod(instance, registerUDFMethod, name, returnType,
argTypes, signature->variableArity);
+ env->CallVoidMethod(
+ instance,
+ registerUDFMethod,
+ name,
+ returnType,
+ argTypes,
+ signature->variableArity,
+ signature->allowTypeConversion);
}
checkException(env);
}
diff --git a/cpp/velox/udf/Udaf.h b/cpp/velox/udf/Udaf.h
index 2f292fbc6..4555bdfdf 100644
--- a/cpp/velox/udf/Udaf.h
+++ b/cpp/velox/udf/Udaf.h
@@ -28,6 +28,7 @@ struct UdafEntry {
const char* intermediateType{nullptr};
bool variableArity{false};
+ bool allowTypeConversion{false};
};
#define GLUTEN_GET_NUM_UDAF getNumUdaf
diff --git a/cpp/velox/udf/Udf.h b/cpp/velox/udf/Udf.h
index a32bdaefe..e0b3a7000 100644
--- a/cpp/velox/udf/Udf.h
+++ b/cpp/velox/udf/Udf.h
@@ -27,6 +27,7 @@ struct UdfEntry {
const char** argTypes;
bool variableArity{false};
+ bool allowTypeConversion{false};
};
#define GLUTEN_GET_NUM_UDF getNumUdf
diff --git a/cpp/velox/udf/UdfLoader.cc b/cpp/velox/udf/UdfLoader.cc
index 02aa410a9..8a9918166 100644
--- a/cpp/velox/udf/UdfLoader.cc
+++ b/cpp/velox/udf/UdfLoader.cc
@@ -86,7 +86,8 @@ std::unordered_set<std::shared_ptr<UdfLoader::UdfSignature>>
UdfLoader::getRegis
const auto& entry = udfEntries[i];
auto dataType = toSubstraitTypeStr(entry.dataType);
auto argTypes = toSubstraitTypeStr(entry.numArgs, entry.argTypes);
- signatures_.insert(std::make_shared<UdfSignature>(entry.name,
dataType, argTypes, entry.variableArity));
+ signatures_.insert(std::make_shared<UdfSignature>(
+ entry.name, dataType, argTypes, entry.variableArity,
entry.allowTypeConversion));
}
free(udfEntries);
} else {
@@ -110,8 +111,8 @@
std::unordered_set<std::shared_ptr<UdfLoader::UdfSignature>> UdfLoader::getRegis
auto dataType = toSubstraitTypeStr(entry.dataType);
auto argTypes = toSubstraitTypeStr(entry.numArgs, entry.argTypes);
auto intermediateType = toSubstraitTypeStr(entry.intermediateType);
- signatures_.insert(
- std::make_shared<UdfSignature>(entry.name, dataType, argTypes,
intermediateType, entry.variableArity));
+ signatures_.insert(std::make_shared<UdfSignature>(
+ entry.name, dataType, argTypes, intermediateType,
entry.variableArity, entry.allowTypeConversion));
}
free(udafEntries);
} else {
diff --git a/cpp/velox/udf/UdfLoader.h b/cpp/velox/udf/UdfLoader.h
index 2783beb85..51264e67c 100644
--- a/cpp/velox/udf/UdfLoader.h
+++ b/cpp/velox/udf/UdfLoader.h
@@ -37,21 +37,33 @@ class UdfLoader {
std::string intermediateType{};
bool variableArity;
+ bool allowTypeConversion;
- UdfSignature(std::string name, std::string returnType, std::string
argTypes, bool variableArity)
- : name(name), returnType(returnType), argTypes(argTypes),
variableArity(variableArity) {}
+ UdfSignature(
+ std::string name,
+ std::string returnType,
+ std::string argTypes,
+ bool variableArity,
+ bool allowTypeConversion)
+ : name(name),
+ returnType(returnType),
+ argTypes(argTypes),
+ variableArity(variableArity),
+ allowTypeConversion(allowTypeConversion) {}
UdfSignature(
std::string name,
std::string returnType,
std::string argTypes,
std::string intermediateType,
- bool variableArity)
+ bool variableArity,
+ bool allowTypeConversion)
: name(name),
returnType(returnType),
argTypes(argTypes),
intermediateType(intermediateType),
- variableArity(variableArity) {}
+ variableArity(variableArity),
+ allowTypeConversion(allowTypeConversion) {}
~UdfSignature() = default;
};
diff --git a/cpp/velox/udf/examples/MyUDF.cc b/cpp/velox/udf/examples/MyUDF.cc
index ee20ca39d..db1c5d770 100644
--- a/cpp/velox/udf/examples/MyUDF.cc
+++ b/cpp/velox/udf/examples/MyUDF.cc
@@ -222,6 +222,30 @@ class MyDateRegisterer final : public
gluten::UdfRegisterer {
const std::string name_ = "mydate";
const char* myDateArg_[2] = {kDate, kInteger};
};
+
+// name: mydate2
+// signatures:
+//    date, integer -> date
+// type: SimpleFunction
+// enable type conversion
+class MyDate2Registerer final : public gluten::UdfRegisterer {
+ public:
+ int getNumUdf() override {
+ return 1;
+ }
+
+ void populateUdfEntries(int& index, gluten::UdfEntry* udfEntries) override {
+ udfEntries[index++] = {name_.c_str(), kDate, 2, myDateArg_, false, true};
+ }
+
+ void registerSignatures() override {
+ facebook::velox::registerFunction<mydate::MyDateSimpleFunction, Date,
Date, int32_t>({name_});
+ }
+
+ private:
+ const std::string name_ = "mydate2";
+ const char* myDateArg_[2] = {kDate, kInteger};
+};
} // namespace mydate
std::vector<std::shared_ptr<gluten::UdfRegisterer>>& globalRegisters() {
@@ -239,6 +263,7 @@ void setupRegisterers() {
registerers.push_back(std::make_shared<myudf::MyUdf2Registerer>());
registerers.push_back(std::make_shared<myudf::MyUdf3Registerer>());
registerers.push_back(std::make_shared<mydate::MyDateRegisterer>());
+ registerers.push_back(std::make_shared<mydate::MyDate2Registerer>());
inited = true;
}
} // namespace
diff --git a/docs/developers/VeloxUDF.md b/docs/developers/VeloxUDF.md
index c896fd672..25b896929 100644
--- a/docs/developers/VeloxUDF.md
+++ b/docs/developers/VeloxUDF.md
@@ -21,18 +21,18 @@ The following steps demonstrate how to set up a UDF library
project:
- **Implement the Interface Functions:**
Implement the following interface functions that integrate UDF into Project
Gluten:
- - `getNumUdf()`:
- This function should return the number of UDF in the library.
- This is used to allocating udfEntries array as the argument for the next
function `getUdfEntries`.
+  - `getNumUdf()`:
+    This function should return the number of UDFs in the library.
+    This is used to allocate the udfEntries array passed as the argument to the next function, `getUdfEntries`.
- - `getUdfEntries(gluten::UdfEntry* udfEntries)`:
- This function should populate the provided udfEntries array with the
details of the UDF, including function names and signatures.
+ - `getUdfEntries(gluten::UdfEntry* udfEntries)`:
+ This function should populate the provided udfEntries array with the
details of the UDF, including function names and signatures.
- - `registerUdf()`:
- This function is called to register the UDF to Velox function registry.
- This is where users should register functions by calling
`facebook::velox::exec::registerVecotorFunction` or other Velox APIs.
+ - `registerUdf()`:
+ This function is called to register the UDF to Velox function registry.
+    This is where users should register functions by calling `facebook::velox::exec::registerVectorFunction` or other Velox APIs.
- - The interface functions are mapped to marcos in
[Udf.h](../../cpp/velox/udf/Udf.h). Here's an example of how to implement these
functions:
+  - The interface functions are mapped to macros in [Udf.h](../../cpp/velox/udf/Udf.h). Here's an example of how to implement these functions:
```
// Filename MyUDF.cc
@@ -176,6 +176,14 @@ The output from spark-shell will be like
+------------------+----------------+
```
+## Configurations
+
+| Parameters | Description
|
+|----------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------|
+| spark.gluten.sql.columnar.backend.velox.udfLibraryPaths | Path to the
udf/udaf libraries.
|
+| spark.gluten.sql.columnar.backend.velox.driver.udfLibraryPaths | Path to the
udf/udaf libraries on driver node. Only applicable on yarn-client mode.
|
+| spark.gluten.sql.columnar.backend.velox.udfAllowTypeConversion | Whether to inject a `cast` where possible to convert mismatched input data types to one of the registered signatures. |
+
# Pandas UDFs (a.k.a. Vectorized UDFs)
## Introduction
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]