PHILO-HE commented on code in PR #5130:
URL: https://github.com/apache/incubator-gluten/pull/5130#discussion_r1542250509
##########
cpp/velox/udf/examples/MyUDF.cpp:
##########
@@ -105,9 +111,134 @@ static
std::vector<std::shared_ptr<exec::FunctionSignature>> bigintSignatures()
return
{exec::FunctionSignatureBuilder().returnType("bigint").argumentType("bigint").build()};
}
+/////////////////////////////// Simple Aggregate Function
///////////////////////////////
+
+// Copied from velox/exec/tests/SimpleAverageAggregate.cpp
+
+// Implementation of the average aggregation function through the
+// SimpleAggregateAdapter.
+template <typename T>
+class AverageAggregate {
Review Comment:
I feel it may be better to create MyUDAF.cpp to hold this.
##########
cpp/velox/udf/UdfLoader.cc:
##########
@@ -77,11 +78,34 @@
std::unordered_set<std::shared_ptr<UdfLoader::UdfSignature>> UdfLoader::getRegis
const auto& entry = udfEntry[i];
auto dataType = toSubstraitTypeStr(entry.dataType);
auto argTypes = toSubstraitTypeStr(entry.numArgs, entry.argTypes);
- signatures.insert(std::make_shared<UdfSignature>(entry.name, dataType,
argTypes));
+ if (entry.intermediateType) {
+ auto intermediateType = toSubstraitTypeStr(entry.intermediateType);
+ signatures_.insert(std::make_shared<UdfSignature>(entry.name,
dataType, argTypes, intermediateType));
+ } else {
+ signatures_.insert(std::make_shared<UdfSignature>(entry.name,
dataType, argTypes));
+ }
}
free(udfEntry);
}
- return signatures;
+ return signatures_;
+}
+
+std::unordered_set<std::string> UdfLoader::getRegisteredUdafNames() {
+ if (handles_.empty()) {
+ return {};
+ }
+ if (!names_.empty()) {
+ return names_;
+ }
+ if (signatures_.empty()) {
+ getRegisteredUdfSignatures();
+ }
+ for (const auto& sig : signatures_) {
+ if (!sig->intermediateType.empty()) {
Review Comment:
Maybe, better to not mix UDF/UDAF signatures together?
##########
backends-velox/src/main/scala/org/apache/spark/sql/expression/UDFResolver.scala:
##########
@@ -78,9 +106,12 @@ case class UDFExpression(
object UDFResolver extends Logging {
private val UDFNames = mutable.HashSet[String]()
-
private val UDFMap = mutable.HashMap[(String, Seq[DataType]),
ExpressionType]()
+ private val UDAFNames = mutable.HashSet[String]()
+ private val UDAFMap =
+ mutable.HashMap[(String, Seq[DataType]), (ExpressionType,
Seq[AttributeReference])]()
Review Comment:
Better to add comments for this map & the above UDFMap.
##########
backends-velox/src/test/scala/io/glutenproject/expression/VeloxUdfSuite.scala:
##########
@@ -76,10 +76,16 @@ abstract class VeloxUdfSuite extends GlutenQueryTest with
SQLHelper {
| myudf1(1),
| myudf1(1L),
| myudf2(100L),
- | mydate(cast('2024-03-25' as date), 5)
+ | mydate(cast('2024-03-25' as date), 5),
+ | myavg(1),
Review Comment:
Nit: it would be better to separate the tests of UDF/UDAF.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]