This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 6d10695bac [GLUTEN-8782][VL] Add correct suffix to the name of
merge_extract companion function (#11210)
6d10695bac is described below
commit 6d10695bacf7473bd551aa88b0763b18e22beb0e
Author: Rui Mo <[email protected]>
AuthorDate: Fri Nov 28 22:37:28 2025 +0800
[GLUTEN-8782][VL] Add correct suffix to the name of merge_extract companion
function (#11210)
Add correct suffix to the name of merge_extract companion function, so we
no longer need the change made to Velox IBM/velox@b8da5d0.
---
cpp/velox/substrait/SubstraitToVeloxPlan.cc | 65 +++++++++++++++++++---
cpp/velox/substrait/SubstraitToVeloxPlan.h | 5 +-
.../substrait/SubstraitToVeloxPlanValidator.cc | 3 +-
cpp/velox/tests/VeloxSubstraitRoundTripTest.cc | 8 +--
ep/build-velox/src/get-velox.sh | 4 +-
5 files changed, 70 insertions(+), 15 deletions(-)
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index 6f1134edcc..eb10a67e85 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -162,6 +162,42 @@ RowTypePtr getJoinOutputType(
VELOX_FAIL("Output should include left or right columns.");
}
+// Get the function name suffix used by merge_extract companion function when
having the same intermediate type across
+// signatures. Correponds to Velox 'toSuffixString', and the base name can be
referred from
+// 'velox/expression/FunctionSignature.cpp'.
+std::string companionFunctionSuffix(const TypePtr& type) {
+ // For primitive and decimal types, return their names.
+ if (type->isDecimal()) {
+ return "DECIMAL";
+ }
+
+ if (type->isPrimitiveType()) {
+ return type->toString();
+ }
+
+ if (type->kind() == TypeKind::ARRAY) {
+ return "array_" + companionFunctionSuffix(std::dynamic_pointer_cast<const
ArrayType>(type)->elementType());
+ }
+ if (type->kind() == TypeKind::MAP) {
+ auto mapType = std::dynamic_pointer_cast<const MapType>(type);
+ return "map_" + companionFunctionSuffix(mapType->keyType()) + "_" +
companionFunctionSuffix(mapType->valueType());
+ }
+
+ std::string name;
+ if (type->kind() == TypeKind::ROW) {
+ name = "row";
+ }
+ std::string result = name;
+ const auto rowType = asRowType(type);
+ for (const auto& child : rowType->children()) {
+ result += '_';
+ result += companionFunctionSuffix(child);
+ }
+ result += "_end";
+ result += name;
+ return result;
+}
+
} // namespace
bool SplitInfo::canUseCudfConnector() {
@@ -231,15 +267,29 @@ core::AggregationNode::Step
SubstraitToVeloxPlanConverter::toAggregationFunction
std::string SubstraitToVeloxPlanConverter::toAggregationFunctionName(
const std::string& baseName,
- const core::AggregationNode::Step& step) {
+ const core::AggregationNode::Step& step,
+ const TypePtr& resultType) {
std::string suffix;
switch (step) {
case core::AggregationNode::Step::kPartial:
suffix = "_partial";
break;
- case core::AggregationNode::Step::kFinal:
- suffix = "_merge_extract";
- break;
+ case core::AggregationNode::Step::kFinal: {
+ auto functionName = baseName + "_merge_extract";
+ auto signatures = exec::getAggregateFunctionSignatures(functionName);
+ if (signatures.has_value() && signatures.value().size() > 0) {
+ // The merge_extract function is registered without suffix.
+ return functionName;
+ }
+ // The merge_extract function must be registered with suffix based on
result type.
+ functionName += ("_" + companionFunctionSuffix(resultType));
+ signatures = exec::getAggregateFunctionSignatures(functionName);
+ VELOX_CHECK(
+ signatures.has_value() && signatures.value().size() > 0,
+ "Cannot find function signature for {} in final aggregation step.",
+ functionName);
+ return functionName;
+ }
case core::AggregationNode::Step::kIntermediate:
suffix = "_merge";
break;
@@ -436,16 +486,17 @@ core::PlanNodePtr
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
}
}
const auto& aggFunction = measure.measure();
- auto baseFuncName = SubstraitParser::findVeloxFunction(functionMap_,
aggFunction.function_reference());
- auto funcName = toAggregationFunctionName(baseFuncName,
toAggregationFunctionStep(aggFunction));
std::vector<core::TypedExprPtr> aggParams;
aggParams.reserve(aggFunction.arguments().size());
for (const auto& arg : aggFunction.arguments()) {
aggParams.emplace_back(exprConverter_->toVeloxExpr(arg.value(),
inputType));
}
+
auto aggVeloxType = SubstraitParser::parseType(aggFunction.output_type());
- auto aggExpr = std::make_shared<const core::CallTypedExpr>(aggVeloxType,
std::move(aggParams), funcName);
+ auto baseFuncName = SubstraitParser::findVeloxFunction(functionMap_,
aggFunction.function_reference());
+ auto funcName = toAggregationFunctionName(baseFuncName,
toAggregationFunctionStep(aggFunction), aggVeloxType);
+ auto aggExpr = std::make_shared<const core::CallTypedExpr>(aggVeloxType,
std::move(aggParams), funcName);
std::vector<TypePtr> rawInputTypes =
SubstraitParser::sigToTypes(SubstraitParser::findFunctionSpec(functionMap_,
aggFunction.function_reference()));
aggregates.emplace_back(core::AggregationNode::Aggregate{aggExpr,
rawInputTypes, mask, {}, {}});
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h
b/cpp/velox/substrait/SubstraitToVeloxPlan.h
index 8c0639d051..48e5709ea8 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h
@@ -214,7 +214,10 @@ class SubstraitToVeloxPlanConverter {
core::AggregationNode::Step toAggregationFunctionStep(const
::substrait::AggregateFunction& sAggFuc);
/// We use companion functions if the aggregate is not single.
- std::string toAggregationFunctionName(const std::string& baseName, const
core::AggregationNode::Step& step);
+ std::string toAggregationFunctionName(
+ const std::string& baseName,
+ const core::AggregationNode::Step& step,
+ const TypePtr& resultType);
/// Helper Function to convert Substrait sortField to Velox sortingKeys and
/// sortingOrders.
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 0e1ad8c683..38d81320f9 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -1150,7 +1150,8 @@ bool
SubstraitToVeloxPlanValidator::validateAggRelFunctionType(const ::substrait
}
auto baseFuncName =
SubstraitParser::mapToVeloxFunction(SubstraitParser::getNameBeforeDelimiter(funcSpec),
isDecimal);
- auto funcName = planConverter_->toAggregationFunctionName(baseFuncName,
funcStep);
+ auto resultType = SubstraitParser::parseType(aggFunction.output_type());
+ auto funcName = planConverter_->toAggregationFunctionName(baseFuncName,
funcStep, resultType);
auto signaturesOpt = exec::getAggregateFunctionSignatures(funcName);
if (!signaturesOpt) {
LOG_VALIDATION_MSG("can not find function signature for " + funcName + "
in AggregateRel.");
diff --git a/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
b/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
index 8eeb2818ce..cec61afd07 100644
--- a/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
+++ b/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
@@ -477,7 +477,7 @@ TEST_F(VeloxSubstraitRoundTripTest, sumAndCountCompanion) {
auto plan = PlanBuilder()
.values(vectors)
.singleAggregation({}, {"sum_partial(c1)",
"count_partial(c4)"})
- .singleAggregation({}, {"sum_merge_extract(a0)",
"count_merge_extract(a1)"})
+ .singleAggregation({}, {"sum_merge_extract_bigint(a0)",
"count_merge_extract_bigint(a1)"})
.planNode();
assertPlanConversion(plan, "SELECT sum(c1), count(c4) FROM tmp");
@@ -492,7 +492,7 @@ TEST_F(VeloxSubstraitRoundTripTest, sumGlobalCompanion) {
.values(vectors)
.singleAggregation({"c0"}, {"sum_partial(c0)",
"sum_partial(c1)"})
.singleAggregation({"c0"}, {"sum_merge(a0)",
"sum_merge(a1)"})
- .singleAggregation({"c0"}, {"sum_merge_extract(a0)",
"sum_merge_extract(a1)"})
+ .singleAggregation({"c0"}, {"sum_merge_extract_bigint(a0)",
"sum_merge_extract_bigint(a1)"})
.planNode();
assertPlanConversion(plan, "SELECT c0, sum(c0), sum(c1) FROM tmp GROUP BY
c0");
}
@@ -505,7 +505,7 @@ TEST_F(VeloxSubstraitRoundTripTest, sumMaskCompanion) {
.values(vectors)
.project({"c0", "c1", "c2 % 2 < 10 AS m0", "c3 % 3 = 0 AS
m1"})
.singleAggregation({}, {"sum_partial(c0)",
"sum_partial(c0)", "sum_partial(c1)"}, {"m0", "m1", "m1"})
- .singleAggregation({}, {"sum_merge_extract(a0)",
"sum_merge_extract(a1)", "sum_merge_extract(a2)"})
+ .singleAggregation({}, {"sum_merge_extract_bigint(a0)",
"sum_merge_extract_bigint(a1)", "sum_merge_extract_bigint(a2)"})
.planNode();
assertPlanConversion(
@@ -538,7 +538,7 @@ TEST_F(VeloxSubstraitRoundTripTest, avgCompanion) {
auto plan = PlanBuilder()
.values(vectors)
.singleAggregation({}, {"avg_partial(c4)"})
- .singleAggregation({}, {"avg_merge_extract(a0)"})
+ .singleAggregation({}, {"avg_merge_extract_double(a0)"})
.planNode();
assertPlanConversion(plan, "SELECT avg(c4) FROM tmp");
diff --git a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh
index b461c631c9..cf70ba0b96 100755
--- a/ep/build-velox/src/get-velox.sh
+++ b/ep/build-velox/src/get-velox.sh
@@ -18,8 +18,8 @@ set -exu
CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
VELOX_REPO=https://github.com/IBM/velox.git
-VELOX_BRANCH=dft-2025_11_26
-VELOX_ENHANCED_BRANCH=ibm-2025_11_26
+VELOX_BRANCH=dft-2025_11_26_fix
+VELOX_ENHANCED_BRANCH=ibm-2025_11_26_fix
VELOX_HOME=""
RUN_SETUP_SCRIPT=ON
ENABLE_ENHANCED_FEATURES=OFF
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]