This is an automated email from the ASF dual-hosted git repository.

yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 6d10695bac [GLUTEN-8782][VL] Add correct suffix to the name of 
merge_extract companion function (#11210)
6d10695bac is described below

commit 6d10695bacf7473bd551aa88b0763b18e22beb0e
Author: Rui Mo <[email protected]>
AuthorDate: Fri Nov 28 22:37:28 2025 +0800

    [GLUTEN-8782][VL] Add correct suffix to the name of merge_extract companion 
function (#11210)
    
    Add correct suffix to the name of merge_extract companion function, so we 
no longer need the change made to Velox IBM/velox@b8da5d0.
---
 cpp/velox/substrait/SubstraitToVeloxPlan.cc        | 65 +++++++++++++++++++---
 cpp/velox/substrait/SubstraitToVeloxPlan.h         |  5 +-
 .../substrait/SubstraitToVeloxPlanValidator.cc     |  3 +-
 cpp/velox/tests/VeloxSubstraitRoundTripTest.cc     |  8 +--
 ep/build-velox/src/get-velox.sh                    |  4 +-
 5 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc 
b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index 6f1134edcc..eb10a67e85 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -162,6 +162,42 @@ RowTypePtr getJoinOutputType(
   VELOX_FAIL("Output should include left or right columns.");
 }
 
+// Get the function name suffix used by merge_extract companion function when 
having the same intermediate type across
+// signatures. Correponds to Velox 'toSuffixString', and the base name can be 
referred from
+// 'velox/expression/FunctionSignature.cpp'.
+std::string companionFunctionSuffix(const TypePtr& type) {
+  // For primitive and decimal types, return their names.
+  if (type->isDecimal()) {
+    return "DECIMAL";
+  }
+
+  if (type->isPrimitiveType()) {
+    return type->toString();
+  }
+
+  if (type->kind() == TypeKind::ARRAY) {
+    return "array_" + companionFunctionSuffix(std::dynamic_pointer_cast<const 
ArrayType>(type)->elementType());
+  }
+  if (type->kind() == TypeKind::MAP) {
+    auto mapType = std::dynamic_pointer_cast<const MapType>(type);
+    return "map_" + companionFunctionSuffix(mapType->keyType()) + "_" + 
companionFunctionSuffix(mapType->valueType());
+  }
+
+  std::string name;
+  if (type->kind() == TypeKind::ROW) {
+    name = "row";
+  }
+  std::string result = name;
+  const auto rowType = asRowType(type);
+  for (const auto& child : rowType->children()) {
+    result += '_';
+    result += companionFunctionSuffix(child);
+  }
+  result += "_end";
+  result += name;
+  return result;
+}
+
 } // namespace
 
 bool SplitInfo::canUseCudfConnector() {
@@ -231,15 +267,29 @@ core::AggregationNode::Step 
SubstraitToVeloxPlanConverter::toAggregationFunction
 
 std::string SubstraitToVeloxPlanConverter::toAggregationFunctionName(
     const std::string& baseName,
-    const core::AggregationNode::Step& step) {
+    const core::AggregationNode::Step& step,
+    const TypePtr& resultType) {
   std::string suffix;
   switch (step) {
     case core::AggregationNode::Step::kPartial:
       suffix = "_partial";
       break;
-    case core::AggregationNode::Step::kFinal:
-      suffix = "_merge_extract";
-      break;
+    case core::AggregationNode::Step::kFinal: {
+      auto functionName = baseName + "_merge_extract";
+      auto signatures = exec::getAggregateFunctionSignatures(functionName);
+      if (signatures.has_value() && signatures.value().size() > 0) {
+        // The merge_extract function is registered without suffix.
+        return functionName;
+      }
+      // The merge_extract function must be registered with suffix based on 
result type.
+      functionName += ("_" + companionFunctionSuffix(resultType));
+      signatures = exec::getAggregateFunctionSignatures(functionName);
+      VELOX_CHECK(
+          signatures.has_value() && signatures.value().size() > 0,
+          "Cannot find function signature for {} in final aggregation step.",
+          functionName);
+      return functionName;
+    }
     case core::AggregationNode::Step::kIntermediate:
       suffix = "_merge";
       break;
@@ -436,16 +486,17 @@ core::PlanNodePtr 
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
       }
     }
     const auto& aggFunction = measure.measure();
-    auto baseFuncName = SubstraitParser::findVeloxFunction(functionMap_, 
aggFunction.function_reference());
-    auto funcName = toAggregationFunctionName(baseFuncName, 
toAggregationFunctionStep(aggFunction));
     std::vector<core::TypedExprPtr> aggParams;
     aggParams.reserve(aggFunction.arguments().size());
     for (const auto& arg : aggFunction.arguments()) {
       aggParams.emplace_back(exprConverter_->toVeloxExpr(arg.value(), 
inputType));
     }
+
     auto aggVeloxType = SubstraitParser::parseType(aggFunction.output_type());
-    auto aggExpr = std::make_shared<const core::CallTypedExpr>(aggVeloxType, 
std::move(aggParams), funcName);
+    auto baseFuncName = SubstraitParser::findVeloxFunction(functionMap_, 
aggFunction.function_reference());
+    auto funcName = toAggregationFunctionName(baseFuncName, 
toAggregationFunctionStep(aggFunction), aggVeloxType);
 
+    auto aggExpr = std::make_shared<const core::CallTypedExpr>(aggVeloxType, 
std::move(aggParams), funcName);
     std::vector<TypePtr> rawInputTypes =
         
SubstraitParser::sigToTypes(SubstraitParser::findFunctionSpec(functionMap_, 
aggFunction.function_reference()));
     aggregates.emplace_back(core::AggregationNode::Aggregate{aggExpr, 
rawInputTypes, mask, {}, {}});
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h 
b/cpp/velox/substrait/SubstraitToVeloxPlan.h
index 8c0639d051..48e5709ea8 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h
@@ -214,7 +214,10 @@ class SubstraitToVeloxPlanConverter {
   core::AggregationNode::Step toAggregationFunctionStep(const 
::substrait::AggregateFunction& sAggFuc);
 
   /// We use companion functions if the aggregate is not single.
-  std::string toAggregationFunctionName(const std::string& baseName, const 
core::AggregationNode::Step& step);
+  std::string toAggregationFunctionName(
+      const std::string& baseName,
+      const core::AggregationNode::Step& step,
+      const TypePtr& resultType);
 
   /// Helper Function to convert Substrait sortField to Velox sortingKeys and
   /// sortingOrders.
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc 
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 0e1ad8c683..38d81320f9 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -1150,7 +1150,8 @@ bool 
SubstraitToVeloxPlanValidator::validateAggRelFunctionType(const ::substrait
     }
     auto baseFuncName =
         
SubstraitParser::mapToVeloxFunction(SubstraitParser::getNameBeforeDelimiter(funcSpec),
 isDecimal);
-    auto funcName = planConverter_->toAggregationFunctionName(baseFuncName, 
funcStep);
+    auto resultType = SubstraitParser::parseType(aggFunction.output_type());
+    auto funcName = planConverter_->toAggregationFunctionName(baseFuncName, 
funcStep, resultType);
     auto signaturesOpt = exec::getAggregateFunctionSignatures(funcName);
     if (!signaturesOpt) {
       LOG_VALIDATION_MSG("can not find function signature for " + funcName + " 
in AggregateRel.");
diff --git a/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc 
b/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
index 8eeb2818ce..cec61afd07 100644
--- a/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
+++ b/cpp/velox/tests/VeloxSubstraitRoundTripTest.cc
@@ -477,7 +477,7 @@ TEST_F(VeloxSubstraitRoundTripTest, sumAndCountCompanion) {
   auto plan = PlanBuilder()
                   .values(vectors)
                   .singleAggregation({}, {"sum_partial(c1)", 
"count_partial(c4)"})
-                  .singleAggregation({}, {"sum_merge_extract(a0)", 
"count_merge_extract(a1)"})
+                  .singleAggregation({}, {"sum_merge_extract_bigint(a0)", 
"count_merge_extract_bigint(a1)"})
                   .planNode();
 
   assertPlanConversion(plan, "SELECT sum(c1), count(c4) FROM tmp");
@@ -492,7 +492,7 @@ TEST_F(VeloxSubstraitRoundTripTest, sumGlobalCompanion) {
                   .values(vectors)
                   .singleAggregation({"c0"}, {"sum_partial(c0)", 
"sum_partial(c1)"})
                   .singleAggregation({"c0"}, {"sum_merge(a0)", 
"sum_merge(a1)"})
-                  .singleAggregation({"c0"}, {"sum_merge_extract(a0)", 
"sum_merge_extract(a1)"})
+                  .singleAggregation({"c0"}, {"sum_merge_extract_bigint(a0)", 
"sum_merge_extract_bigint(a1)"})
                   .planNode();
   assertPlanConversion(plan, "SELECT c0, sum(c0), sum(c1) FROM tmp GROUP BY 
c0");
 }
@@ -505,7 +505,7 @@ TEST_F(VeloxSubstraitRoundTripTest, sumMaskCompanion) {
                   .values(vectors)
                   .project({"c0", "c1", "c2 % 2 < 10 AS m0", "c3 % 3 = 0 AS 
m1"})
                   .singleAggregation({}, {"sum_partial(c0)", 
"sum_partial(c0)", "sum_partial(c1)"}, {"m0", "m1", "m1"})
-                  .singleAggregation({}, {"sum_merge_extract(a0)", 
"sum_merge_extract(a1)", "sum_merge_extract(a2)"})
+                  .singleAggregation({}, {"sum_merge_extract_bigint(a0)", 
"sum_merge_extract_bigint(a1)", "sum_merge_extract_bigint(a2)"})
                   .planNode();
 
   assertPlanConversion(
@@ -538,7 +538,7 @@ TEST_F(VeloxSubstraitRoundTripTest, avgCompanion) {
   auto plan = PlanBuilder()
                   .values(vectors)
                   .singleAggregation({}, {"avg_partial(c4)"})
-                  .singleAggregation({}, {"avg_merge_extract(a0)"})
+                  .singleAggregation({}, {"avg_merge_extract_double(a0)"})
                   .planNode();
 
   assertPlanConversion(plan, "SELECT avg(c4) FROM tmp");
diff --git a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh
index b461c631c9..cf70ba0b96 100755
--- a/ep/build-velox/src/get-velox.sh
+++ b/ep/build-velox/src/get-velox.sh
@@ -18,8 +18,8 @@ set -exu
 
 CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
 VELOX_REPO=https://github.com/IBM/velox.git
-VELOX_BRANCH=dft-2025_11_26
-VELOX_ENHANCED_BRANCH=ibm-2025_11_26
+VELOX_BRANCH=dft-2025_11_26_fix
+VELOX_ENHANCED_BRANCH=ibm-2025_11_26_fix
 VELOX_HOME=""
 RUN_SETUP_SCRIPT=ON
 ENABLE_ENHANCED_FEATURES=OFF


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to