This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 985b19ae4f [VL] Fix native Union result name (#11832)
985b19ae4f is described below
commit 985b19ae4fe62813674361a28ec8c13f2a387549
Author: lifulong <[email protected]>
AuthorDate: Thu Apr 2 15:55:13 2026 +0800
[VL] Fix native Union result name (#11832)
Fix the native union result using the column type name as the column name, which
caused columns of the same data type to share the same data and produced wrong
results, e.g. all string columns had the same values as the first string column.
const auto name = outRowType->childAt(colIdx)->name();
result name is column type name => Wrong
const auto name = outRowType->nameOf(colIdx);
result name is column name => Correct
Co-authored-by: lifulong <[email protected]>
---
.../gluten/execution/MiscOperatorSuite.scala | 49 ++++++++++++++++++++++
cpp/velox/substrait/SubstraitToVeloxPlan.cc | 3 +-
2 files changed, 51 insertions(+), 1 deletion(-)
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
index f5196cb8c0..9fbd99752e 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
@@ -582,6 +582,55 @@ class MiscOperatorSuite extends
VeloxWholeStageTransformerSuite with AdaptiveSpa
}
}
+ test("native union_all with two level union keeps distinct output columns") {
+ withTempView("union_src_a", "union_src_b", "union_src_c") {
+ Seq(
+ ("valueA", "value1", "value11", "value111"),
+ ("valueA", "value2", "value22", "value222")
+ ).toDF("col1", "col2", "col3", "col4")
+ .createOrReplaceTempView("union_src_a")
+ Seq(
+ ("valueB", "value3", "value33", "value333"),
+ ("valueB", "value4", "value44", "value444")
+ ).toDF("col1", "col2", "col3", "col4")
+ .createOrReplaceTempView("union_src_b")
+
+ withSQLConf(GlutenConfig.NATIVE_UNION_ENABLED.key -> "true") {
+ compareDfResultsAgainstVanillaSpark(
+ () =>
+ spark.sql("""
+ |with deduplicated_data as (
+ | select col1, col2, col3, col4
+ | from (
+ | select
+ | u.col1,
+ | u.col2,
+ | u.col3,
+ | u.col4,
+ | row_number() over (partition by u.col2 order by
u.col5 desc) as rn
+ | from (
+ | select col1, col2, col3, col4, 98 as col5 from
union_src_a
+ | union all
+ | select col1, col2, col3, col4, 100 as col5 from
union_src_b
+ | ) u
+ | ) t
+ | where t.rn = 1
+ |)
+ |select col1, col2, col3, col4
+ |from deduplicated_data
+ |where col1 != 'valueC'
+ |union all
+ |select col1, col2, col3, col4
+ |from deduplicated_data
+ |where col1 = 'valueC'
+ |""".stripMargin),
+ compareResult = true,
+ checkGlutenPlan[UnionExecTransformer]
+ )
+ }
+ }
+ }
+
test("union two tables") {
runQueryAndCompare("""
|select count(orderkey) from (
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
index adb7fc5f45..b964649f01 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc
@@ -1244,7 +1244,8 @@ core::PlanNodePtr
SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
const RowTypePtr outRowType = asRowType(children[0]->outputType());
std::vector<std::string> outNames;
for (int32_t colIdx = 0; colIdx < outRowType->size(); ++colIdx) {
- const auto name = outRowType->childAt(colIdx)->name();
+ // Using field names from the unified output row type instead of child
type names
+ const auto name = outRowType->nameOf(colIdx);
outNames.push_back(name);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]