This is an automated email from the ASF dual-hosted git repository.
zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 51b1901a7 [GLUTEN-6235][CH] Fix crash on ExpandTransform::work() (#6238)
51b1901a7 is described below
commit 51b1901a797c8dc43f11793fd9b679d2477a69aa
Author: exmy <[email protected]>
AuthorDate: Thu Jun 27 14:34:02 2024 +0800
[GLUTEN-6235][CH] Fix crash on ExpandTransform::work() (#6238)
[CH] Fix crash on ExpandTransform::work()
---
.../execution/GlutenClickHouseHiveTableSuite.scala | 25 ++++++++++++++++++++++
cpp-ch/local-engine/Operator/ExpandTransform.cpp | 2 +-
.../execution/BasicScanExecTransformer.scala | 19 +---------------
.../hive/execution/AbstractHiveTableScanExec.scala | 2 +-
.../hive/execution/AbstractHiveTableScanExec.scala | 2 +-
.../hive/execution/AbstractHiveTableScanExec.scala | 2 +-
.../hive/execution/AbstractHiveTableScanExec.scala | 2 +-
7 files changed, 31 insertions(+), 23 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseHiveTableSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseHiveTableSuite.scala
index 9b52f6a8c..4e190c087 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseHiveTableSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseHiveTableSuite.scala
@@ -1252,4 +1252,29 @@ class GlutenClickHouseHiveTableSuite
}
spark.sql("drop table test_tbl_3452")
}
+
+ test("GLUTEN-6235: Fix crash on ExpandTransform::work()") {
+ val tbl = "test_tbl_6235"
+ sql(s"drop table if exists $tbl")
+ val createSql =
+ s"""
+ |create table $tbl
+ |stored as textfile
+ |as select 1 as a1, 2 as a2, 3 as a3, 4 as a4, 5 as a5, 6 as a6, 7 as a7, 8 as a8, 9 as a9
+ |""".stripMargin
+ sql(createSql)
+ val select_sql =
+ s"""
+ |select
+ |a5,a6,a7,a8,a3,a4,a9
+ |,count(distinct a2) as a2
+ |,count(distinct a1) as a1
+ |,count(distinct if(a3=1,a2,null)) as a33
+ |,count(distinct if(a4=2,a1,null)) as a43
+ |from $tbl
+ |group by a5,a6,a7,a8,a3,a4,a9 with cube
+ |""".stripMargin
+ compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
+ sql(s"drop table if exists $tbl")
+ }
}
diff --git a/cpp-ch/local-engine/Operator/ExpandTransform.cpp
b/cpp-ch/local-engine/Operator/ExpandTransform.cpp
index 106c38e2d..f5787163c 100644
--- a/cpp-ch/local-engine/Operator/ExpandTransform.cpp
+++ b/cpp-ch/local-engine/Operator/ExpandTransform.cpp
@@ -104,7 +104,7 @@ void ExpandTransform::work()
if (kind == EXPAND_FIELD_KIND_SELECTION)
{
- const auto & original_col = original_cols[field.get<Int32>()];
+ const auto & original_col = original_cols.at(field.get<Int32>());
if (type->isNullable() == original_col->isNullable())
{
cols.push_back(original_col);
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
b/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
index 9d231bbc2..64071fb14 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
@@ -110,7 +110,7 @@ trait BasicScanExecTransformer extends LeafTransformSupport
with BaseDataSource
}
override protected def doTransform(context: SubstraitContext): TransformContext = {
- val output = filterRedundantField(outputAttributes())
+ val output = outputAttributes()
val typeNodes = ConverterUtils.collectAttributeTypeNodes(output)
val nameList = ConverterUtils.collectAttributeNamesWithoutExprId(output)
val columnTypeNodes = output.map {
@@ -156,21 +156,4 @@ trait BasicScanExecTransformer extends LeafTransformSupport with BaseDataSource
context.nextOperatorId(this.nodeName))
TransformContext(output, output, readNode)
}
-
- private def filterRedundantField(outputs: Seq[Attribute]): Seq[Attribute] = {
- var finalOutput: List[Attribute] = List()
- val outputList = outputs.toArray
- for (i <- outputList.indices) {
- var dup = false
- for (j <- 0 until i) {
- if (outputList(i).name == outputList(j).name) {
- dup = true
- }
- }
- if (!dup) {
- finalOutput = finalOutput :+ outputList(i)
- }
- }
- finalOutput
- }
}
diff --git
a/shims/spark32/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
b/shims/spark32/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
index 95106b4ed..46b59ac30 100644
---
a/shims/spark32/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
+++
b/shims/spark32/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
@@ -75,7 +75,7 @@ abstract private[hive] class AbstractHiveTableScanExec(
override val output: Seq[Attribute] = {
// Retrieve the original attributes based on expression ID so that capitalization matches.
- requestedAttributes.map(originalAttributes)
+ requestedAttributes.map(originalAttributes).distinct
}
// Bind all partition key attribute references in the partition pruning predicate for later
diff --git
a/shims/spark33/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
b/shims/spark33/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
index 78f5ff7f1..dd095f0ff 100644
---
a/shims/spark33/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
+++
b/shims/spark33/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
@@ -75,7 +75,7 @@ abstract private[hive] class AbstractHiveTableScanExec(
override val output: Seq[Attribute] = {
// Retrieve the original attributes based on expression ID so that capitalization matches.
- requestedAttributes.map(originalAttributes)
+ requestedAttributes.map(originalAttributes).distinct
}
// Bind all partition key attribute references in the partition pruning predicate for later
diff --git
a/shims/spark34/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
b/shims/spark34/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
index 77f15ac57..87aba00b0 100644
---
a/shims/spark34/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
+++
b/shims/spark34/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
@@ -77,7 +77,7 @@ abstract private[hive] class AbstractHiveTableScanExec(
override val output: Seq[Attribute] = {
// Retrieve the original attributes based on expression ID so that capitalization matches.
- requestedAttributes.map(originalAttributes)
+ requestedAttributes.map(originalAttributes).distinct
}
// Bind all partition key attribute references in the partition pruning predicate for later
diff --git
a/shims/spark35/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
b/shims/spark35/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
index 77f15ac57..87aba00b0 100644
---
a/shims/spark35/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
+++
b/shims/spark35/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
@@ -77,7 +77,7 @@ abstract private[hive] class AbstractHiveTableScanExec(
override val output: Seq[Attribute] = {
// Retrieve the original attributes based on expression ID so that capitalization matches.
- requestedAttributes.map(originalAttributes)
+ requestedAttributes.map(originalAttributes).distinct
}
// Bind all partition key attribute references in the partition pruning predicate for later
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]