This is an automated email from the ASF dual-hosted git repository.
zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 51b1901a7 [GLUTEN-6235][CH] Fix crash on ExpandTransform::work() (#6238)
51b1901a7 is described below
commit 51b1901a797c8dc43f11793fd9b679d2477a69aa
Author: exmy <[email protected]>
AuthorDate: Thu Jun 27 14:34:02 2024 +0800
[GLUTEN-6235][CH] Fix crash on ExpandTransform::work() (#6238)
[CH] Fix crash on ExpandTransform::work()
---
.../execution/GlutenClickHouseHiveTableSuite.scala | 25 ++++++++++++++++++++++
cpp-ch/local-engine/Operator/ExpandTransform.cpp | 2 +-
.../execution/BasicScanExecTransformer.scala | 19 +---------------
.../hive/execution/AbstractHiveTableScanExec.scala | 2 +-
.../hive/execution/AbstractHiveTableScanExec.scala | 2 +-
.../hive/execution/AbstractHiveTableScanExec.scala | 2 +-
.../hive/execution/AbstractHiveTableScanExec.scala | 2 +-
7 files changed, 31 insertions(+), 23 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseHiveTableSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseHiveTableSuite.scala
index 9b52f6a8c..4e190c087 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseHiveTableSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseHiveTableSuite.scala
@@ -1252,4 +1252,29 @@ class GlutenClickHouseHiveTableSuite
}
spark.sql("drop table test_tbl_3452")
}
+
+ test("GLUTEN-6235: Fix crash on ExpandTransform::work()") {
+ val tbl = "test_tbl_6235"
+ sql(s"drop table if exists $tbl")
+ val createSql =
+ s"""
+ |create table $tbl
+ |stored as textfile
+ |as select 1 as a1, 2 as a2, 3 as a3, 4 as a4, 5 as a5, 6 as a6, 7 as a7, 8 as a8, 9 as a9
+ |""".stripMargin
+ sql(createSql)
+ val select_sql =
+ s"""
+ |select
+ |a5,a6,a7,a8,a3,a4,a9
+ |,count(distinct a2) as a2
+ |,count(distinct a1) as a1
+ |,count(distinct if(a3=1,a2,null)) as a33
+ |,count(distinct if(a4=2,a1,null)) as a43
+ |from $tbl
+ |group by a5,a6,a7,a8,a3,a4,a9 with cube
+ |""".stripMargin
+ compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
+ sql(s"drop table if exists $tbl")
+ }
}
diff --git a/cpp-ch/local-engine/Operator/ExpandTransform.cpp
b/cpp-ch/local-engine/Operator/ExpandTransform.cpp
index 106c38e2d..f5787163c 100644
--- a/cpp-ch/local-engine/Operator/ExpandTransform.cpp
+++ b/cpp-ch/local-engine/Operator/ExpandTransform.cpp
@@ -104,7 +104,7 @@ void ExpandTransform::work()
if (kind == EXPAND_FIELD_KIND_SELECTION)
{
- const auto & original_col = original_cols[field.get<Int32>()];
+ const auto & original_col = original_cols.at(field.get<Int32>());
if (type->isNullable() == original_col->isNullable())
{
cols.push_back(original_col);
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
b/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
index 9d231bbc2..64071fb14 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala
@@ -110,7 +110,7 @@ trait BasicScanExecTransformer extends LeafTransformSupport
with BaseDataSource
}
override protected def doTransform(context: SubstraitContext): TransformContext = {
- val output = filterRedundantField(outputAttributes())
+ val output = outputAttributes()
val typeNodes = ConverterUtils.collectAttributeTypeNodes(output)
val nameList = ConverterUtils.collectAttributeNamesWithoutExprId(output)
val columnTypeNodes = output.map {
@@ -156,21 +156,4 @@ trait BasicScanExecTransformer extends LeafTransformSupport with BaseDataSource
context.nextOperatorId(this.nodeName))
TransformContext(output, output, readNode)
}
-
- private def filterRedundantField(outputs: Seq[Attribute]): Seq[Attribute] = {
- var finalOutput: List[Attribute] = List()
- val outputList = outputs.toArray
- for (i <- outputList.indices) {
- var dup = false
- for (j <- 0 until i) {
- if (outputList(i).name == outputList(j).name) {
- dup = true
- }
- }
- if (!dup) {
- finalOutput = finalOutput :+ outputList(i)
- }
- }
- finalOutput
- }
}
diff --git
a/shims/spark32/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
b/shims/spark32/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
index 95106b4ed..46b59ac30 100644
---
a/shims/spark32/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
+++
b/shims/spark32/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
@@ -75,7 +75,7 @@ abstract private[hive] class AbstractHiveTableScanExec(
override val output: Seq[Attribute] = {
// Retrieve the original attributes based on expression ID so that capitalization matches.
- requestedAttributes.map(originalAttributes)
+ requestedAttributes.map(originalAttributes).distinct
}
// Bind all partition key attribute references in the partition pruning predicate for later
diff --git
a/shims/spark33/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
b/shims/spark33/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
index 78f5ff7f1..dd095f0ff 100644
---
a/shims/spark33/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
+++
b/shims/spark33/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
@@ -75,7 +75,7 @@ abstract private[hive] class AbstractHiveTableScanExec(
override val output: Seq[Attribute] = {
// Retrieve the original attributes based on expression ID so that capitalization matches.
- requestedAttributes.map(originalAttributes)
+ requestedAttributes.map(originalAttributes).distinct
}
// Bind all partition key attribute references in the partition pruning predicate for later
diff --git
a/shims/spark34/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
b/shims/spark34/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
index 77f15ac57..87aba00b0 100644
---
a/shims/spark34/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
+++
b/shims/spark34/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
@@ -77,7 +77,7 @@ abstract private[hive] class AbstractHiveTableScanExec(
override val output: Seq[Attribute] = {
// Retrieve the original attributes based on expression ID so that capitalization matches.
- requestedAttributes.map(originalAttributes)
+ requestedAttributes.map(originalAttributes).distinct
}
// Bind all partition key attribute references in the partition pruning predicate for later
diff --git
a/shims/spark35/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
b/shims/spark35/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
index 77f15ac57..87aba00b0 100644
---
a/shims/spark35/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
+++
b/shims/spark35/src/main/scala/org/apache/spark/sql/hive/execution/AbstractHiveTableScanExec.scala
@@ -77,7 +77,7 @@ abstract private[hive] class AbstractHiveTableScanExec(
override val output: Seq[Attribute] = {
// Retrieve the original attributes based on expression ID so that capitalization matches.
- requestedAttributes.map(originalAttributes)
+ requestedAttributes.map(originalAttributes).distinct
}
// Bind all partition key attribute references in the partition pruning predicate for later
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]