This is an automated email from the ASF dual-hosted git repository.

lgbo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 31dc8ed27 [GLUTEN-6938][CH] Fix core dump when range partition include 
literal (#6964)
31dc8ed27 is described below

commit 31dc8ed2711662c2a82a8c74dc4cf6199a2edc03
Author: Chang chen <[email protected]>
AuthorDate: Thu Aug 22 12:05:44 2024 +0800

    [GLUTEN-6938][CH] Fix core dump when range partition include literal (#6964)
    
    * [GLUTEN-6938][CH] Fix core dump when range partition include literal
    
    * update per comment
---
 .../apache/spark/sql/execution/utils/CHExecUtil.scala    |  4 ++--
 .../GlutenClickhouseFunctionSuite.scala                  | 16 ++++++++++++++--
 .../GlutenClickhouseStringFunctionsSuite.scala           |  4 +++-
 .../execution/parquet/GlutenParquetFilterSuite.scala     |  9 ++++-----
 .../scala/org/apache/gluten/test/GlutenTPCHBase.scala    | 10 ++++++----
 cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp          | 16 ++++++++++++++--
 cpp-ch/local-engine/Shuffle/SelectorBuilder.h            |  2 +-
 7 files changed, 44 insertions(+), 17 deletions(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
 
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
index 38c15fde7..4496d893f 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
@@ -319,7 +319,7 @@ object CHExecUtil extends Logging {
     // Thus in Columnar Shuffle we never use the "key" part.
     val isOrderSensitive = isRoundRobin && !SQLConf.get.sortBeforeRepartition
 
-    val rddWithpartitionKey: RDD[Product2[Int, ColumnarBatch]] =
+    val rddWithPartitionKey: RDD[Product2[Int, ColumnarBatch]] =
       if (
         GlutenConfig.getConf.isUseColumnarShuffleManager
         || GlutenConfig.getConf.isUseCelebornShuffleManager
@@ -345,7 +345,7 @@ object CHExecUtil extends Logging {
 
     val dependency =
       new ColumnarShuffleDependency[Int, ColumnarBatch, ColumnarBatch](
-        rddWithpartitionKey,
+        rddWithPartitionKey,
         new PartitionIdPassthrough(newPartitioning.numPartitions),
         serializer,
         shuffleWriterProcessor = 
ShuffleExchangeExec.createShuffleWriteProcessor(writeMetrics),
diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
similarity index 94%
rename from 
backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
rename to 
backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
index 4130ea348..11d5290c0 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
@@ -14,9 +14,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.gluten.execution
+package org.apache.gluten.execution.compatibility
 
 import org.apache.gluten.GlutenConfig
+import org.apache.gluten.execution.GlutenClickHouseTPCHAbstractSuite
 import org.apache.gluten.utils.UTSystemParameters
 
 import org.apache.spark.SparkConf
@@ -78,6 +79,18 @@ class GlutenClickhouseFunctionSuite extends 
GlutenClickHouseTPCHAbstractSuite {
     }
   }
 
+  test("https://github.com/apache/incubator-gluten/issues/6938") {
+    val testSQL =
+      s"""
+         |select * from (
+         |  select 1 as x, r_name as y, 's' as z from region
+         |  union all
+         |  select 2 as x, n_name as y, null as z from nation
+         |) order by y,x,z
+         |""".stripMargin
+    runQueryAndCompare(testSQL)(_ => ())
+  }
+
   test("Support In list option contains non-foldable expression") {
     runQueryAndCompare(
       """
@@ -217,5 +230,4 @@ class GlutenClickhouseFunctionSuite extends 
GlutenClickHouseTPCHAbstractSuite {
       )
     }
   }
-
 }
diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseStringFunctionsSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseStringFunctionsSuite.scala
similarity index 97%
rename from 
backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseStringFunctionsSuite.scala
rename to 
backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseStringFunctionsSuite.scala
index 163a8feda..98c0c2b35 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseStringFunctionsSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseStringFunctionsSuite.scala
@@ -14,7 +14,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.gluten.execution
+package org.apache.gluten.execution.compatibility
+
+import org.apache.gluten.execution.GlutenClickHouseWholeStageTransformerSuite
 
 import org.apache.spark.SparkConf
 
diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala
index 0a8d1729c..a2f897d37 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala
@@ -17,13 +17,12 @@
 package org.apache.gluten.execution.parquet
 
 import org.apache.gluten.execution.{FileSourceScanExecTransformer, 
GlutenClickHouseWholeStageTransformerSuite}
-import org.apache.gluten.test.GlutenSQLTestUtils
+import org.apache.gluten.test.{GlutenSQLTestUtils, GlutenTPCHBase}
 
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.gluten.test.GlutenTPCHBase
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.Decimal
 
@@ -385,13 +384,13 @@ class GlutenParquetFilterSuite
         'p_size.int >= 1,
         'p_partkey.long.isNotNull,
         ('p_brand.string === "Brand#12" &&
-          ('p_container.string.in("SM CASE", "SM BOX", "SM PACK", "SM PKG")) &&
+          'p_container.string.in("SM CASE", "SM BOX", "SM PACK", "SM PKG") &&
           'p_size.int <= 5) ||
           ('p_brand.string === "Brand#23" &&
-            ('p_container.string.in("MED BAG", "MED BOX", "MED PKG", "MED 
PACK")) &&
+            'p_container.string.in("MED BAG", "MED BOX", "MED PKG", "MED 
PACK") &&
             'p_size.int <= 10) ||
           ('p_brand.string === "Brand#34" &&
-            ('p_container.string.in("LG CASE", "LG BOX", "LG PACK", "LG PKG")) 
&&
+            'p_container.string.in("LG CASE", "LG BOX", "LG PACK", "LG PKG") &&
             'p_size.int <= 15)
       )
     ),
diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala
index 685f185ac..5a07ebab1 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala
@@ -14,9 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.sql.gluten.test
-
-import org.apache.gluten.test.GlutenTPCBase
+package org.apache.gluten.test
 
 import org.apache.spark.sql.catalyst.TableIdentifier
 
@@ -51,7 +49,11 @@ trait GlutenTPCHBase extends GlutenTPCBase {
 
   override def dropTables(): Unit = {
     tpchCreateTable.keys.foreach {
-      tableName => 
spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true)
+      tableName =>
+        spark.sessionState.catalog.dropTable(
+          TableIdentifier(tableName),
+          ignoreIfNotExists = true,
+          purge = true)
     }
   }
 
diff --git a/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp 
b/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
index b9bd02c3e..272a6f2f6 100644
--- a/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
+++ b/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
@@ -367,19 +367,31 @@ void 
RangeSelectorBuilder::computePartitionIdByBinarySearch(DB::Block & block, D
         selector.emplace_back(selected_partition);
     }
 }
+namespace {
+int doCompareAt(const ColumnPtr & lhs, size_t n, size_t m, const IColumn & 
rhs, int nan_direction_hint)
+{
+    if (const auto * l_const = typeid_cast<const ColumnConst *>(lhs.get()))
+    {
+        // we know rhs never be Const
+        chassert(l_const->getDataType() == rhs.getDataType());
+        return l_const->getDataColumn().compareAt(0, m, rhs, 
nan_direction_hint);
+    }
+    return lhs->compareAt(n, m, rhs, nan_direction_hint);
+}
+}
 
 int RangeSelectorBuilder::compareRow(
     const DB::Columns & columns,
     const std::vector<size_t> & required_columns,
     size_t row,
     const DB::Columns & bound_columns,
-    size_t bound_row)
+    size_t bound_row) const
 {
     for (size_t i = 0, n = required_columns.size(); i < n; ++i)
     {
         auto lpos = required_columns[i];
         auto rpos = i;
-        auto res = columns[lpos]->compareAt(row, bound_row, 
*bound_columns[rpos], sort_descriptions[i].nulls_direction)
+        auto res = doCompareAt(columns[lpos], row, bound_row, 
*bound_columns[rpos], sort_descriptions[i].nulls_direction)
             * sort_descriptions[i].direction;
         if (res != 0)
             return res;
diff --git a/cpp-ch/local-engine/Shuffle/SelectorBuilder.h 
b/cpp-ch/local-engine/Shuffle/SelectorBuilder.h
index 97894daa3..7349849f5 100644
--- a/cpp-ch/local-engine/Shuffle/SelectorBuilder.h
+++ b/cpp-ch/local-engine/Shuffle/SelectorBuilder.h
@@ -118,7 +118,7 @@ private:
         const std::vector<size_t> & required_columns,
         size_t row,
         const DB::Columns & bound_columns,
-        size_t bound_row);
+        size_t bound_row) const;
 
     int binarySearchBound(
         const DB::Columns & bound_columns,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to