This is an automated email from the ASF dual-hosted git repository.
lgbo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 31dc8ed27 [GLUTEN-6938][CH] Fix core dump when range partition include
literal (#6964)
31dc8ed27 is described below
commit 31dc8ed2711662c2a82a8c74dc4cf6199a2edc03
Author: Chang chen <[email protected]>
AuthorDate: Thu Aug 22 12:05:44 2024 +0800
[GLUTEN-6938][CH] Fix core dump when range partition include literal (#6964)
* [GLUTEN-6938][CH] Fix core dump when range partition include literal
* update per comment
---
.../apache/spark/sql/execution/utils/CHExecUtil.scala | 4 ++--
.../GlutenClickhouseFunctionSuite.scala | 16 ++++++++++++++--
.../GlutenClickhouseStringFunctionsSuite.scala | 4 +++-
.../execution/parquet/GlutenParquetFilterSuite.scala | 9 ++++-----
.../scala/org/apache/gluten/test/GlutenTPCHBase.scala | 10 ++++++----
cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp | 16 ++++++++++++++--
cpp-ch/local-engine/Shuffle/SelectorBuilder.h | 2 +-
7 files changed, 44 insertions(+), 17 deletions(-)
diff --git
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
index 38c15fde7..4496d893f 100644
---
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala
@@ -319,7 +319,7 @@ object CHExecUtil extends Logging {
// Thus in Columnar Shuffle we never use the "key" part.
val isOrderSensitive = isRoundRobin && !SQLConf.get.sortBeforeRepartition
- val rddWithpartitionKey: RDD[Product2[Int, ColumnarBatch]] =
+ val rddWithPartitionKey: RDD[Product2[Int, ColumnarBatch]] =
if (
GlutenConfig.getConf.isUseColumnarShuffleManager
|| GlutenConfig.getConf.isUseCelebornShuffleManager
@@ -345,7 +345,7 @@ object CHExecUtil extends Logging {
val dependency =
new ColumnarShuffleDependency[Int, ColumnarBatch, ColumnarBatch](
- rddWithpartitionKey,
+ rddWithPartitionKey,
new PartitionIdPassthrough(newPartitioning.numPartitions),
serializer,
shuffleWriterProcessor =
ShuffleExchangeExec.createShuffleWriteProcessor(writeMetrics),
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
similarity index 94%
rename from
backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
rename to
backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
index 4130ea348..11d5290c0 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
@@ -14,9 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.execution
+package org.apache.gluten.execution.compatibility
import org.apache.gluten.GlutenConfig
+import org.apache.gluten.execution.GlutenClickHouseTPCHAbstractSuite
import org.apache.gluten.utils.UTSystemParameters
import org.apache.spark.SparkConf
@@ -78,6 +79,18 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
}
}
+ test("https://github.com/apache/incubator-gluten/issues/6938") {
+ val testSQL =
+ s"""
+ |select * from (
+ | select 1 as x, r_name as y, 's' as z from region
+ | union all
+ | select 2 as x, n_name as y, null as z from nation
+ |) order by y,x,z
+ |""".stripMargin
+ runQueryAndCompare(testSQL)(_ => ())
+ }
+
test("Support In list option contains non-foldable expression") {
runQueryAndCompare(
"""
@@ -217,5 +230,4 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
)
}
}
-
}
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseStringFunctionsSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseStringFunctionsSuite.scala
similarity index 97%
rename from
backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseStringFunctionsSuite.scala
rename to
backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseStringFunctionsSuite.scala
index 163a8feda..98c0c2b35 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseStringFunctionsSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseStringFunctionsSuite.scala
@@ -14,7 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.execution
+package org.apache.gluten.execution.compatibility
+
+import org.apache.gluten.execution.GlutenClickHouseWholeStageTransformerSuite
import org.apache.spark.SparkConf
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala
index 0a8d1729c..a2f897d37 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala
@@ -17,13 +17,12 @@
package org.apache.gluten.execution.parquet
import org.apache.gluten.execution.{FileSourceScanExecTransformer, GlutenClickHouseWholeStageTransformerSuite}
-import org.apache.gluten.test.GlutenSQLTestUtils
+import org.apache.gluten.test.{GlutenSQLTestUtils, GlutenTPCHBase}
import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.gluten.test.GlutenTPCHBase
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.Decimal
@@ -385,13 +384,13 @@ class GlutenParquetFilterSuite
'p_size.int >= 1,
'p_partkey.long.isNotNull,
('p_brand.string === "Brand#12" &&
- ('p_container.string.in("SM CASE", "SM BOX", "SM PACK", "SM PKG")) &&
+ 'p_container.string.in("SM CASE", "SM BOX", "SM PACK", "SM PKG") &&
'p_size.int <= 5) ||
('p_brand.string === "Brand#23" &&
- ('p_container.string.in("MED BAG", "MED BOX", "MED PKG", "MED PACK")) &&
+ 'p_container.string.in("MED BAG", "MED BOX", "MED PKG", "MED PACK") &&
'p_size.int <= 10) ||
('p_brand.string === "Brand#34" &&
- ('p_container.string.in("LG CASE", "LG BOX", "LG PACK", "LG PKG")) &&
+ 'p_container.string.in("LG CASE", "LG BOX", "LG PACK", "LG PKG") &&
'p_size.int <= 15)
)
),
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala
index 685f185ac..5a07ebab1 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala
@@ -14,9 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.spark.sql.gluten.test
-
-import org.apache.gluten.test.GlutenTPCBase
+package org.apache.gluten.test
import org.apache.spark.sql.catalyst.TableIdentifier
@@ -51,7 +49,11 @@ trait GlutenTPCHBase extends GlutenTPCBase {
override def dropTables(): Unit = {
tpchCreateTable.keys.foreach {
- tableName => spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true)
+ tableName =>
+ spark.sessionState.catalog.dropTable(
+ TableIdentifier(tableName),
+ ignoreIfNotExists = true,
+ purge = true)
}
}
diff --git a/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
b/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
index b9bd02c3e..272a6f2f6 100644
--- a/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
+++ b/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
@@ -367,19 +367,31 @@ void RangeSelectorBuilder::computePartitionIdByBinarySearch(DB::Block & block, D
selector.emplace_back(selected_partition);
}
}
+namespace {
+int doCompareAt(const ColumnPtr & lhs, size_t n, size_t m, const IColumn & rhs, int nan_direction_hint)
+{
+ if (const auto * l_const = typeid_cast<const ColumnConst *>(lhs.get()))
+ {
+ // we know rhs never be Const
+ chassert(l_const->getDataType() == rhs.getDataType());
+ return l_const->getDataColumn().compareAt(0, m, rhs, nan_direction_hint);
+ }
+ return lhs->compareAt(n, m, rhs, nan_direction_hint);
+}
+}
int RangeSelectorBuilder::compareRow(
const DB::Columns & columns,
const std::vector<size_t> & required_columns,
size_t row,
const DB::Columns & bound_columns,
- size_t bound_row)
+ size_t bound_row) const
{
for (size_t i = 0, n = required_columns.size(); i < n; ++i)
{
auto lpos = required_columns[i];
auto rpos = i;
- auto res = columns[lpos]->compareAt(row, bound_row, *bound_columns[rpos], sort_descriptions[i].nulls_direction)
+ auto res = doCompareAt(columns[lpos], row, bound_row, *bound_columns[rpos], sort_descriptions[i].nulls_direction)
* sort_descriptions[i].direction;
if (res != 0)
return res;
diff --git a/cpp-ch/local-engine/Shuffle/SelectorBuilder.h
b/cpp-ch/local-engine/Shuffle/SelectorBuilder.h
index 97894daa3..7349849f5 100644
--- a/cpp-ch/local-engine/Shuffle/SelectorBuilder.h
+++ b/cpp-ch/local-engine/Shuffle/SelectorBuilder.h
@@ -118,7 +118,7 @@ private:
const std::vector<size_t> & required_columns,
size_t row,
const DB::Columns & bound_columns,
- size_t bound_row);
+ size_t bound_row) const;
int binarySearchBound(
const DB::Columns & bound_columns,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]