This is an automated email from the ASF dual-hosted git repository.
mahongbin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 4a953a222 [Gluten-4912][CH] fix bug when a query has no shuffle (#5081)
4a953a222 is described below
commit 4a953a22214f0d74757d5a412218d93dda03aca6
Author: Hongbin Ma <[email protected]>
AuthorDate: Fri Mar 22 18:18:59 2024 +0800
[Gluten-4912][CH] fix bug when a query has no shuffle (#5081)
---
.../execution/GlutenClickHouseMergeTreeWriteSuite.scala | 15 +++++++++++++--
cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp | 13 +++++++------
cpp-ch/local-engine/Shuffle/ShuffleSplitter.cpp | 2 +-
cpp-ch/local-engine/tests/benchmark_local_engine.cpp | 2 +-
4 files changed, 22 insertions(+), 10 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
index 6750e251c..0862fd41a 100644
---
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
+++
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
@@ -1239,7 +1239,7 @@ class GlutenClickHouseMergeTreeWriteSuite
|)
|USING clickhouse
|LOCATION '$basePath/lineitem_mergetree_lowcard'
- |TBLPROPERTIES('lowCardKey'='l_returnflag,L_LINESTATUS')
+
|TBLPROPERTIES('lowCardKey'='l_returnflag,L_LINESTATUS,l_quantity')
|""".stripMargin)
spark.sql(s"""
@@ -1285,7 +1285,7 @@ class GlutenClickHouseMergeTreeWriteSuite
val sqlStr2 =
s"""
|SELECT
- | max(l_returnflag)
+ | max(l_returnflag), min(l_quantity)
|FROM
| lineitem_mergetree_lowcard
|GROUP BY
@@ -1298,6 +1298,17 @@ class GlutenClickHouseMergeTreeWriteSuite
// total rows should remain unchanged
spark.sql(sqlStr2).collect().apply(0).get(0) == "R"
)
+
+ // test select *
+ val sqlStr3 =
+ s"""
+ |SELECT
+ | *
+ |FROM
+ | lineitem_mergetree_lowcard limit 1
+ |
+ |""".stripMargin
+ spark.sql(sqlStr3).collect()
}
test("test mergetree with primary keys filter") {
diff --git a/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
b/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
index c3ff32a97..cb5f0111c 100644
--- a/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
+++ b/cpp-ch/local-engine/Parser/CHColumnToSparkRow.cpp
@@ -325,12 +325,12 @@ SparkRowInfo::SparkRowInfo(
{
const auto & col = cols[col_idx];
/// No need to calculate backing data length for fixed length types
- const auto type_without_nullable = removeNullable(col.type);
+ const auto type_without_nullable =
removeLowCardinalityAndNullable(col.type);
if
(BackingDataLengthCalculator::isVariableLengthDataType(type_without_nullable))
{
if
(BackingDataLengthCalculator::isDataTypeSupportRawData(type_without_nullable))
{
- auto column = col.column->convertToFullColumnIfConst();
+ auto column = col.column->convertToFullIfNeeded();
const auto * nullable_column =
checkAndGetColumn<ColumnNullable>(*column);
if (nullable_column)
{
@@ -348,13 +348,13 @@ SparkRowInfo::SparkRowInfo(
for (size_t i = 0; i < num_rows; ++i)
{
size_t row_idx = masks == nullptr ? i : masks->at(i);
- lengths[i] +=
roundNumberOfBytesToNearestWord(col.column->getDataAt(row_idx).size);
+ lengths[i] +=
roundNumberOfBytesToNearestWord(column->getDataAt(row_idx).size);
}
}
}
else
{
- BackingDataLengthCalculator calculator(col.type);
+ BackingDataLengthCalculator calculator(type_without_nullable);
for (size_t i = 0; i < num_rows; ++i)
{
size_t row_idx = masks == nullptr ? i : masks->at(i);
@@ -462,11 +462,12 @@ std::unique_ptr<SparkRowInfo>
CHColumnToSparkRow::convertCHColumnToSparkRow(cons
const auto & col = block.getByPosition(col_idx);
int64_t field_offset = spark_row_info->getFieldOffset(col_idx);
- ColumnWithTypeAndName
col_not_const{col.column->convertToFullColumnIfConst(), col.type, col.name};
+ ColumnWithTypeAndName col_full{col.column->convertToFullIfNeeded(),
+ removeLowCardinality(col.type), col.name};
writeValue(
spark_row_info->getBufferAddress(),
field_offset,
- col_not_const,
+ col_full,
col_idx,
spark_row_info->getNumRows(),
spark_row_info->getOffsets(),
diff --git a/cpp-ch/local-engine/Shuffle/ShuffleSplitter.cpp
b/cpp-ch/local-engine/Shuffle/ShuffleSplitter.cpp
index 28c57c31f..9baf3c469 100644
--- a/cpp-ch/local-engine/Shuffle/ShuffleSplitter.cpp
+++ b/cpp-ch/local-engine/Shuffle/ShuffleSplitter.cpp
@@ -301,7 +301,7 @@ void ColumnsBuffer::appendSelective(
accumulated_columns.reserve(source.columns());
for (size_t i = 0; i < source.columns(); i++)
{
- auto column =
source.getColumns()[i]->convertToFullColumnIfConst()->convertToFullIfNeeded()->cloneEmpty();
+ auto column =
source.getColumns()[i]->convertToFullIfNeeded()->cloneEmpty();
column->reserve(prefer_buffer_size);
accumulated_columns.emplace_back(std::move(column));
}
diff --git a/cpp-ch/local-engine/tests/benchmark_local_engine.cpp
b/cpp-ch/local-engine/tests/benchmark_local_engine.cpp
index a482e4702..51ee7ad12 100644
--- a/cpp-ch/local-engine/tests/benchmark_local_engine.cpp
+++ b/cpp-ch/local-engine/tests/benchmark_local_engine.cpp
@@ -586,7 +586,7 @@ DB::ContextMutablePtr global_context;
std::vector<ColumnData> columns(arguments.size() + 1);
for (size_t i = 0; i < arguments.size(); ++i)
{
- auto column =
block.getByPosition(i).column->convertToFullColumnIfConst();
+ auto column = block.getByPosition(i).column->convertToFullIfNeeded();
columns[i] = getColumnData(column.get());
}
for (auto _ : state)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]