This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 49f6657c16 [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241228) (#8368)
49f6657c16 is described below
commit 49f6657c166b222d15308f3539b4f426690ae2bd
Author: Kyligence Git <[email protected]>
AuthorDate: Sat Dec 28 05:16:45 2024 -0600
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20241228) (#8368)
* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241228)
* Fix UT due to https://github.com/ClickHouse/ClickHouse/pull/73422
---------
Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
---
.../org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala | 5 ++---
.../org/apache/gluten/backendsapi/clickhouse/RuntimeSettings.scala | 6 ++++++
.../GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite.scala | 4 ++--
cpp-ch/clickhouse.version | 4 ++--
cpp-ch/local-engine/Common/CHUtil.cpp | 5 +++++
5 files changed, 17 insertions(+), 7 deletions(-)
diff --git
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
index b93c002561..0099598d04 100644
---
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
@@ -91,15 +91,14 @@ class CHListenerApi extends ListenerApi with Logging {
"local_engine.settings.log_processors_profiles" -> "true")
conf.setCHSettings("spark_version", SPARK_VERSION)
// add memory limit for external sort
- val externalSortKey = CHConf.runtimeSettings("max_bytes_before_external_sort")
- if (conf.getLong(externalSortKey, -1) < 0) {
+ if (conf.getLong(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key, -1) < 0) {
if (conf.getBoolean("spark.memory.offHeap.enabled", defaultValue = false)) {
val memSize = JavaUtils.byteStringAsBytes(conf.get("spark.memory.offHeap.size"))
if (memSize > 0L) {
val cores = conf.getInt("spark.executor.cores", 1).toLong
val sortMemLimit = ((memSize / cores) * 0.8).toLong
logDebug(s"max memory for sorting: $sortMemLimit")
- conf.set(externalSortKey, sortMemLimit.toString)
+ conf.set(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key, sortMemLimit.toString)
}
}
}
diff --git
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeSettings.scala
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeSettings.scala
index 1031778cd1..140b09a9ab 100644
---
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeSettings.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeSettings.scala
@@ -30,6 +30,12 @@ object RuntimeSettings {
.doc("https://clickhouse.com/docs/en/operations/settings/settings#min_insert_block_size_rows")
.longConf
.createWithDefault(1048449)
+
+ val MAX_BYTES_BEFORE_EXTERNAL_SORT =
+ buildConf(runtimeSettings("max_bytes_before_external_sort"))
+ .doc("https://clickhouse.com/docs/en/operations/settings/query-complexity#settings-max_bytes_before_external_sort")
+ .longConf
+ .createWithDefault(0)
// scalastyle:on line.size.limit
/** Gluten Configuration */
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite.scala
index b1885874f5..e35ddd1e7e 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite.scala
@@ -17,7 +17,7 @@
package org.apache.gluten.execution.mergetree
import org.apache.gluten.GlutenConfig
-import org.apache.gluten.backendsapi.clickhouse.CHConf
+import org.apache.gluten.backendsapi.clickhouse.{CHConf, RuntimeSettings}
import org.apache.gluten.execution.GlutenClickHouseTPCHAbstractSuite
import org.apache.spark.SparkConf
@@ -53,7 +53,7 @@ class GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite
test("GLUTEN-6470: Fix Task not serializable error when inserting mergetree data") {
- val externalSortKey = CHConf.runtimeSettings("max_bytes_before_external_sort")
+ val externalSortKey = RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key
assertResult(3435973836L)(spark.conf.get(externalSortKey).toLong)
spark.sql(s"""
diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index 85b0902663..83bbbf9a21 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20241224
-CH_COMMIT=b38537577c5
+CH_BRANCH=rebase_ch/20241228
+CH_COMMIT=bf8e58b57e9
diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp
b/cpp-ch/local-engine/Common/CHUtil.cpp
index 03c89b741b..c85104e5ae 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -82,6 +82,7 @@ namespace Setting
{
extern const SettingsUInt64 prefer_external_sort_block_bytes;
extern const SettingsUInt64 max_bytes_before_external_sort;
+extern const SettingsDouble max_bytes_ratio_before_external_sort;
extern const SettingsBool query_plan_merge_filters;
extern const SettingsBool compile_expressions;
extern const SettingsShortCircuitFunctionEvaluation
short_circuit_function_evaluation;
@@ -644,6 +645,10 @@ void BackendInitializerUtil::initSettings(const
SparkConfigs::ConfigMap & spark_
settings[Setting::short_circuit_function_evaluation] =
ShortCircuitFunctionEvaluation::DISABLE;
///
+ // After https://github.com/ClickHouse/ClickHouse/pull/73422
+ // Since we already set max_bytes_before_external_sort, set max_bytes_ratio_before_external_sort to 0
+ settings[Setting::max_bytes_ratio_before_external_sort] = 0.;
+
for (const auto & [key, value] : spark_conf_map)
{
// Firstly apply
spark.gluten.sql.columnar.backend.ch.runtime_config.local_engine.settings.* to
settings
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]