This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 49f6657c16 [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241228) (#8368)
49f6657c16 is described below

commit 49f6657c166b222d15308f3539b4f426690ae2bd
Author: Kyligence Git <[email protected]>
AuthorDate: Sat Dec 28 05:16:45 2024 -0600

    [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241228) (#8368)
    
    * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241228)
    
    * Fix UT due to https://github.com/ClickHouse/ClickHouse/pull/73422
    
    ---------
    
    Co-authored-by: kyligence-git <[email protected]>
    Co-authored-by: Chang Chen <[email protected]>
---
 .../org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala    | 5 ++---
 .../org/apache/gluten/backendsapi/clickhouse/RuntimeSettings.scala  | 6 ++++++
 .../GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite.scala    | 4 ++--
 cpp-ch/clickhouse.version                                           | 4 ++--
 cpp-ch/local-engine/Common/CHUtil.cpp                               | 5 +++++
 5 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
index b93c002561..0099598d04 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
@@ -91,15 +91,14 @@ class CHListenerApi extends ListenerApi with Logging {
       "local_engine.settings.log_processors_profiles" -> "true")
     conf.setCHSettings("spark_version", SPARK_VERSION)
     // add memory limit for external sort
-    val externalSortKey = CHConf.runtimeSettings("max_bytes_before_external_sort")
-    if (conf.getLong(externalSortKey, -1) < 0) {
+    if (conf.getLong(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key, -1) < 0) {
       if (conf.getBoolean("spark.memory.offHeap.enabled", defaultValue = false)) {
         val memSize = JavaUtils.byteStringAsBytes(conf.get("spark.memory.offHeap.size"))
         if (memSize > 0L) {
           val cores = conf.getInt("spark.executor.cores", 1).toLong
           val sortMemLimit = ((memSize / cores) * 0.8).toLong
           logDebug(s"max memory for sorting: $sortMemLimit")
-          conf.set(externalSortKey, sortMemLimit.toString)
+          conf.set(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key, sortMemLimit.toString)
         }
       }
     }
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeSettings.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeSettings.scala
index 1031778cd1..140b09a9ab 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeSettings.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeSettings.scala
@@ -30,6 +30,12 @@ object RuntimeSettings {
       
.doc("https://clickhouse.com/docs/en/operations/settings/settings#min_insert_block_size_rows")
       .longConf
       .createWithDefault(1048449)
+
+  val MAX_BYTES_BEFORE_EXTERNAL_SORT =
+    buildConf(runtimeSettings("max_bytes_before_external_sort"))
+      
.doc("https://clickhouse.com/docs/en/operations/settings/query-complexity#settings-max_bytes_before_external_sort")
+      .longConf
+      .createWithDefault(0)
   // scalastyle:on line.size.limit
 
   /** Gluten Configuration */
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite.scala
index b1885874f5..e35ddd1e7e 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite.scala
@@ -17,7 +17,7 @@
 package org.apache.gluten.execution.mergetree
 
 import org.apache.gluten.GlutenConfig
-import org.apache.gluten.backendsapi.clickhouse.CHConf
+import org.apache.gluten.backendsapi.clickhouse.{CHConf, RuntimeSettings}
 import org.apache.gluten.execution.GlutenClickHouseTPCHAbstractSuite
 
 import org.apache.spark.SparkConf
@@ -53,7 +53,7 @@ class GlutenClickHouseMergeTreeWriteTaskNotSerializableSuite
 
   test("GLUTEN-6470: Fix Task not serializable error when inserting mergetree data") {
 
-    val externalSortKey = CHConf.runtimeSettings("max_bytes_before_external_sort")
+    val externalSortKey = RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key
     assertResult(3435973836L)(spark.conf.get(externalSortKey).toLong)
 
     spark.sql(s"""
diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index 85b0902663..83bbbf9a21 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20241224
-CH_COMMIT=b38537577c5
+CH_BRANCH=rebase_ch/20241228
+CH_COMMIT=bf8e58b57e9
diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp b/cpp-ch/local-engine/Common/CHUtil.cpp
index 03c89b741b..c85104e5ae 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -82,6 +82,7 @@ namespace Setting
 {
 extern const SettingsUInt64 prefer_external_sort_block_bytes;
 extern const SettingsUInt64 max_bytes_before_external_sort;
+extern const SettingsDouble max_bytes_ratio_before_external_sort;
 extern const SettingsBool query_plan_merge_filters;
 extern const SettingsBool compile_expressions;
 extern const SettingsShortCircuitFunctionEvaluation short_circuit_function_evaluation;
@@ -644,6 +645,10 @@ void BackendInitializerUtil::initSettings(const SparkConfigs::ConfigMap & spark_
     settings[Setting::short_circuit_function_evaluation] = ShortCircuitFunctionEvaluation::DISABLE;
     ///
 
+    // After https://github.com/ClickHouse/ClickHouse/pull/73422
+    // Since we already set max_bytes_before_external_sort, set max_bytes_ratio_before_external_sort to 0
+    settings[Setting::max_bytes_ratio_before_external_sort] = 0.;
+
     for (const auto & [key, value] : spark_conf_map)
     {
         // Firstly apply spark.gluten.sql.columnar.backend.ch.runtime_config.local_engine.settings.* to settings


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to