This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 31e1b7449f [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241130) (#8112)
31e1b7449f is described below

commit 31e1b7449fba2ab6938d92f5cf013b2e82df5446
Author: Kyligence Git <[email protected]>
AuthorDate: Sat Nov 30 07:34:46 2024 -0600

    [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241130) (#8112)
    
    * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241130)
    
    * Fix build due to https://github.com/ClickHouse/ClickHouse/pull/71406
    
    * Fix build due to https://github.com/ClickHouse/ClickHouse/pull/72460
    
    ---------
    
    Co-authored-by: kyligence-git <[email protected]>
    Co-authored-by: Chang Chen <[email protected]>
---
 cpp-ch/clickhouse.version                          |  4 +--
 cpp-ch/local-engine/Common/AggregateUtil.cpp       | 39 ++++++++++++----------
 cpp-ch/local-engine/Common/AggregateUtil.h         |  2 +-
 .../Parser/RelParsers/CrossRelParser.cpp           | 11 ++++--
 .../Parser/RelParsers/JoinRelParser.cpp            | 18 +++++++---
 cpp-ch/local-engine/tests/gtest_ch_join.cpp        |  6 +++-
 6 files changed, 52 insertions(+), 28 deletions(-)

diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index edb13fdc57..565220d786 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20241129
-CH_COMMIT=101ba3f944d1
+CH_BRANCH=rebase_ch/20241130
+CH_COMMIT=d5d38588bd3
diff --git a/cpp-ch/local-engine/Common/AggregateUtil.cpp 
b/cpp-ch/local-engine/Common/AggregateUtil.cpp
index 0707d18aa0..4a768eb17b 100644
--- a/cpp-ch/local-engine/Common/AggregateUtil.cpp
+++ b/cpp-ch/local-engine/Common/AggregateUtil.cpp
@@ -48,6 +48,9 @@ extern const SettingsUInt64 
aggregation_in_order_max_block_bytes;
 extern const SettingsUInt64 group_by_two_level_threshold;
 extern const SettingsFloat min_hit_rate_to_use_consecutive_keys_optimization;
 extern const SettingsUInt64 max_block_size;
+extern const SettingsBool compile_aggregate_expressions;
+extern const SettingsUInt64 min_count_to_compile_aggregate_expression;
+extern const SettingsBool enable_software_prefetch_in_aggregation;
 }
 
 template <typename Method>
@@ -186,7 +189,7 @@ DB::Block AggregateDataBlockConverter::next()
 }
 
 DB::Aggregator::Params AggregatorParamsHelper::buildParams(
-    DB::ContextPtr context,
+    const DB::ContextPtr & context,
     const DB::Names & grouping_keys,
     const DB::AggregateDescriptions & agg_descriptions,
     Mode mode,
@@ -194,7 +197,7 @@ DB::Aggregator::Params AggregatorParamsHelper::buildParams(
 {
     const auto & settings = context->getSettingsRef();
     size_t max_rows_to_group_by = mode == Mode::PARTIAL_TO_FINISHED ? 0 : 
static_cast<size_t>(settings[DB::Setting::max_rows_to_group_by]);
-    DB::OverflowMode group_by_overflow_mode = 
settings[DB::Setting::group_by_overflow_mode];
+
     size_t group_by_two_level_threshold
         = algorithm == Algorithm::GlutenGraceAggregate ? 
static_cast<size_t>(settings[DB::Setting::group_by_two_level_threshold]) : 0;
     size_t group_by_two_level_threshold_bytes = algorithm == 
Algorithm::GlutenGraceAggregate
@@ -207,39 +210,39 @@ DB::Aggregator::Params 
AggregatorParamsHelper::buildParams(
         ? false
         : (mode == Mode::PARTIAL_TO_FINISHED ? false : 
static_cast<bool>(settings[DB::Setting::empty_result_for_aggregation_by_empty_set]));
     DB::TemporaryDataOnDiskScopePtr tmp_data_scope = algorithm == 
Algorithm::GlutenGraceAggregate ? nullptr : context->getTempDataOnDisk();
-    size_t max_threads = settings[DB::Setting::max_threads];
+
     size_t min_free_disk_space = algorithm == Algorithm::GlutenGraceAggregate
         ? 0
         : 
static_cast<size_t>(settings[DB::Setting::min_free_disk_space_for_temporary_data]);
-    bool compile_aggregate_expressions = mode == Mode::PARTIAL_TO_FINISHED ? 
false : true;
-    size_t min_count_to_compile_aggregate_expression = mode == 
Mode::PARTIAL_TO_FINISHED ? 0 : 3;
+    bool compile_aggregate_expressions = mode == Mode::PARTIAL_TO_FINISHED ? 
false : settings[DB::Setting::compile_aggregate_expressions];
+    size_t min_count_to_compile_aggregate_expression = mode == 
Mode::PARTIAL_TO_FINISHED ? 0 : 
settings[DB::Setting::min_count_to_compile_aggregate_expression];
     size_t max_block_size = 
PODArrayUtil::adjustMemoryEfficientSize(settings[DB::Setting::max_block_size]);
-    bool enable_prefetch = mode == Mode::PARTIAL_TO_FINISHED ? false : true;
+    bool enable_prefetch = mode != Mode::PARTIAL_TO_FINISHED;
     bool only_merge = mode == Mode::PARTIAL_TO_FINISHED;
     bool optimize_group_by_constant_keys
         = mode == Mode::PARTIAL_TO_FINISHED ? false : 
settings[DB::Setting::optimize_group_by_constant_keys];
-    double min_hit_rate_to_use_consecutive_keys_optimization = 
settings[DB::Setting::min_hit_rate_to_use_consecutive_keys_optimization];
+
+    DB::Settings aggregate_settings{settings};
+    aggregate_settings[DB::Setting::max_rows_to_group_by] = 
max_rows_to_group_by;
+    aggregate_settings[DB::Setting::max_bytes_before_external_group_by] = 
max_bytes_before_external_group_by;
+    aggregate_settings[DB::Setting::min_free_disk_space_for_temporary_data] = 
min_free_disk_space;
+    aggregate_settings[DB::Setting::compile_aggregate_expressions] = 
compile_aggregate_expressions;
+    aggregate_settings[DB::Setting::min_count_to_compile_aggregate_expression] 
= min_count_to_compile_aggregate_expression;
+    aggregate_settings[DB::Setting::max_block_size] = max_block_size;
+    aggregate_settings[DB::Setting::enable_software_prefetch_in_aggregation] = 
enable_prefetch;
+    aggregate_settings[DB::Setting::optimize_group_by_constant_keys] = 
optimize_group_by_constant_keys;
     DB::Aggregator::Params params(
+        aggregate_settings,
         grouping_keys,
         agg_descriptions,
         false,
-        max_rows_to_group_by,
-        group_by_overflow_mode,
         group_by_two_level_threshold,
         group_by_two_level_threshold_bytes,
-        max_bytes_before_external_group_by,
         empty_result_for_aggregation_by_empty_set,
         tmp_data_scope,
-        max_threads,
-        min_free_disk_space,
-        compile_aggregate_expressions,
-        min_count_to_compile_aggregate_expression,
-        max_block_size,
-        enable_prefetch,
         only_merge,
-        optimize_group_by_constant_keys,
-        min_hit_rate_to_use_consecutive_keys_optimization,
         {});
+
     return params;
 }
 
diff --git a/cpp-ch/local-engine/Common/AggregateUtil.h 
b/cpp-ch/local-engine/Common/AggregateUtil.h
index 380e1ea355..8fd36987ac 100644
--- a/cpp-ch/local-engine/Common/AggregateUtil.h
+++ b/cpp-ch/local-engine/Common/AggregateUtil.h
@@ -71,7 +71,7 @@ public:
 
     // for using grace aggregating, never enable ch spill, otherwise there 
will be data lost.
     static DB::Aggregator::Params buildParams(
-        DB::ContextPtr context,
+        const DB::ContextPtr & context,
         const DB::Names & grouping_keys,
         const DB::AggregateDescriptions & agg_descriptions,
         Mode mode,
diff --git a/cpp-ch/local-engine/Parser/RelParsers/CrossRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/CrossRelParser.cpp
index 5a6f229744..ae0a50d2d3 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/CrossRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/CrossRelParser.cpp
@@ -207,7 +207,9 @@ DB::QueryPlanPtr CrossRelParser::parseJoin(const 
substrait::CrossRel & join, DB:
             context->getSettingsRef()[Setting::max_block_size],
             context->getSettingsRef()[Setting::min_joined_block_size_bytes],
             1,
-            false);
+            /* required_output_ = */ NameSet{},
+            false,
+            /* use_new_analyzer_ = */ false);
         join_step->setStepDescription("CROSS_JOIN");
         steps.emplace_back(join_step.get());
         std::vector<QueryPlanPtr> plans;
@@ -254,7 +256,12 @@ void CrossRelParser::addConvertStep(TableJoin & 
table_join, DB::QueryPlan & left
     NameSet left_columns_set;
     for (const auto & col : left.getCurrentHeader().getNames())
         left_columns_set.emplace(col);
-    
table_join.setColumnsFromJoinedTable(right.getCurrentHeader().getNamesAndTypesList(),
 left_columns_set, getUniqueName("right") + ".");
+        
+    table_join.setColumnsFromJoinedTable(
+        right.getCurrentHeader().getNamesAndTypesList(),
+        left_columns_set,
+        getUniqueName("right") + ".",
+        left.getCurrentHeader().getNamesAndTypesList());
 
     // fix right table key duplicate
     NamesWithAliases right_table_alias;
diff --git a/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp
index 7493471697..6a5f9bc937 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/JoinRelParser.cpp
@@ -322,7 +322,9 @@ DB::QueryPlanPtr JoinRelParser::parseJoin(const 
substrait::JoinRel & join, DB::Q
             context->getSettingsRef()[Setting::max_block_size],
             context->getSettingsRef()[Setting::min_joined_block_size_bytes],
             1,
-            false);
+            /* required_output_ = */ NameSet{},
+            false,
+            /* use_new_analyzer_ = */ false);
 
         join_step->setStepDescription("SORT_MERGE_JOIN");
         steps.emplace_back(join_step.get());
@@ -390,7 +392,11 @@ void JoinRelParser::addConvertStep(TableJoin & table_join, 
DB::QueryPlan & left,
     NameSet left_columns_set;
     for (const auto & col : left.getCurrentHeader().getNames())
         left_columns_set.emplace(col);
-    
table_join.setColumnsFromJoinedTable(right.getCurrentHeader().getNamesAndTypesList(),
 left_columns_set, getUniqueName("right") + ".");
+    table_join.setColumnsFromJoinedTable(
+        right.getCurrentHeader().getNamesAndTypesList(),
+        left_columns_set,
+        getUniqueName("right") + ".",
+        left.getCurrentHeader().getNamesAndTypesList());
 
     // fix right table key duplicate
     NamesWithAliases right_table_alias;
@@ -787,7 +793,9 @@ DB::QueryPlanPtr JoinRelParser::buildMultiOnClauseHashJoin(
         context->getSettingsRef()[Setting::max_block_size],
         context->getSettingsRef()[Setting::min_joined_block_size_bytes],
         1,
-        false);
+        /* required_output_ = */ NameSet{},
+        false,
+        /* use_new_analyzer_ = */ false);
     join_step->setStepDescription("Multi join on clause hash join");
     steps.emplace_back(join_step.get());
     std::vector<QueryPlanPtr> plans;
@@ -827,7 +835,9 @@ DB::QueryPlanPtr JoinRelParser::buildSingleOnClauseHashJoin(
         context->getSettingsRef()[Setting::max_block_size],
         context->getSettingsRef()[Setting::min_joined_block_size_bytes],
         1,
-        false);
+        /* required_output_ = */ NameSet{},
+        false,
+        /* use_new_analyzer_ = */ false);
 
     join_step->setStepDescription("HASH_JOIN");
     steps.emplace_back(join_step.get());
diff --git a/cpp-ch/local-engine/tests/gtest_ch_join.cpp 
b/cpp-ch/local-engine/tests/gtest_ch_join.cpp
index 02d4312474..5df5eaff8c 100644
--- a/cpp-ch/local-engine/tests/gtest_ch_join.cpp
+++ b/cpp-ch/local-engine/tests/gtest_ch_join.cpp
@@ -97,6 +97,10 @@ TEST(TestJoin, simple)
     for (const auto & column : join->columnsFromJoinedTable())
         join->addJoinedColumn(column);
 
+    auto columns_from_left_table = 
left_plan.getCurrentHeader().getNamesAndTypesList();
+    for (auto & column_from_joined_table : columns_from_left_table)
+        join->setUsedColumn(column_from_joined_table, JoinTableSide::Left);
+
     auto left_keys = left.getNamesAndTypesList();
     join->addJoinedColumnsAndCorrectTypes(left_keys, true);
     std::cerr << "after join:\n";
@@ -123,7 +127,7 @@ TEST(TestJoin, simple)
     auto hash_join = std::make_shared<HashJoin>(join, 
right_plan.getCurrentHeader());
 
     QueryPlanStepPtr join_step
-        = std::make_unique<JoinStep>(left_plan.getCurrentHeader(), 
right_plan.getCurrentHeader(), hash_join, 8192, 8192, 1, false);
+        = std::make_unique<JoinStep>(left_plan.getCurrentHeader(), 
right_plan.getCurrentHeader(), hash_join, 8192, 8192, 1,  NameSet{}, false, 
false);
 
     std::cerr << "join step:" << join_step->getOutputHeader().dumpStructure() 
<< std::endl;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to