(incubator-gluten) branch main updated: [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240621) (#6170)

changchen Fri, 21 Jun 2024 06:10:43 -0700

This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new ef7b2c516 [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240621) (#6170)
ef7b2c516 is described below

commit ef7b2c516bee6c82161a86574dbda91e05ab1b3a
Author: Kyligence Git <[email protected]>
AuthorDate: Fri Jun 21 08:10:34 2024 -0500

    [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240621) (#6170)
    
    * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240621)
    
    * Fix Build/UT due to https://github.com/ClickHouse/ClickHouse/pull/65234
    
    * fix style
    
    * refactor test
    
    ---------
    
    Co-authored-by: kyligence-git <[email protected]>
    Co-authored-by: Chang Chen <[email protected]>
---
 cpp-ch/clickhouse.version                          |   4 +-
 cpp-ch/local-engine/Parser/JoinRelParser.cpp       |   2 +-
 cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp  |   2 +-
 cpp-ch/local-engine/Parser/ProjectRelParser.cpp    |   1 -
 .../local-engine/Parser/SerializedPlanParser.cpp   |   9 +-
 cpp-ch/local-engine/Parser/SerializedPlanParser.h  |  10 +-
 cpp-ch/local-engine/local_engine_jni.cpp           |  72 ++---
 cpp-ch/local-engine/tests/gluten_test_util.cpp     |   4 +-
 cpp-ch/local-engine/tests/gtest_parser.cpp         | 320 +++++++++++++++++++++
 9 files changed, 373 insertions(+), 51 deletions(-)

diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index 1e3ac8d88..4a3088e54 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20240620
-CH_COMMIT=f9c3886a767
+CH_BRANCH=rebase_ch/20240621
+CH_COMMIT=acf666c1c4f
diff --git a/cpp-ch/local-engine/Parser/JoinRelParser.cpp 
b/cpp-ch/local-engine/Parser/JoinRelParser.cpp
index 937e449b0..58b156c3c 100644
--- a/cpp-ch/local-engine/Parser/JoinRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/JoinRelParser.cpp
@@ -459,7 +459,7 @@ void JoinRelParser::addConvertStep(TableJoin & table_join, 
DB::QueryPlan & left,
                 rename_dag->getOutputs()[pos] = &alias;
             }
         }
-        rename_dag->projectInput();
+
         QueryPlanStepPtr project_step = 
std::make_unique<ExpressionStep>(right.getCurrentDataStream(), rename_dag);
         project_step->setStepDescription("Right Table Rename");
         steps.emplace_back(project_step.get());
diff --git a/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp 
b/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp
index c36db6b74..b51b76b97 100644
--- a/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp
@@ -211,7 +211,7 @@ PrewhereInfoPtr MergeTreeRelParser::parsePreWhereInfo(const 
substrait::Expressio
     prewhere_info->prewhere_column_name = filter_name;
     prewhere_info->need_filter = true;
     prewhere_info->remove_prewhere_column = true;
-    prewhere_info->prewhere_actions->projectInput(false);
+
     for (const auto & name : input.getNames())
         prewhere_info->prewhere_actions->tryRestoreColumn(name);
     return prewhere_info;
diff --git a/cpp-ch/local-engine/Parser/ProjectRelParser.cpp 
b/cpp-ch/local-engine/Parser/ProjectRelParser.cpp
index caf779ac1..eb190101f 100644
--- a/cpp-ch/local-engine/Parser/ProjectRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/ProjectRelParser.cpp
@@ -99,7 +99,6 @@ ProjectRelParser::SplittedActionsDAGs 
ProjectRelParser::splitActionsDAGInGenerat
     std::unordered_set<const ActionsDAG::Node *> 
first_split_nodes(array_join_node->children.begin(), 
array_join_node->children.end());
     auto first_split_result = actions_dag->split(first_split_nodes);
     res.before_array_join = first_split_result.first;
-    res.before_array_join->projectInput(true);
 
     array_join_node = findArrayJoinNode(first_split_result.second);
     std::unordered_set<const ActionsDAG::Node *> second_split_nodes = 
{array_join_node};
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp 
b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
index 40e01e305..f9ea783a2 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
@@ -234,6 +234,7 @@ std::shared_ptr<ActionsDAG> 
SerializedPlanParser::expressionsToActionsDAG(
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported projection 
type {}.", magic_enum::enum_name(expr.rex_type_case()));
     }
     actions_dag->project(required_columns);
+    actions_dag->appendInputsForUnusedColumns(header);
     return actions_dag;
 }
 
@@ -1790,7 +1791,7 @@ QueryPlanPtr SerializedPlanParser::parse(const 
std::string & plan)
 QueryPlanPtr SerializedPlanParser::parseJson(const std::string & json_plan)
 {
     auto plan_ptr = std::make_unique<substrait::Plan>();
-    auto s = 
google::protobuf::util::JsonStringToMessage(absl::string_view(json_plan.c_str()),
 plan_ptr.get());
+    auto s = 
google::protobuf::util::JsonStringToMessage(absl::string_view(json_plan), 
plan_ptr.get());
     if (!s.ok())
         throw Exception(ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA, "Parse 
substrait::Plan from json string failed: {}", s.ToString());
     return parse(std::move(plan_ptr));
@@ -1831,7 +1832,7 @@ void SerializedPlanParser::collectJoinKeys(
     }
 }
 
-ActionsDAGPtr ASTParser::convertToActions(const NamesAndTypesList & 
name_and_types, const ASTPtr & ast)
+ActionsDAG ASTParser::convertToActions(const NamesAndTypesList & 
name_and_types, const ASTPtr & ast) const
 {
     NamesAndTypesList aggregation_keys;
     ColumnNumbersList aggregation_keys_indexes_list;
@@ -1840,9 +1841,9 @@ ActionsDAGPtr ASTParser::convertToActions(const 
NamesAndTypesList & name_and_typ
     ActionsMatcher::Data visitor_data(
         context,
         size_limits_for_set,
-        size_t(0),
+        static_cast<size_t>(0),
         name_and_types,
-        std::make_shared<ActionsDAG>(name_and_types),
+        ActionsDAG(name_and_types),
         std::make_shared<PreparedSets>(),
         false /* no_subqueries */,
         false /* no_makeset */,
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h 
b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
index 45ff5a20b..8964f42d9 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
@@ -112,7 +112,7 @@ static const std::map<std::string, std::string> 
SCALAR_FUNCTIONS
        {"rand", "randCanonical"},
        {"isnan", "isNaN"},
        {"bin", "sparkBin"},
-        {"rint", "sparkRint"},
+       {"rint", "sparkRint"},
 
        /// string functions
        {"like", "like"},
@@ -151,7 +151,7 @@ static const std::map<std::string, std::string> 
SCALAR_FUNCTIONS
        {"initcap", "initcapUTF8"},
        {"conv", "sparkConv"},
        {"uuid", "generateUUIDv4"},
-        {"levenshteinDistance", "editDistanceUTF8"},
+       {"levenshteinDistance", "editDistanceUTF8"},
 
        /// hash functions
        {"crc32", "CRC32"},
@@ -278,7 +278,7 @@ public:
         materialize_inputs.emplace_back(materialize_input);
     }
 
-    void addSplitInfo(std::string & split_info) { 
split_infos.emplace_back(std::move(split_info)); }
+    void addSplitInfo(std::string && split_info) { 
split_infos.emplace_back(std::move(split_info)); }
 
     int nextSplitInfoIndex()
     {
@@ -419,6 +419,7 @@ public:
     RelMetricPtr getMetric() const { return metric; }
     void setMetric(RelMetricPtr metric_) { metric = metric_; }
     void setExtraPlanHolder(std::vector<QueryPlanPtr> & extra_plan_holder_) { 
extra_plan_holder = std::move(extra_plan_holder_); }
+
 private:
     std::unique_ptr<SparkRowInfo> writeBlockToSparkRow(DB::Block & block);
 
@@ -434,7 +435,6 @@ private:
     DB::QueryPlanPtr current_query_plan;
     RelMetricPtr metric;
     std::vector<QueryPlanPtr> extra_plan_holder;
-
 };
 
 
@@ -450,7 +450,7 @@ public:
     ~ASTParser() = default;
 
     ASTPtr parseToAST(const Names & names, const substrait::Expression & rel);
-    ActionsDAGPtr convertToActions(const NamesAndTypesList & name_and_types, 
const ASTPtr & ast);
+    ActionsDAG convertToActions(const NamesAndTypesList & name_and_types, 
const ASTPtr & ast) const;
 
 private:
     ContextPtr context;
diff --git a/cpp-ch/local-engine/local_engine_jni.cpp 
b/cpp-ch/local-engine/local_engine_jni.cpp
index 38f188293..256f373c2 100644
--- a/cpp-ch/local-engine/local_engine_jni.cpp
+++ b/cpp-ch/local-engine/local_engine_jni.cpp
@@ -36,10 +36,14 @@
 #include <Shuffle/ShuffleWriter.h>
 #include <Shuffle/ShuffleWriterBase.h>
 #include <Shuffle/WriteBufferFromJavaOutputStream.h>
+#include <Storages/Mergetree/MergeSparkMergeTreeTask.h>
+#include <Storages/Mergetree/MetaDataHelper.h>
 #include <Storages/Mergetree/SparkMergeTreeWriter.h>
 #include <Storages/Output/BlockStripeSplitter.h>
 #include <Storages/Output/FileWriterWrappers.h>
+#include <Storages/StorageMergeTreeFactory.h>
 #include <Storages/SubstraitSource/ReadBufferBuilder.h>
+#include <google/protobuf/wrappers.pb.h>
 #include <jni/ReservationListenerWrapper.h>
 #include <jni/SharedPointerWrapper.h>
 #include <jni/jni_common.h>
@@ -51,10 +55,6 @@
 #include <Common/ExceptionUtils.h>
 #include <Common/JNIUtils.h>
 #include <Common/QueryContext.h>
-#include <google/protobuf/wrappers.pb.h>
-#include <Storages/StorageMergeTreeFactory.h>
-#include <Storages/Mergetree/MergeSparkMergeTreeTask.h>
-#include <Storages/Mergetree/MetaDataHelper.h>
 
 
 #ifdef __cplusplus
@@ -269,13 +269,12 @@ JNIEXPORT jlong 
Java_org_apache_gluten_vectorized_ExpressionEvaluatorJniWrapper_
         parser.addInputIter(iter, materialize_input);
     }
 
-    for (jsize i = 0, split_info_arr_size = env->GetArrayLength(split_infos); 
i < split_info_arr_size; i++) {
+    for (jsize i = 0, split_info_arr_size = env->GetArrayLength(split_infos); 
i < split_info_arr_size; i++)
+    {
         jbyteArray split_info = 
static_cast<jbyteArray>(env->GetObjectArrayElement(split_infos, i));
-        jsize split_info_size = env->GetArrayLength(split_info);
+        std::string::size_type split_info_size = 
env->GetArrayLength(split_info);
         jbyte * split_info_addr = env->GetByteArrayElements(split_info, 
nullptr);
-        std::string split_info_str;
-        split_info_str.assign(reinterpret_cast<const char *>(split_info_addr), 
split_info_size);
-        parser.addSplitInfo(split_info_str);
+        parser.addSplitInfo(std::string{reinterpret_cast<const char 
*>(split_info_addr), split_info_size});
     }
 
     jsize plan_size = env->GetArrayLength(plan);
@@ -630,8 +629,7 @@ JNIEXPORT jlong 
Java_org_apache_gluten_vectorized_CHShuffleSplitterJniWrapper_na
         .max_sort_buffer_size = static_cast<size_t>(max_sort_buffer_size),
         .spill_firstly_before_stop = 
static_cast<bool>(spill_firstly_before_stop),
         .force_external_sort = static_cast<bool>(force_external_sort),
-        .force_mermory_sort = static_cast<bool>(force_memory_sort)
-    };
+        .force_mermory_sort = static_cast<bool>(force_memory_sort)};
     auto name = jstring2string(env, short_name);
     local_engine::SplitterHolder * splitter;
     if (prefer_spill)
@@ -696,8 +694,7 @@ JNIEXPORT jlong 
Java_org_apache_gluten_vectorized_CHShuffleSplitterJniWrapper_na
         .throw_if_memory_exceed = static_cast<bool>(throw_if_memory_exceed),
         .flush_block_buffer_before_evict = 
static_cast<bool>(flush_block_buffer_before_evict),
         .force_external_sort = static_cast<bool>(force_external_sort),
-        .force_mermory_sort = static_cast<bool>(force_memory_sort)
-    };
+        .force_mermory_sort = static_cast<bool>(force_memory_sort)};
     auto name = jstring2string(env, short_name);
     local_engine::SplitterHolder * splitter;
     splitter = new local_engine::SplitterHolder{.splitter = 
std::make_unique<local_engine::CachedShuffleWriter>(name, options, pusher)};
@@ -768,8 +765,8 @@ JNIEXPORT void 
Java_org_apache_gluten_vectorized_CHShuffleSplitterJniWrapper_clo
 }
 
 // CHBlockConverterJniWrapper
-JNIEXPORT jobject
-Java_org_apache_gluten_vectorized_CHBlockConverterJniWrapper_convertColumnarToRow(JNIEnv
 * env, jclass, jlong block_address, jintArray masks)
+JNIEXPORT jobject 
Java_org_apache_gluten_vectorized_CHBlockConverterJniWrapper_convertColumnarToRow(
+    JNIEnv * env, jclass, jlong block_address, jintArray masks)
 {
     LOCAL_ENGINE_JNI_METHOD_START
     local_engine::CHColumnToSparkRow converter;
@@ -958,21 +955,18 @@ JNIEXPORT jlong 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniW
     /// Parsing may fail when the number of recursive layers is large.
     /// Here, set a limit large enough to avoid this problem.
     /// Once this problem occurs, it is difficult to troubleshoot, because the 
pb of c++ will not provide any valid information
-    google::protobuf::io::CodedInputStream coded_in(
-        reinterpret_cast<const uint8_t *>(plan_str.data()), 
static_cast<int>(plan_str.size()));
+    google::protobuf::io::CodedInputStream coded_in(reinterpret_cast<const 
uint8_t *>(plan_str.data()), static_cast<int>(plan_str.size()));
     coded_in.SetRecursionLimit(100000);
 
     auto ok = plan_ptr->ParseFromCodedStream(&coded_in);
     if (!ok)
         throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA, 
"Parse substrait::Plan from string failed");
 
-    substrait::ReadRel::ExtensionTable extension_table =
-        
local_engine::SerializedPlanParser::parseExtensionTable(split_info_str);
+    substrait::ReadRel::ExtensionTable extension_table = 
local_engine::SerializedPlanParser::parseExtensionTable(split_info_str);
 
     auto merge_tree_table = 
local_engine::MergeTreeRelParser::parseMergeTreeTable(extension_table);
     auto uuid = uuid_str + "_" + task_id;
-    auto * writer = new local_engine::SparkMergeTreeWriter(
-        merge_tree_table, query_context, uuid, partition_dir, bucket_dir);
+    auto * writer = new local_engine::SparkMergeTreeWriter(merge_tree_table, 
query_context, uuid, partition_dir, bucket_dir);
 
     env->ReleaseByteArrayElements(plan_, plan_buf_addr, JNI_ABORT);
     env->ReleaseByteArrayElements(split_info_, split_info_addr, JNI_ABORT);
@@ -1044,8 +1038,8 @@ JNIEXPORT void 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWr
     LOCAL_ENGINE_JNI_METHOD_END(env, )
 }
 
-JNIEXPORT void
-Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_writeToMergeTree(JNIEnv
 * env, jobject, jlong instanceId, jlong block_address)
+JNIEXPORT void 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_writeToMergeTree(
+    JNIEnv * env, jobject, jlong instanceId, jlong block_address)
 {
     LOCAL_ENGINE_JNI_METHOD_START
     auto * writer = reinterpret_cast<local_engine::SparkMergeTreeWriter 
*>(instanceId);
@@ -1054,7 +1048,8 @@ 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_writeToMe
     LOCAL_ENGINE_JNI_METHOD_END(env, )
 }
 
-JNIEXPORT jstring 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_closeMergeTreeWriter(JNIEnv
 * env, jobject, jlong instanceId)
+JNIEXPORT jstring
+Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_closeMergeTreeWriter(JNIEnv
 * env, jobject, jlong instanceId)
 {
     LOCAL_ENGINE_JNI_METHOD_START
     auto * writer = reinterpret_cast<local_engine::SparkMergeTreeWriter 
*>(instanceId);
@@ -1067,7 +1062,14 @@ JNIEXPORT jstring 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn
 }
 
 JNIEXPORT jstring 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_nativeMergeMTParts(
-    JNIEnv * env, jobject, jbyteArray plan_, jbyteArray split_info_, jstring 
uuid_, jstring task_id_, jstring partition_dir_, jstring bucket_dir_)
+    JNIEnv * env,
+    jobject,
+    jbyteArray plan_,
+    jbyteArray split_info_,
+    jstring uuid_,
+    jstring task_id_,
+    jstring partition_dir_,
+    jstring bucket_dir_)
 {
     LOCAL_ENGINE_JNI_METHOD_START
 
@@ -1095,16 +1097,14 @@ JNIEXPORT jstring 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn
     /// Parsing may fail when the number of recursive layers is large.
     /// Here, set a limit large enough to avoid this problem.
     /// Once this problem occurs, it is difficult to troubleshoot, because the 
pb of c++ will not provide any valid information
-    google::protobuf::io::CodedInputStream coded_in(
-        reinterpret_cast<const uint8_t *>(plan_str.data()), 
static_cast<int>(plan_str.size()));
+    google::protobuf::io::CodedInputStream coded_in(reinterpret_cast<const 
uint8_t *>(plan_str.data()), static_cast<int>(plan_str.size()));
     coded_in.SetRecursionLimit(100000);
 
     auto ok = plan_ptr->ParseFromCodedStream(&coded_in);
     if (!ok)
         throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA, 
"Parse substrait::Plan from string failed");
 
-    substrait::ReadRel::ExtensionTable extension_table =
-        
local_engine::SerializedPlanParser::parseExtensionTable(split_info_str);
+    substrait::ReadRel::ExtensionTable extension_table = 
local_engine::SerializedPlanParser::parseExtensionTable(split_info_str);
     google::protobuf::StringValue table;
     table.ParseFromString(extension_table.detail().value());
     auto merge_tree_table = 
local_engine::parseMergeTreeTableString(table.value());
@@ -1114,12 +1114,12 @@ JNIEXPORT jstring 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn
         = 
local_engine::MergeTreeRelParser::copyToVirtualStorage(merge_tree_table, 
local_engine::SerializedPlanParser::global_context);
 
     local_engine::TempStorageFreer freer{temp_storage->getStorageID()}; // to 
release temp CustomStorageMergeTree with RAII
-    std::vector<DB::DataPartPtr> selected_parts
-        = 
local_engine::StorageMergeTreeFactory::instance().getDataPartsByNames(temp_storage->getStorageID(),
 "", merge_tree_table.getPartNames());
+    std::vector<DB::DataPartPtr> selected_parts = 
local_engine::StorageMergeTreeFactory::instance().getDataPartsByNames(
+        temp_storage->getStorageID(), "", merge_tree_table.getPartNames());
 
     std::unordered_map<String, String> partition_values;
-    std::vector<MergeTreeDataPartPtr> loaded =
-        local_engine::mergeParts(selected_parts, partition_values, uuid_str, 
temp_storage, partition_dir, bucket_dir);
+    std::vector<MergeTreeDataPartPtr> loaded
+        = local_engine::mergeParts(selected_parts, partition_values, uuid_str, 
temp_storage, partition_dir, bucket_dir);
 
     std::vector<local_engine::PartInfo> res;
     for (auto & partPtr : loaded)
@@ -1156,7 +1156,8 @@ JNIEXPORT jobject 
Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn
         partition_col_indice_vec.push_back(pIndice[i]);
 
     env->ReleaseIntArrayElements(partitionColIndice, pIndice, JNI_ABORT);
-    local_engine::BlockStripes bs = 
local_engine::BlockStripeSplitter::split(*block, partition_col_indice_vec, 
hasBucket, reserve_partition_columns);
+    local_engine::BlockStripes bs
+        = local_engine::BlockStripeSplitter::split(*block, 
partition_col_indice_vec, hasBucket, reserve_partition_columns);
 
 
     auto * addresses = env->NewLongArray(bs.block_addresses.size());
@@ -1366,7 +1367,8 @@ JNIEXPORT jlong 
Java_org_apache_gluten_memory_alloc_CHNativeMemoryAllocator_getD
     return -1;
 }
 
-JNIEXPORT jlong 
Java_org_apache_gluten_memory_alloc_CHNativeMemoryAllocator_createListenableAllocator(JNIEnv
 * env, jclass, jobject listener)
+JNIEXPORT jlong
+Java_org_apache_gluten_memory_alloc_CHNativeMemoryAllocator_createListenableAllocator(JNIEnv
 * env, jclass, jobject listener)
 {
     LOCAL_ENGINE_JNI_METHOD_START
     auto listener_wrapper = 
std::make_shared<local_engine::ReservationListenerWrapper>(env->NewGlobalRef(listener));
diff --git a/cpp-ch/local-engine/tests/gluten_test_util.cpp 
b/cpp-ch/local-engine/tests/gluten_test_util.cpp
index 7fdd32d16..0448092b9 100644
--- a/cpp-ch/local-engine/tests/gluten_test_util.cpp
+++ b/cpp-ch/local-engine/tests/gluten_test_util.cpp
@@ -62,14 +62,14 @@ ActionsDAGPtr parseFilter(const std::string & filter, const 
AnotherRowType & nam
         size_limits_for_set,
         static_cast<size_t>(0),
         name_and_types,
-        std::make_shared<ActionsDAG>(name_and_types),
+        ActionsDAG(name_and_types),
         prepared_sets /* prepared_sets */,
         false /* no_subqueries */,
         false /* no_makeset */,
         false /* only_consts */,
         info);
     ActionsVisitor(visitor_data).visit(ast_exp);
-    return 
ActionsDAG::buildFilterActionsDAG({visitor_data.getActions()->getOutputs().back()},
 node_name_to_input_column);
+    return 
ActionsDAG::buildFilterActionsDAG({visitor_data.getActions().getOutputs().back()},
 node_name_to_input_column);
 }
 
 const char * get_data_dir()
diff --git a/cpp-ch/local-engine/tests/gtest_parser.cpp 
b/cpp-ch/local-engine/tests/gtest_parser.cpp
new file mode 100644
index 000000000..cbe41c90c
--- /dev/null
+++ b/cpp-ch/local-engine/tests/gtest_parser.cpp
@@ -0,0 +1,320 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <Parser/SerializedPlanParser.h>
+#include <google/protobuf/util/json_util.h>
+#include <gtest/gtest.h>
+
+using namespace local_engine;
+using namespace DB;
+
+/// Converts a JSON-encoded substrait::ReadRel::LocalFiles message into its
+/// serialized protobuf form, which is what the test in this file hands to
+/// SerializedPlanParser::addSplitInfo().
+///
+/// Parse and serialization failures are recorded as non-fatal gtest failures
+/// rather than being dropped, so a malformed fixture cannot silently turn into
+/// an empty split.
+///
+/// @param json JSON text describing a LocalFiles message.
+/// @return the serialized message; if a failure was recorded, whatever could
+///         still be serialized.
+std::string splitBinaryFromJson(const std::string & json)
+{
+    substrait::ReadRel::LocalFiles local_files;
+    const auto status = google::protobuf::util::JsonStringToMessage(absl::string_view(json), &local_files);
+    // EXPECT_* (not ASSERT_*) because this helper returns a value.
+    EXPECT_TRUE(status.ok()) << "Parse substrait::ReadRel::LocalFiles from json string failed: " << status.ToString();
+
+    std::string binary;
+    EXPECT_TRUE(local_files.SerializeToString(&binary)) << "Serialize substrait::ReadRel::LocalFiles failed";
+    return binary;
+}
+
+/// Returns the Substrait plan, encoded as protobuf JSON, that the PR65234 test
+/// in this file feeds to SerializedPlanParser::parseJson().
+///
+/// Shape of the plan, outermost first: root -> project -> filter -> read.
+///  - read: base schema (r_regionkey: nullable i64, r_name: nullable string).
+///  - project: one field-selection expression; emit outputMapping is [2].
+///  - root: output name "r_regionkey#72", nullable i64.
+/// The read-level "filter" and the filter rel's "condition" contain the same
+/// nested scalar-function tree, using functionReference 1, 2 (compared against
+/// the literal "EUROPE") and 3.
+/// NOTE(review): the outer scalarFunction nodes, the "and:bool_bool" extension
+/// and several structField selections omit their numeric fields; presumably
+/// they rely on the protobuf default of 0 -- confirm.
+std::string JsonPlanFor65234()
+{
+    // Plan for https://github.com/ClickHouse/ClickHouse/pull/65234
+    return R"(
+{
+  "extensions": [{
+    "extensionFunction": {
+      "functionAnchor": 1,
+      "name": "is_not_null:str"
+    }
+  }, {
+    "extensionFunction": {
+      "functionAnchor": 2,
+      "name": "equal:str_str"
+    }
+  }, {
+    "extensionFunction": {
+      "functionAnchor": 3,
+      "name": "is_not_null:i64"
+    }
+  }, {
+    "extensionFunction": {
+      "name": "and:bool_bool"
+    }
+  }],
+  "relations": [{
+    "root": {
+      "input": {
+        "project": {
+          "common": {
+            "emit": {
+              "outputMapping": [2]
+            }
+          },
+          "input": {
+            "filter": {
+              "common": {
+                "direct": {
+                }
+              },
+              "input": {
+                "read": {
+                  "common": {
+                    "direct": {
+                    }
+                  },
+                  "baseSchema": {
+                    "names": ["r_regionkey", "r_name"],
+                    "struct": {
+                      "types": [{
+                        "i64": {
+                          "nullability": "NULLABILITY_NULLABLE"
+                        }
+                      }, {
+                        "string": {
+                          "nullability": "NULLABILITY_NULLABLE"
+                        }
+                      }]
+                    },
+                    "columnTypes": ["NORMAL_COL", "NORMAL_COL"]
+                  },
+                  "filter": {
+                    "scalarFunction": {
+                      "outputType": {
+                        "bool": {
+                          "nullability": "NULLABILITY_NULLABLE"
+                        }
+                      },
+                      "arguments": [{
+                        "value": {
+                          "scalarFunction": {
+                            "outputType": {
+                              "bool": {
+                                "nullability": "NULLABILITY_NULLABLE"
+                              }
+                            },
+                            "arguments": [{
+                              "value": {
+                                "scalarFunction": {
+                                  "functionReference": 1,
+                                  "outputType": {
+                                    "bool": {
+                                      "nullability": "NULLABILITY_REQUIRED"
+                                    }
+                                  },
+                                  "arguments": [{
+                                    "value": {
+                                      "selection": {
+                                        "directReference": {
+                                          "structField": {
+                                            "field": 1
+                                          }
+                                        }
+                                      }
+                                    }
+                                  }]
+                                }
+                              }
+                            }, {
+                              "value": {
+                                "scalarFunction": {
+                                  "functionReference": 2,
+                                  "outputType": {
+                                    "bool": {
+                                      "nullability": "NULLABILITY_NULLABLE"
+                                    }
+                                  },
+                                  "arguments": [{
+                                    "value": {
+                                      "selection": {
+                                        "directReference": {
+                                          "structField": {
+                                            "field": 1
+                                          }
+                                        }
+                                      }
+                                    }
+                                  }, {
+                                    "value": {
+                                      "literal": {
+                                        "string": "EUROPE"
+                                      }
+                                    }
+                                  }]
+                                }
+                              }
+                            }]
+                          }
+                        }
+                      }, {
+                        "value": {
+                          "scalarFunction": {
+                            "functionReference": 3,
+                            "outputType": {
+                              "bool": {
+                                "nullability": "NULLABILITY_REQUIRED"
+                              }
+                            },
+                            "arguments": [{
+                              "value": {
+                                "selection": {
+                                  "directReference": {
+                                    "structField": {
+                                    }
+                                  }
+                                }
+                              }
+                            }]
+                          }
+                        }
+                      }]
+                    }
+                  },
+                  "advancedExtension": {
+                    "optimization": {
+                      "@type": 
"type.googleapis.com/google.protobuf.StringValue",
+                      "value": "isMergeTree\u003d0\n"
+                    }
+                  }
+                }
+              },
+              "condition": {
+                "scalarFunction": {
+                  "outputType": {
+                    "bool": {
+                      "nullability": "NULLABILITY_NULLABLE"
+                    }
+                  },
+                  "arguments": [{
+                    "value": {
+                      "scalarFunction": {
+                        "outputType": {
+                          "bool": {
+                            "nullability": "NULLABILITY_NULLABLE"
+                          }
+                        },
+                        "arguments": [{
+                          "value": {
+                            "scalarFunction": {
+                              "functionReference": 1,
+                              "outputType": {
+                                "bool": {
+                                  "nullability": "NULLABILITY_REQUIRED"
+                                }
+                              },
+                              "arguments": [{
+                                "value": {
+                                  "selection": {
+                                    "directReference": {
+                                      "structField": {
+                                        "field": 1
+                                      }
+                                    }
+                                  }
+                                }
+                              }]
+                            }
+                          }
+                        }, {
+                          "value": {
+                            "scalarFunction": {
+                              "functionReference": 2,
+                              "outputType": {
+                                "bool": {
+                                  "nullability": "NULLABILITY_NULLABLE"
+                                }
+                              },
+                              "arguments": [{
+                                "value": {
+                                  "selection": {
+                                    "directReference": {
+                                      "structField": {
+                                        "field": 1
+                                      }
+                                    }
+                                  }
+                                }
+                              }, {
+                                "value": {
+                                  "literal": {
+                                    "string": "EUROPE"
+                                  }
+                                }
+                              }]
+                            }
+                          }
+                        }]
+                      }
+                    }
+                  }, {
+                    "value": {
+                      "scalarFunction": {
+                        "functionReference": 3,
+                        "outputType": {
+                          "bool": {
+                            "nullability": "NULLABILITY_REQUIRED"
+                          }
+                        },
+                        "arguments": [{
+                          "value": {
+                            "selection": {
+                              "directReference": {
+                                "structField": {
+                                }
+                              }
+                            }
+                          }
+                        }]
+                      }
+                    }
+                  }]
+                }
+              }
+            }
+          },
+          "expressions": [{
+            "selection": {
+              "directReference": {
+                "structField": {
+                }
+              }
+            }
+          }]
+        }
+      },
+      "names": ["r_regionkey#72"],
+      "outputSchema": {
+        "types": [{
+          "i64": {
+            "nullability": "NULLABILITY_NULLABLE"
+          }
+        }],
+        "nullability": "NULLABILITY_REQUIRED"
+      }
+    }
+  }]
+}
+)";
+}
+
+/// Parses the plan returned by JsonPlanFor65234() with a single parquet split
+/// registered on the parser. The test has no explicit assertions: it passes
+/// when split registration and parseJson() complete without throwing.
+TEST(SerializedPlanParser, PR65234)
+{
+    // The raw string must close immediately after the final '}'; any extra
+    // character before the )" terminator becomes part of the JSON text and
+    // makes the split fixture malformed.
+    const std::string split
+        = R"({"items":[{"uriFile":"file:///part-00000-16caa751-9774-470c-bd37-5c84c53373c8-c000.snappy.parquet","length":"84633","parquet":{},"schema":{},"metadataColumns":[{}]}]})";
+    SerializedPlanParser parser(SerializedPlanParser::global_context);
+    parser.addSplitInfo(splitBinaryFromJson(split));
+    parser.parseJson(JsonPlanFor65234());
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240621) (#6170)

Reply via email to