This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new f498fe7485 [CH] Ignore unstabe uts and add more message when failed.  
(#7821)
f498fe7485 is described below

commit f498fe74850348e4ac3120da41138fe4dda122b1
Author: Chang chen <[email protected]>
AuthorDate: Tue Nov 5 21:11:53 2024 +0800

    [CH] Ignore unstabe uts and add more message when failed.  (#7821)
    
    * read data from orc file format - ignore reading except date32
    
    * dumpPlan and dumpMessage
    
    * fix due to comments
---
 .../GlutenClickHouseFileFormatSuite.scala          |  5 +-
 cpp-ch/local-engine/Common/DebugUtils.cpp          | 50 +++++++++++++++++++
 cpp-ch/local-engine/Common/DebugUtils.h            | 12 +++++
 cpp-ch/local-engine/Common/GlutenConfig.cpp        |  3 +-
 .../Parser/RelParsers/ReadRelParser.cpp            |  6 +--
 .../local-engine/Parser/SerializedPlanParser.cpp   | 40 +++++----------
 .../local-engine/Parser/SubstraitParserUtils.cpp   | 57 ----------------------
 cpp-ch/local-engine/Parser/SubstraitParserUtils.h  | 12 +++--
 .../Storages/MergeTree/SparkMergeTreeMeta.cpp      |  6 +--
 9 files changed, 93 insertions(+), 98 deletions(-)

diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
index 88a34a786a..2337316257 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
@@ -1037,12 +1037,13 @@ class GlutenClickHouseFileFormatSuite
     )
   }
 
-  test("read data from orc file format") {
+  test("read data from orc file format - except date32") {
     val filePath = 
s"$orcDataPath/all_data_types_with_non_primitive_type.snappy.orc"
     val orcFileFormat = "orc"
     val sql =
       s"""
-         | select *
+         | select string_field, int_field, long_field, float_field, 
double_field, short_field,
+         | byte_field, boolean_field, decimal_field
          | from $orcFileFormat.`$filePath`
          | where long_field > 30
          |""".stripMargin
diff --git a/cpp-ch/local-engine/Common/DebugUtils.cpp 
b/cpp-ch/local-engine/Common/DebugUtils.cpp
index c144a1384e..7b67f7d812 100644
--- a/cpp-ch/local-engine/Common/DebugUtils.cpp
+++ b/cpp-ch/local-engine/Common/DebugUtils.cpp
@@ -25,9 +25,59 @@
 #include <Formats/FormatSettings.h>
 #include <Functions/FunctionHelpers.h>
 #include <IO/WriteBufferFromString.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <google/protobuf/json/json.h>
+#include <google/protobuf/util/json_util.h>
+#include <google/protobuf/wrappers.pb.h>
+#include <Common/CHUtil.h>
+#include <Common/logger_useful.h>
+
+namespace pb_util = google::protobuf::util;
 
 namespace debug
 {
+
+void dumpPlan(DB::QueryPlan & plan, bool force, LoggerPtr logger)
+{
+    if (!logger)
+    {
+        logger = getLogger("SerializedPlanParser");
+        if (!logger)
+            return;
+    }
+
+    if (!force && !logger->debug())
+        return;
+
+    auto out = local_engine::PlanUtil::explainPlan(plan);
+    if (force) // force
+        LOG_ERROR(logger, "clickhouse plan:\n{}", out);
+    else
+        LOG_DEBUG(logger, "clickhouse plan:\n{}", out);
+}
+
+void dumpMessage(const google::protobuf::Message & message, const char * type, 
bool force, LoggerPtr logger)
+{
+    if (!logger)
+    {
+        logger = getLogger("SubstraitPlan");
+        if (!logger)
+            return;
+    }
+
+    if (!force && !logger->debug())
+        return;
+    pb_util::JsonOptions options;
+    std::string json;
+    if (auto s = google::protobuf::json::MessageToJsonString(message, &json, 
options); !s.ok())
+        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Can not convert {} 
to Json", type);
+
+    if (force) // force
+        LOG_ERROR(logger, "{}:\n{}", type, json);
+    else
+        LOG_DEBUG(logger, "{}:\n{}", type, json);
+}
+
 void headBlock(const DB::Block & block, size_t count)
 {
     std::cout << "============Block============" << std::endl;
diff --git a/cpp-ch/local-engine/Common/DebugUtils.h 
b/cpp-ch/local-engine/Common/DebugUtils.h
index cc0ecdc59d..55a0be5140 100644
--- a/cpp-ch/local-engine/Common/DebugUtils.h
+++ b/cpp-ch/local-engine/Common/DebugUtils.h
@@ -18,8 +18,20 @@
 
 #include <Core/Block.h>
 
+namespace google::protobuf
+{
+class Message;
+}
+namespace DB
+{
+class QueryPlan;
+}
 namespace debug
 {
+
+void dumpPlan(DB::QueryPlan & plan, bool force = false, LoggerPtr = nullptr);
+void dumpMessage(const google::protobuf::Message & message, const char * type, 
bool force = false, LoggerPtr = nullptr);
+
 void headBlock(const DB::Block & block, size_t count = 10);
 String printBlock(const DB::Block & block, size_t count = 10);
 
diff --git a/cpp-ch/local-engine/Common/GlutenConfig.cpp 
b/cpp-ch/local-engine/Common/GlutenConfig.cpp
index 44d77cf372..93d074ecc2 100644
--- a/cpp-ch/local-engine/Common/GlutenConfig.cpp
+++ b/cpp-ch/local-engine/Common/GlutenConfig.cpp
@@ -22,6 +22,7 @@
 #include <config.pb.h>
 #include <Poco/Util/AbstractConfiguration.h>
 #include <Common/CHUtil.h>
+#include <Common/DebugUtils.h>
 #include <Common/logger_useful.h>
 
 namespace local_engine
@@ -45,7 +46,7 @@ std::map<std::string, std::string> 
SparkConfigs::load(std::string_view plan, boo
     auto configMaps = local_engine::BinaryToMessage<gluten::ConfigMap>(plan);
 
     if (!processStart)
-        logDebugMessage(configMaps, "Update Config Map Plan");
+        debug::dumpMessage(configMaps, "Update Config Map Plan");
 
     for (const auto & pair : configMaps.configs())
         configs.emplace(pair.first, pair.second);
diff --git a/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
index 75e6e14c4a..2a98db344f 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
@@ -30,7 +30,7 @@
 #include <Storages/SubstraitSource/SubstraitFileSourceStep.h>
 #include <google/protobuf/wrappers.pb.h>
 #include <Common/BlockTypeUtils.h>
-
+#include <Common/DebugUtils.h>
 
 namespace DB
 {
@@ -77,7 +77,7 @@ DB::QueryPlanPtr ReadRelParser::parse(DB::QueryPlanPtr 
query_plan, const substra
         else
         {
             extension_table = 
BinaryToMessage<substrait::ReadRel::ExtensionTable>(split_info);
-            logDebugMessage(extension_table, "extension_table");
+            debug::dumpMessage(extension_table, "extension_table");
         }
         MergeTreeRelParser mergeTreeParser(parser_context, getContext());
         query_plan = 
mergeTreeParser.parseReadRel(std::make_unique<DB::QueryPlan>(), read, 
extension_table);
@@ -131,7 +131,7 @@ QueryPlanStepPtr 
ReadRelParser::parseReadRelWithLocalFile(const substrait::ReadR
     else
     {
         local_files = 
BinaryToMessage<substrait::ReadRel::LocalFiles>(split_info);
-        logDebugMessage(local_files, "local_files");
+        debug::dumpMessage(local_files, "local_files");
     }
     auto source = std::make_shared<SubstraitFileSource>(getContext(), header, 
local_files);
     auto source_pipe = Pipe(source);
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp 
b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
index 9799933b33..74c1d35001 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
@@ -20,35 +20,20 @@
 #include <string>
 #include <string_view>
 #include <AggregateFunctions/AggregateFunctionFactory.h>
-#include <Columns/ColumnSet.h>
 #include <Core/Block.h>
 #include <Core/ColumnWithTypeAndName.h>
-#include <Core/Field.h>
 #include <Core/Names.h>
 #include <Core/NamesAndTypes.h>
 #include <Core/Settings.h>
 #include <DataTypes/DataTypeAggregateFunction.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeDate32.h>
-#include <DataTypes/DataTypeDateTime64.h>
-#include <DataTypes/DataTypeMap.h>
-#include <DataTypes/DataTypeNothing.h>
 #include <DataTypes/DataTypeNullable.h>
-#include <DataTypes/DataTypeSet.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypesDecimal.h>
-#include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/IDataType.h>
-#include <DataTypes/Serializations/ISerialization.h>
-#include <DataTypes/getLeastSupertype.h>
 #include <Functions/FunctionFactory.h>
 #include <Interpreters/ActionsDAG.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/PreparedSets.h>
 #include <Interpreters/ProcessList.h>
 #include <Interpreters/QueryPriorities.h>
-#include <Join/StorageJoinFromReadBuffer.h>
 #include <Operator/BlocksBufferPoolTransform.h>
 #include <Parser/ExpressionParser.h>
 #include <Parser/FunctionParser.h>
@@ -73,6 +58,7 @@
 #include <google/protobuf/wrappers.pb.h>
 #include <Common/BlockTypeUtils.h>
 #include <Common/CHUtil.h>
+#include <Common/DebugUtils.h>
 #include <Common/Exception.h>
 #include <Common/GlutenConfig.h>
 #include <Common/JNIUtils.h>
@@ -121,13 +107,17 @@ void adjustOutput(const DB::QueryPlanPtr & query_plan, 
const substrait::PlanRel
     {
         ActionsDAG 
actions_dag{blockToNameAndTypeList(query_plan->getCurrentHeader())};
         NamesWithAliases aliases;
-        auto cols = query_plan->getCurrentHeader().getNamesAndTypesList();
+        const auto cols = 
query_plan->getCurrentHeader().getNamesAndTypesList();
         if (cols.getNames().size() != 
static_cast<size_t>(root_rel.root().names_size()))
+        {
+            debug::dumpPlan(*query_plan, true);
+            debug::dumpMessage(root_rel, "substrait::PlanRel", true);
             throw Exception(
                 ErrorCodes::LOGICAL_ERROR,
-                "Missmatch result columns size. plan column size {}, subtrait 
plan size {}.",
+                "Missmatch result columns size. plan column size {}, subtrait 
plan name size {}.",
                 cols.getNames().size(),
                 root_rel.root().names_size());
+        }
         for (int i = 0; i < static_cast<int>(cols.getNames().size()); i++)
             aliases.emplace_back(NameWithAlias(cols.getNames()[i], 
root_rel.root().names(i)));
         actions_dag.project(aliases);
@@ -144,13 +134,14 @@ void adjustOutput(const DB::QueryPlanPtr & query_plan, 
const substrait::PlanRel
         const auto & original_cols = 
original_header.getColumnsWithTypeAndName();
         if (static_cast<size_t>(output_schema.types_size()) != 
original_cols.size())
         {
+            debug::dumpPlan(*query_plan, true);
+            debug::dumpMessage(root_rel, "substrait::PlanRel", true);
             throw Exception(
                 ErrorCodes::LOGICAL_ERROR,
-                "Mismatch output schema. plan column size {} [header: '{}'], 
subtrait plan size {}[schema: {}].",
+                "Missmatch result columns size. plan column size {}, subtrait 
plan output schema size {}, subtrait plan name size {}.",
                 original_cols.size(),
-                original_header.dumpStructure(),
                 output_schema.types_size(),
-                dumpMessage(output_schema));
+                root_rel.root().names_size());
         }
         bool need_final_project = false;
         ColumnsWithTypeAndName final_cols;
@@ -192,7 +183,7 @@ void adjustOutput(const DB::QueryPlanPtr & query_plan, 
const substrait::PlanRel
 
 QueryPlanPtr SerializedPlanParser::parse(const substrait::Plan & plan)
 {
-    logDebugMessage(plan, "substrait plan");
+    debug::dumpMessage(plan, "substrait::Plan");
     //parseExtensions(plan.extensions());
     if (plan.relations_size() != 1)
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "too many relations found");
@@ -213,12 +204,7 @@ QueryPlanPtr SerializedPlanParser::parse(const 
substrait::Plan & plan)
     PlanUtil::checkOuputType(*query_plan);
 #endif
 
-    if (auto * logger = &Poco::Logger::get("SerializedPlanParser"); 
logger->debug())
-    {
-        auto out = PlanUtil::explainPlan(*query_plan);
-        LOG_DEBUG(logger, "clickhouse plan:\n{}", out);
-    }
-
+    debug::dumpPlan(*query_plan);
     return query_plan;
 }
 
diff --git a/cpp-ch/local-engine/Parser/SubstraitParserUtils.cpp 
b/cpp-ch/local-engine/Parser/SubstraitParserUtils.cpp
deleted file mode 100644
index c16405eff3..0000000000
--- a/cpp-ch/local-engine/Parser/SubstraitParserUtils.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "SubstraitParserUtils.h"
-#include <google/protobuf/wrappers.pb.h>
-#include <Common/logger_useful.h>
-
-using namespace DB;
-
-namespace local_engine
-{
-namespace pb_util = google::protobuf::util;
-void logDebugMessage(const google::protobuf::Message & message, const char * 
type)
-{
-    if (auto * logger = &Poco::Logger::get("SubstraitPlan"); logger->debug())
-    {
-        pb_util::JsonOptions options;
-        std::string json;
-        if (auto s = MessageToJsonString(message, &json, options); !s.ok())
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not convert {} to 
Json", type);
-        LOG_DEBUG(logger, "{}:\n{}", type, json);
-    }
-}
-std::string dumpMessage(const google::protobuf::Message & message)
-{
-    pb_util::JsonOptions options;
-    std::string json;
-    if (auto s = MessageToJsonString(message, &json, options); !s.ok())
-    {
-        if (auto * logger = &Poco::Logger::get("SubstraitPlan"))
-            LOG_ERROR(logger, "Can not convert message to Json");
-        return "";
-    }
-    return json;
-}
-std::string toString(const google::protobuf::Any & any)
-{
-    google::protobuf::StringValue sv;
-    sv.ParseFromString(any.value());
-    return sv.value();
-}
-}
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Parser/SubstraitParserUtils.h 
b/cpp-ch/local-engine/Parser/SubstraitParserUtils.h
index a6c252034c..c020d96d8e 100644
--- a/cpp-ch/local-engine/Parser/SubstraitParserUtils.h
+++ b/cpp-ch/local-engine/Parser/SubstraitParserUtils.h
@@ -18,6 +18,7 @@
 
 #include <string>
 #include <google/protobuf/util/json_util.h>
+#include <google/protobuf/wrappers.pb.h>
 #include <Common/Exception.h>
 
 namespace DB::ErrorCodes
@@ -67,9 +68,10 @@ Message BinaryToMessage(const std::string_view binary)
     return message;
 }
 
-void logDebugMessage(const google::protobuf::Message & message, const char * 
type);
-
-std::string dumpMessage(const google::protobuf::Message & message);
-
-std::string toString(const google::protobuf::Any & any);
+inline std::string toString(const google::protobuf::Any & any)
+{
+    google::protobuf::StringValue sv;
+    sv.ParseFromString(any.value());
+    return sv.value();
+}
 } // namespace local_engine
diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp 
b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp
index b7c6055246..63c6225a1f 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp
@@ -27,9 +27,9 @@
 #include <Storages/MergeTree/StorageMergeTreeFactory.h>
 #include <google/protobuf/util/json_util.h>
 #include <rapidjson/document.h>
-#include <Poco/StringTokenizer.h>
-
 #include <write_optimization.pb.h>
+#include <Poco/StringTokenizer.h>
+#include <Common/DebugUtils.h>
 
 using namespace DB;
 using namespace local_engine;
@@ -228,7 +228,7 @@ MergeTreeTableInstance::MergeTreeTableInstance(const 
google::protobuf::Any & any
 MergeTreeTableInstance::MergeTreeTableInstance(const 
substrait::ReadRel::ExtensionTable & extension_table)
     : MergeTreeTableInstance(extension_table.detail())
 {
-    logDebugMessage(extension_table, "merge_tree_table");
+    debug::dumpMessage(extension_table, "merge_tree_table");
 }
 
 SparkStorageMergeTreePtr MergeTreeTableInstance::restoreStorage(const 
ContextMutablePtr & context) const


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to