This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new f498fe7485 [CH] Ignore unstabe uts and add more message when failed.
(#7821)
f498fe7485 is described below
commit f498fe74850348e4ac3120da41138fe4dda122b1
Author: Chang chen <[email protected]>
AuthorDate: Tue Nov 5 21:11:53 2024 +0800
[CH] Ignore unstabe uts and add more message when failed. (#7821)
* read data from orc file format - ignore reading except date32
* dumpPlan and dumpMessage
* fix due to comments
---
.../GlutenClickHouseFileFormatSuite.scala | 5 +-
cpp-ch/local-engine/Common/DebugUtils.cpp | 50 +++++++++++++++++++
cpp-ch/local-engine/Common/DebugUtils.h | 12 +++++
cpp-ch/local-engine/Common/GlutenConfig.cpp | 3 +-
.../Parser/RelParsers/ReadRelParser.cpp | 6 +--
.../local-engine/Parser/SerializedPlanParser.cpp | 40 +++++----------
.../local-engine/Parser/SubstraitParserUtils.cpp | 57 ----------------------
cpp-ch/local-engine/Parser/SubstraitParserUtils.h | 12 +++--
.../Storages/MergeTree/SparkMergeTreeMeta.cpp | 6 +--
9 files changed, 93 insertions(+), 98 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
index 88a34a786a..2337316257 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseFileFormatSuite.scala
@@ -1037,12 +1037,13 @@ class GlutenClickHouseFileFormatSuite
)
}
- test("read data from orc file format") {
+ test("read data from orc file format - except date32") {
val filePath =
s"$orcDataPath/all_data_types_with_non_primitive_type.snappy.orc"
val orcFileFormat = "orc"
val sql =
s"""
- | select *
+ | select string_field, int_field, long_field, float_field,
double_field, short_field,
+ | byte_field, boolean_field, decimal_field
| from $orcFileFormat.`$filePath`
| where long_field > 30
|""".stripMargin
diff --git a/cpp-ch/local-engine/Common/DebugUtils.cpp
b/cpp-ch/local-engine/Common/DebugUtils.cpp
index c144a1384e..7b67f7d812 100644
--- a/cpp-ch/local-engine/Common/DebugUtils.cpp
+++ b/cpp-ch/local-engine/Common/DebugUtils.cpp
@@ -25,9 +25,59 @@
#include <Formats/FormatSettings.h>
#include <Functions/FunctionHelpers.h>
#include <IO/WriteBufferFromString.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <google/protobuf/json/json.h>
+#include <google/protobuf/util/json_util.h>
+#include <google/protobuf/wrappers.pb.h>
+#include <Common/CHUtil.h>
+#include <Common/logger_useful.h>
+
+namespace pb_util = google::protobuf::util;
namespace debug
{
+
+void dumpPlan(DB::QueryPlan & plan, bool force, LoggerPtr logger)
+{
+ if (!logger)
+ {
+ logger = getLogger("SerializedPlanParser");
+ if (!logger)
+ return;
+ }
+
+ if (!force && !logger->debug())
+ return;
+
+ auto out = local_engine::PlanUtil::explainPlan(plan);
+ if (force) // force
+ LOG_ERROR(logger, "clickhouse plan:\n{}", out);
+ else
+ LOG_DEBUG(logger, "clickhouse plan:\n{}", out);
+}
+
+void dumpMessage(const google::protobuf::Message & message, const char * type,
bool force, LoggerPtr logger)
+{
+ if (!logger)
+ {
+ logger = getLogger("SubstraitPlan");
+ if (!logger)
+ return;
+ }
+
+ if (!force && !logger->debug())
+ return;
+ pb_util::JsonOptions options;
+ std::string json;
+ if (auto s = google::protobuf::json::MessageToJsonString(message, &json,
options); !s.ok())
+ throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Can not convert {}
to Json", type);
+
+ if (force) // force
+ LOG_ERROR(logger, "{}:\n{}", type, json);
+ else
+ LOG_DEBUG(logger, "{}:\n{}", type, json);
+}
+
void headBlock(const DB::Block & block, size_t count)
{
std::cout << "============Block============" << std::endl;
diff --git a/cpp-ch/local-engine/Common/DebugUtils.h
b/cpp-ch/local-engine/Common/DebugUtils.h
index cc0ecdc59d..55a0be5140 100644
--- a/cpp-ch/local-engine/Common/DebugUtils.h
+++ b/cpp-ch/local-engine/Common/DebugUtils.h
@@ -18,8 +18,20 @@
#include <Core/Block.h>
+namespace google::protobuf
+{
+class Message;
+}
+namespace DB
+{
+class QueryPlan;
+}
namespace debug
{
+
+void dumpPlan(DB::QueryPlan & plan, bool force = false, LoggerPtr = nullptr);
+void dumpMessage(const google::protobuf::Message & message, const char * type,
bool force = false, LoggerPtr = nullptr);
+
void headBlock(const DB::Block & block, size_t count = 10);
String printBlock(const DB::Block & block, size_t count = 10);
diff --git a/cpp-ch/local-engine/Common/GlutenConfig.cpp
b/cpp-ch/local-engine/Common/GlutenConfig.cpp
index 44d77cf372..93d074ecc2 100644
--- a/cpp-ch/local-engine/Common/GlutenConfig.cpp
+++ b/cpp-ch/local-engine/Common/GlutenConfig.cpp
@@ -22,6 +22,7 @@
#include <config.pb.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/CHUtil.h>
+#include <Common/DebugUtils.h>
#include <Common/logger_useful.h>
namespace local_engine
@@ -45,7 +46,7 @@ std::map<std::string, std::string>
SparkConfigs::load(std::string_view plan, boo
auto configMaps = local_engine::BinaryToMessage<gluten::ConfigMap>(plan);
if (!processStart)
- logDebugMessage(configMaps, "Update Config Map Plan");
+ debug::dumpMessage(configMaps, "Update Config Map Plan");
for (const auto & pair : configMaps.configs())
configs.emplace(pair.first, pair.second);
diff --git a/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
b/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
index 75e6e14c4a..2a98db344f 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/ReadRelParser.cpp
@@ -30,7 +30,7 @@
#include <Storages/SubstraitSource/SubstraitFileSourceStep.h>
#include <google/protobuf/wrappers.pb.h>
#include <Common/BlockTypeUtils.h>
-
+#include <Common/DebugUtils.h>
namespace DB
{
@@ -77,7 +77,7 @@ DB::QueryPlanPtr ReadRelParser::parse(DB::QueryPlanPtr
query_plan, const substra
else
{
extension_table =
BinaryToMessage<substrait::ReadRel::ExtensionTable>(split_info);
- logDebugMessage(extension_table, "extension_table");
+ debug::dumpMessage(extension_table, "extension_table");
}
MergeTreeRelParser mergeTreeParser(parser_context, getContext());
query_plan =
mergeTreeParser.parseReadRel(std::make_unique<DB::QueryPlan>(), read,
extension_table);
@@ -131,7 +131,7 @@ QueryPlanStepPtr
ReadRelParser::parseReadRelWithLocalFile(const substrait::ReadR
else
{
local_files =
BinaryToMessage<substrait::ReadRel::LocalFiles>(split_info);
- logDebugMessage(local_files, "local_files");
+ debug::dumpMessage(local_files, "local_files");
}
auto source = std::make_shared<SubstraitFileSource>(getContext(), header,
local_files);
auto source_pipe = Pipe(source);
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
index 9799933b33..74c1d35001 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
@@ -20,35 +20,20 @@
#include <string>
#include <string_view>
#include <AggregateFunctions/AggregateFunctionFactory.h>
-#include <Columns/ColumnSet.h>
#include <Core/Block.h>
#include <Core/ColumnWithTypeAndName.h>
-#include <Core/Field.h>
#include <Core/Names.h>
#include <Core/NamesAndTypes.h>
#include <Core/Settings.h>
#include <DataTypes/DataTypeAggregateFunction.h>
-#include <DataTypes/DataTypeArray.h>
-#include <DataTypes/DataTypeDate32.h>
-#include <DataTypes/DataTypeDateTime64.h>
-#include <DataTypes/DataTypeMap.h>
-#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
-#include <DataTypes/DataTypeSet.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesDecimal.h>
-#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/IDataType.h>
-#include <DataTypes/Serializations/ISerialization.h>
-#include <DataTypes/getLeastSupertype.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/Context.h>
-#include <Interpreters/PreparedSets.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/QueryPriorities.h>
-#include <Join/StorageJoinFromReadBuffer.h>
#include <Operator/BlocksBufferPoolTransform.h>
#include <Parser/ExpressionParser.h>
#include <Parser/FunctionParser.h>
@@ -73,6 +58,7 @@
#include <google/protobuf/wrappers.pb.h>
#include <Common/BlockTypeUtils.h>
#include <Common/CHUtil.h>
+#include <Common/DebugUtils.h>
#include <Common/Exception.h>
#include <Common/GlutenConfig.h>
#include <Common/JNIUtils.h>
@@ -121,13 +107,17 @@ void adjustOutput(const DB::QueryPlanPtr & query_plan,
const substrait::PlanRel
{
ActionsDAG
actions_dag{blockToNameAndTypeList(query_plan->getCurrentHeader())};
NamesWithAliases aliases;
- auto cols = query_plan->getCurrentHeader().getNamesAndTypesList();
+ const auto cols =
query_plan->getCurrentHeader().getNamesAndTypesList();
if (cols.getNames().size() !=
static_cast<size_t>(root_rel.root().names_size()))
+ {
+ debug::dumpPlan(*query_plan, true);
+ debug::dumpMessage(root_rel, "substrait::PlanRel", true);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
- "Missmatch result columns size. plan column size {}, subtrait
plan size {}.",
+ "Missmatch result columns size. plan column size {}, subtrait
plan name size {}.",
cols.getNames().size(),
root_rel.root().names_size());
+ }
for (int i = 0; i < static_cast<int>(cols.getNames().size()); i++)
aliases.emplace_back(NameWithAlias(cols.getNames()[i],
root_rel.root().names(i)));
actions_dag.project(aliases);
@@ -144,13 +134,14 @@ void adjustOutput(const DB::QueryPlanPtr & query_plan,
const substrait::PlanRel
const auto & original_cols =
original_header.getColumnsWithTypeAndName();
if (static_cast<size_t>(output_schema.types_size()) !=
original_cols.size())
{
+ debug::dumpPlan(*query_plan, true);
+ debug::dumpMessage(root_rel, "substrait::PlanRel", true);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
- "Mismatch output schema. plan column size {} [header: '{}'],
subtrait plan size {}[schema: {}].",
+ "Missmatch result columns size. plan column size {}, subtrait
plan output schema size {}, subtrait plan name size {}.",
original_cols.size(),
- original_header.dumpStructure(),
output_schema.types_size(),
- dumpMessage(output_schema));
+ root_rel.root().names_size());
}
bool need_final_project = false;
ColumnsWithTypeAndName final_cols;
@@ -192,7 +183,7 @@ void adjustOutput(const DB::QueryPlanPtr & query_plan,
const substrait::PlanRel
QueryPlanPtr SerializedPlanParser::parse(const substrait::Plan & plan)
{
- logDebugMessage(plan, "substrait plan");
+ debug::dumpMessage(plan, "substrait::Plan");
//parseExtensions(plan.extensions());
if (plan.relations_size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "too many relations found");
@@ -213,12 +204,7 @@ QueryPlanPtr SerializedPlanParser::parse(const
substrait::Plan & plan)
PlanUtil::checkOuputType(*query_plan);
#endif
- if (auto * logger = &Poco::Logger::get("SerializedPlanParser");
logger->debug())
- {
- auto out = PlanUtil::explainPlan(*query_plan);
- LOG_DEBUG(logger, "clickhouse plan:\n{}", out);
- }
-
+ debug::dumpPlan(*query_plan);
return query_plan;
}
diff --git a/cpp-ch/local-engine/Parser/SubstraitParserUtils.cpp
b/cpp-ch/local-engine/Parser/SubstraitParserUtils.cpp
deleted file mode 100644
index c16405eff3..0000000000
--- a/cpp-ch/local-engine/Parser/SubstraitParserUtils.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "SubstraitParserUtils.h"
-#include <google/protobuf/wrappers.pb.h>
-#include <Common/logger_useful.h>
-
-using namespace DB;
-
-namespace local_engine
-{
-namespace pb_util = google::protobuf::util;
-void logDebugMessage(const google::protobuf::Message & message, const char *
type)
-{
- if (auto * logger = &Poco::Logger::get("SubstraitPlan"); logger->debug())
- {
- pb_util::JsonOptions options;
- std::string json;
- if (auto s = MessageToJsonString(message, &json, options); !s.ok())
- throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not convert {} to
Json", type);
- LOG_DEBUG(logger, "{}:\n{}", type, json);
- }
-}
-std::string dumpMessage(const google::protobuf::Message & message)
-{
- pb_util::JsonOptions options;
- std::string json;
- if (auto s = MessageToJsonString(message, &json, options); !s.ok())
- {
- if (auto * logger = &Poco::Logger::get("SubstraitPlan"))
- LOG_ERROR(logger, "Can not convert message to Json");
- return "";
- }
- return json;
-}
-std::string toString(const google::protobuf::Any & any)
-{
- google::protobuf::StringValue sv;
- sv.ParseFromString(any.value());
- return sv.value();
-}
-}
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Parser/SubstraitParserUtils.h
b/cpp-ch/local-engine/Parser/SubstraitParserUtils.h
index a6c252034c..c020d96d8e 100644
--- a/cpp-ch/local-engine/Parser/SubstraitParserUtils.h
+++ b/cpp-ch/local-engine/Parser/SubstraitParserUtils.h
@@ -18,6 +18,7 @@
#include <string>
#include <google/protobuf/util/json_util.h>
+#include <google/protobuf/wrappers.pb.h>
#include <Common/Exception.h>
namespace DB::ErrorCodes
@@ -67,9 +68,10 @@ Message BinaryToMessage(const std::string_view binary)
return message;
}
-void logDebugMessage(const google::protobuf::Message & message, const char *
type);
-
-std::string dumpMessage(const google::protobuf::Message & message);
-
-std::string toString(const google::protobuf::Any & any);
+inline std::string toString(const google::protobuf::Any & any)
+{
+ google::protobuf::StringValue sv;
+ sv.ParseFromString(any.value());
+ return sv.value();
+}
} // namespace local_engine
diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp
b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp
index b7c6055246..63c6225a1f 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeMeta.cpp
@@ -27,9 +27,9 @@
#include <Storages/MergeTree/StorageMergeTreeFactory.h>
#include <google/protobuf/util/json_util.h>
#include <rapidjson/document.h>
-#include <Poco/StringTokenizer.h>
-
#include <write_optimization.pb.h>
+#include <Poco/StringTokenizer.h>
+#include <Common/DebugUtils.h>
using namespace DB;
using namespace local_engine;
@@ -228,7 +228,7 @@ MergeTreeTableInstance::MergeTreeTableInstance(const
google::protobuf::Any & any
MergeTreeTableInstance::MergeTreeTableInstance(const
substrait::ReadRel::ExtensionTable & extension_table)
: MergeTreeTableInstance(extension_table.detail())
{
- logDebugMessage(extension_table, "merge_tree_table");
+ debug::dumpMessage(extension_table, "merge_tree_table");
}
SparkStorageMergeTreePtr MergeTreeTableInstance::restoreStorage(const
ContextMutablePtr & context) const
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]