This is an automated email from the ASF dual-hosted git repository.
taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 799149e81e [CH] Add tools to dump ActionsDAG into tree graph (#8461)
799149e81e is described below
commit 799149e81ecff19ddb3f3054f73d0b5be24e39c4
Author: 李扬 <[email protected]>
AuthorDate: Wed Jan 8 16:44:07 2025 +0800
[CH] Add tools to dump ActionsDAG into tree graph (#8461)
* add dump actions dag tools
* modify log level
* update
---
cpp-ch/local-engine/Common/ArrayJoinHelper.cpp | 9 ++--
cpp-ch/local-engine/Common/DebugUtils.cpp | 72 ++++++++++++++++++++++++++
cpp-ch/local-engine/Common/DebugUtils.h | 2 +
3 files changed, 79 insertions(+), 4 deletions(-)
diff --git a/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp
b/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp
index acefad0aea..aa88b42db1 100644
--- a/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp
+++ b/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp
@@ -26,6 +26,7 @@
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Poco/Logger.h>
+#include <Common/DebugUtils.h>
#include <Common/logger_useful.h>
namespace DB
@@ -110,11 +111,11 @@ addArrayJoinStep(DB::ContextPtr context, DB::QueryPlan &
plan, const DB::Actions
{
/// If generator in generate rel is explode/posexplode, transform
arrayJoin function to ARRAY JOIN STEP to apply max_block_size
/// which avoids OOM when several lateral view explode/posexplode is
used in spark sqls
- LOG_DEBUG(logger, "original actions_dag:{}", actions_dag.dumpDAG());
+ LOG_TEST(logger, "original actions_dag:\n{}",
debug::dumpActionsDAG(actions_dag));
auto splitted_actions_dags = splitActionsDAGInGenerate(actions_dag);
- LOG_DEBUG(logger, "actions_dag before arrayJoin:{}",
splitted_actions_dags.before_array_join.dumpDAG());
- LOG_DEBUG(logger, "actions_dag during arrayJoin:{}",
splitted_actions_dags.array_join.dumpDAG());
- LOG_DEBUG(logger, "actions_dag after arrayJoin:{}",
splitted_actions_dags.after_array_join.dumpDAG());
+ LOG_TEST(logger, "actions_dag before arrayJoin:\n{}",
debug::dumpActionsDAG(splitted_actions_dags.before_array_join));
+ LOG_TEST(logger, "actions_dag during arrayJoin:\n{}",
debug::dumpActionsDAG(splitted_actions_dags.array_join));
+ LOG_TEST(logger, "actions_dag after arrayJoin:\n{}",
debug::dumpActionsDAG(splitted_actions_dags.after_array_join));
auto ignore_actions_dag = [](const DB::ActionsDAG & actions_dag_) ->
bool
{
diff --git a/cpp-ch/local-engine/Common/DebugUtils.cpp
b/cpp-ch/local-engine/Common/DebugUtils.cpp
index 4d3a0f55d2..513a4ee557 100644
--- a/cpp-ch/local-engine/Common/DebugUtils.cpp
+++ b/cpp-ch/local-engine/Common/DebugUtils.cpp
@@ -32,6 +32,8 @@
#include <Common/QueryContext.h>
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
+#include "Functions/IFunction.h"
+#include <Interpreters/ActionsDAG.h>
namespace pb_util = google::protobuf::util;
@@ -397,4 +399,74 @@ std::string showString(const DB::Block & block, size_t
numRows, size_t truncate,
[](const DB::ColumnWithTypeAndName & col) { return
std::make_pair(col.name, col.column); });
return Utils::showString(name_and_columns, numRows, truncate, vertical);
}
+
+/// Escapes `text` so it can be embedded inside a double-quoted Graphviz label.
+/// - `"` would otherwise terminate the label and make the DOT output invalid.
+/// - `\` would otherwise be interpreted by Graphviz as the start of an escape
+///   sequence.
+/// - A raw line feed is turned into `\l` to match the left-justified line
+///   breaks used by the rest of the label.
+static std::string escapeDotLabel(const std::string & text)
+{
+    std::string escaped;
+    escaped.reserve(text.size());
+    for (const char c : text)
+    {
+        switch (c)
+        {
+            case '"':
+                escaped += "\\\"";
+                break;
+            case '\\':
+                escaped += "\\\\";
+                break;
+            case '\n':
+                escaped += "\\l";
+                break;
+            default:
+                escaped += c;
+                break;
+        }
+    }
+    return escaped;
+}
+
+/// Renders `dag` as a Graphviz "digraph" description.
+///
+/// Every node of the DAG becomes one graph node labelled with a sequential id,
+/// its action type (plus the constant value or function name where available),
+/// its result type and the ids of its children. Nodes that are outputs of the
+/// DAG are drawn with a double circle. Edges point from child to parent and the
+/// graph is laid out bottom-to-top.
+///
+/// All text taken from the DAG (constant values, function names, type names)
+/// is escaped before being placed into a label, so quotes or backslashes in
+/// those strings cannot break the generated DOT document.
+///
+/// @param dag The actions DAG to dump.
+/// @return The DOT source as a string.
+std::string dumpActionsDAG(const DB::ActionsDAG & dag)
+{
+    std::stringstream ss;
+    ss << "digraph ActionsDAG {\n";
+    ss << "  rankdir=BT;\n"; // Invert the vertical direction
+    ss << "  nodesep=0.1;\n"; // Reduce space between nodes
+    ss << "  ranksep=0.1;\n"; // Reduce space between ranks
+    ss << "  margin=0.1;\n"; // Reduce graph margin
+
+    // Assign each node a stable id in iteration order of dag.getNodes().
+    std::unordered_map<const DB::ActionsDAG::Node *, size_t> node_to_id;
+    size_t id = 0;
+    for (const auto & node : dag.getNodes())
+        node_to_id[&node] = id++;
+
+    // Set of output nodes, used to highlight them in the graph.
+    std::unordered_set<const DB::ActionsDAG::Node *> output_nodes(dag.getOutputs().begin(), dag.getOutputs().end());
+
+    for (const auto & node : dag.getNodes())
+    {
+        ss << "  n" << node_to_id[&node] << " [label=\"";
+
+        // "\\l" emits the DOT escape \l: a line break with left justification.
+        ss << "id:" << node_to_id[&node] << "\\l";
+        switch (node.type)
+        {
+            case DB::ActionsDAG::ActionType::COLUMN:
+                // Only constant columns have a single value worth printing.
+                ss << "column:"
+                   << (node.column && DB::isColumnConst(*node.column)
+                           ? escapeDotLabel(toString(assert_cast<const DB::ColumnConst &>(*node.column).getField()))
+                           : "null")
+                   << "\\l";
+                break;
+            case DB::ActionsDAG::ActionType::ALIAS:
+                ss << "alias" << "\\l";
+                break;
+            case DB::ActionsDAG::ActionType::FUNCTION:
+                ss << "function: " << (node.function_base ? escapeDotLabel(node.function_base->getName()) : "null");
+                if (node.is_function_compiled)
+                    ss << " [compiled]";
+                ss << "\\l";
+                break;
+            case DB::ActionsDAG::ActionType::ARRAY_JOIN:
+                ss << "array join" << "\\l";
+                break;
+            case DB::ActionsDAG::ActionType::INPUT:
+                ss << "input" << "\\l";
+                break;
+        }
+
+        ss << "result type: " << (node.result_type ? escapeDotLabel(node.result_type->getName()) : "null") << "\\l";
+
+        ss << "children:";
+        for (const auto * child : node.children)
+            ss << " " << node_to_id[child];
+        ss << "\\l";
+
+        ss << "\"";
+        if (output_nodes.contains(&node))
+            ss << ", shape=doublecircle";
+
+        ss << "];\n";
+    }
+
+    // Edges go from child to parent; with rankdir=BT inputs end up at the bottom.
+    for (const auto & node : dag.getNodes())
+        for (const auto * child : node.children)
+            ss << "  n" << node_to_id[child] << " -> n" << node_to_id[&node] << ";\n";
+
+    ss << "}\n";
+    return ss.str();
+}
+
}
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Common/DebugUtils.h
b/cpp-ch/local-engine/Common/DebugUtils.h
index 850da408fb..dd29f93d8d 100644
--- a/cpp-ch/local-engine/Common/DebugUtils.h
+++ b/cpp-ch/local-engine/Common/DebugUtils.h
@@ -25,6 +25,7 @@ class Message;
namespace DB
{
class QueryPlan;
+class ActionsDAG;
}
namespace debug
{
@@ -41,5 +42,6 @@ inline std::string verticalShowString(const DB::Block &
block, size_t numRows =
{
return showString(block, numRows, truncate, true);
}
+std::string dumpActionsDAG(const DB::ActionsDAG & dag);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]