This is an automated email from the ASF dual-hosted git repository.

taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 799149e81e [CH] Add tools to dump ActionsDAG into tree graph (#8461)
799149e81e is described below

commit 799149e81ecff19ddb3f3054f73d0b5be24e39c4
Author: 李扬 <[email protected]>
AuthorDate: Wed Jan 8 16:44:07 2025 +0800

    [CH] Add tools to dump ActionsDAG into tree graph (#8461)
    
    * add dump actions dag tools
    
    * modify log level
    
    * update
---
 cpp-ch/local-engine/Common/ArrayJoinHelper.cpp |  9 ++--
 cpp-ch/local-engine/Common/DebugUtils.cpp      | 72 ++++++++++++++++++++++++++
 cpp-ch/local-engine/Common/DebugUtils.h        |  2 +
 3 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp 
b/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp
index acefad0aea..aa88b42db1 100644
--- a/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp
+++ b/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp
@@ -26,6 +26,7 @@
 #include <Processors/QueryPlan/IQueryPlanStep.h>
 #include <Processors/QueryPlan/QueryPlan.h>
 #include <Poco/Logger.h>
+#include <Common/DebugUtils.h>
 #include <Common/logger_useful.h>
 
 namespace DB
@@ -110,11 +111,11 @@ addArrayJoinStep(DB::ContextPtr context, DB::QueryPlan & 
plan, const DB::Actions
     {
         /// If generator in generate rel is explode/posexplode, transform 
arrayJoin function to ARRAY JOIN STEP to apply max_block_size
         /// which avoids OOM when several lateral view explode/posexplode is 
used in spark sqls
-        LOG_DEBUG(logger, "original actions_dag:{}", actions_dag.dumpDAG());
+        LOG_TEST(logger, "original actions_dag:\n{}", 
debug::dumpActionsDAG(actions_dag));
         auto splitted_actions_dags = splitActionsDAGInGenerate(actions_dag);
-        LOG_DEBUG(logger, "actions_dag before arrayJoin:{}", 
splitted_actions_dags.before_array_join.dumpDAG());
-        LOG_DEBUG(logger, "actions_dag during arrayJoin:{}", 
splitted_actions_dags.array_join.dumpDAG());
-        LOG_DEBUG(logger, "actions_dag after arrayJoin:{}", 
splitted_actions_dags.after_array_join.dumpDAG());
+        LOG_TEST(logger, "actions_dag before arrayJoin:\n{}", 
debug::dumpActionsDAG(splitted_actions_dags.before_array_join));
+        LOG_TEST(logger, "actions_dag during arrayJoin:\n{}", 
debug::dumpActionsDAG(splitted_actions_dags.array_join));
+        LOG_TEST(logger, "actions_dag after arrayJoin:\n{}", 
debug::dumpActionsDAG(splitted_actions_dags.after_array_join));
 
         auto ignore_actions_dag = [](const DB::ActionsDAG & actions_dag_) -> 
bool
         {
diff --git a/cpp-ch/local-engine/Common/DebugUtils.cpp 
b/cpp-ch/local-engine/Common/DebugUtils.cpp
index 4d3a0f55d2..513a4ee557 100644
--- a/cpp-ch/local-engine/Common/DebugUtils.cpp
+++ b/cpp-ch/local-engine/Common/DebugUtils.cpp
@@ -32,6 +32,8 @@
 #include <Common/QueryContext.h>
 #include <Common/formatReadable.h>
 #include <Common/logger_useful.h>
+#include "Functions/IFunction.h"
+#include <Interpreters/ActionsDAG.h>
 
 namespace pb_util = google::protobuf::util;
 
@@ -397,4 +399,74 @@ std::string showString(const DB::Block & block, size_t 
numRows, size_t truncate,
         [](const DB::ColumnWithTypeAndName & col) { return 
std::make_pair(col.name, col.column); });
     return Utils::showString(name_and_columns, numRows, truncate, vertical);
 }
+
+std::string dumpActionsDAG(const DB::ActionsDAG & dag) // Builds and returns a Graphviz DOT description (digraph ActionsDAG) of dag: one labelled node per DAG node, one edge per child link
+{
+    std::stringstream ss;
+    ss << "digraph ActionsDAG {\n";
+    ss << "  rankdir=BT;\n"; // Bottom-to-top layout (inverts Graphviz's default vertical direction)
+    ss << "  nodesep=0.1;\n"; // Reduce space between nodes
+    ss << "  ranksep=0.1;\n"; // Reduce space between ranks
+    ss << "  margin=0.1;\n"; // Reduce graph margin
+
+    std::unordered_map<const DB::ActionsDAG::Node *, size_t> node_to_id; // Node address -> sequential id, numbered in dag.getNodes() iteration order
+    size_t id = 0;
+    for (const auto & node : dag.getNodes())
+        node_to_id[&node] = id++;
+
+    std::unordered_set<const DB::ActionsDAG::Node *> 
output_nodes(dag.getOutputs().begin(), dag.getOutputs().end()); // Set of the DAG's output nodes, used below to give them a distinct shape
+
+    for (const auto & node : dag.getNodes()) // First pass: emit one node statement (n<id>) with a multi-line label
+    {
+        ss << "  n" << node_to_id[&node] << " [label=\"";
+
+        ss << "id:" << node_to_id[&node] << "\\l"; // Each label line ends with a backslash-l sequence, which Graphviz renders as a left-justified line break
+        switch (node.type)
+        {
+            case DB::ActionsDAG::ActionType::COLUMN: // Constant columns print their field value; a missing or non-constant column prints null
+                ss << "column:"
+                   << (node.column && DB::isColumnConst(*node.column)
+                           ? toString(assert_cast<const DB::ColumnConst 
&>(*node.column).getField())
+                           : "null")
+                   << "\\l";
+                break;
+            case DB::ActionsDAG::ActionType::ALIAS:
+                ss << "alias" << "\\l";
+                break;
+            case DB::ActionsDAG::ActionType::FUNCTION: // Function name, or null when function_base is unset, plus a marker for compiled functions
+                ss << "function: " << (node.function_base ? 
node.function_base->getName() : "null");
+                if (node.is_function_compiled)
+                   ss << " [compiled]";
+                ss << "\\l";
+                break;
+            case DB::ActionsDAG::ActionType::ARRAY_JOIN:
+                ss << "array join" << "\\l";
+                break;
+            case DB::ActionsDAG::ActionType::INPUT:
+                ss << "input" << "\\l";
+                break;
+        } // No default branch: a node type not listed above contributes no kind line to the label
+
+        ss << "result type: " << (node.result_type ? 
node.result_type->getName() : "null") << "\\l";
+
+        ss << "children:";
+        for (const auto * child : node.children)
+            ss << " " << node_to_id[child]; // NOTE(review): operator[] would insert id 0 for a child absent from the map; assumes every child is one of dag.getNodes() - confirm
+        ss << "\\l";
+
+        ss << "\""; // NOTE(review): label text above is streamed without escaping; a double quote in a constant, function or type name would presumably end the DOT string early - confirm
+        if (output_nodes.contains(&node))
+            ss << ", shape=doublecircle"; // Output nodes are drawn as double circles
+
+        ss << "];\n";
+    }
+
+    for (const auto & node : dag.getNodes()) // Second pass: emit one edge per child link, directed from the child to the node that uses it
+        for (const auto * child : node.children)
+            ss << "  n" << node_to_id[child] << " -> n" << node_to_id[&node] 
<< ";\n";
+
+    ss << "}\n";
+    return ss.str();
+}
+
 }
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Common/DebugUtils.h 
b/cpp-ch/local-engine/Common/DebugUtils.h
index 850da408fb..dd29f93d8d 100644
--- a/cpp-ch/local-engine/Common/DebugUtils.h
+++ b/cpp-ch/local-engine/Common/DebugUtils.h
@@ -25,6 +25,7 @@ class Message;
 namespace DB
 {
 class QueryPlan;
+class ActionsDAG;
 }
 namespace debug
 {
@@ -41,5 +42,6 @@ inline std::string verticalShowString(const DB::Block & 
block, size_t numRows =
 {
     return showString(block, numRows, truncate, true);
 }
+std::string dumpActionsDAG(const DB::ActionsDAG & dag); // Returns a Graphviz DOT (digraph) text rendering of dag's nodes and child links
 
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to