Add visualization for execution plan DAGs combined with profiling stats
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/4b944328 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/4b944328 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/4b944328 Branch: refs/heads/execution-dag-visualizer Commit: 4b944328afde90c961122d547c6fa4a8d0230c1c Parents: a61b99e Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Tue Aug 2 16:57:47 2016 -0500 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Wed Aug 3 05:55:41 2016 -0500 ---------------------------------------------------------------------- CMakeLists.txt | 1 + cli/QuickstepCli.cpp | 19 +- query_execution/ForemanSingleNode.cpp | 12 +- query_execution/ForemanSingleNode.hpp | 11 + query_execution/PolicyEnforcerBase.cpp | 3 +- query_execution/PolicyEnforcerBase.hpp | 9 +- query_execution/QueryExecutionMessages.proto | 10 +- query_execution/QueryExecutionTypedefs.hpp | 6 + query_execution/Worker.cpp | 15 +- .../tests/QueryManagerSingleNode_unittest.cpp | 4 + relational_operators/AggregationOperator.hpp | 10 + relational_operators/BuildHashOperator.hpp | 12 + relational_operators/CreateIndexOperator.hpp | 4 + relational_operators/CreateTableOperator.hpp | 4 + relational_operators/DeleteOperator.hpp | 4 + relational_operators/DestroyHashOperator.hpp | 4 + relational_operators/DropTableOperator.hpp | 4 + .../FinalizeAggregationOperator.hpp | 4 + relational_operators/HashJoinOperator.hpp | 23 ++ relational_operators/InsertOperator.hpp | 4 + .../NestedLoopsJoinOperator.hpp | 4 + relational_operators/RelationalOperator.hpp | 16 ++ relational_operators/SampleOperator.hpp | 4 + relational_operators/SaveBlocksOperator.hpp | 4 + relational_operators/SelectOperator.hpp | 8 + relational_operators/SortMergeRunOperator.hpp | 4 + .../SortRunGenerationOperator.hpp | 4 + relational_operators/TableGeneratorOperator.hpp | 4 + relational_operators/TextScanOperator.hpp | 4 + relational_operators/UpdateOperator.hpp | 4 + .../WindowAggregationOperator.hpp | 4 + utility/CMakeLists.txt | 14 ++ utility/ExecutionDAGVisualizer.cpp | 229 +++++++++++++++++++ utility/ExecutionDAGVisualizer.hpp | 112 +++++++++ 34 files changed, 560 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bbde61..3192713 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -770,6 +770,7 @@ target_link_libraries(quickstep_cli_shell quickstep_queryoptimizer_QueryProcessor quickstep_storage_PreloaderThread quickstep_threading_ThreadIDBasedMap + quickstep_utility_ExecutionDAGVisualizer quickstep_utility_Macros quickstep_utility_PtrVector quickstep_utility_SqlError http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/cli/QuickstepCli.cpp ---------------------------------------------------------------------- diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp index 68a3599..154c689 100644 --- a/cli/QuickstepCli.cpp +++ b/cli/QuickstepCli.cpp @@ -75,6 +75,7 @@ typedef quickstep::LineReaderDumb LineReaderImpl; #include "storage/PreloaderThread.hpp" #include "threading/ThreadIDBasedMap.hpp" +#include "utility/ExecutionDAGVisualizer.hpp" #include "utility/Macros.hpp" #include "utility/PtrVector.hpp" #include "utility/SqlError.hpp" @@ -185,6 +186,10 @@ DEFINE_string(profile_file_name, "", // To put things in perspective, the first run is, in my experiments, about 5-10 // times more expensive than the average run. That means the query needs to be // run at least a hundred times to make the impact of the first run small (< 5 %). +DEFINE_bool(visualize_execution_dag, false, + "If true, visualize the execution plan DAG into a graph in DOT " + "format (DOT is a plain text graph description language) which is " + "then printed via stderr."); } // namespace quickstep @@ -361,7 +366,7 @@ int main(int argc, char* argv[]) { query_processor->getStorageManager(), -1, // Don't pin the Foreman thread. num_numa_nodes_system, - quickstep::FLAGS_profile_and_report_workorder_perf); + quickstep::FLAGS_profile_and_report_workorder_perf || quickstep::FLAGS_visualize_execution_dag); // Start the worker threads. for (Worker &worker : workers) { @@ -434,6 +439,12 @@ int main(int argc, char* argv[]) { } DCHECK(query_handle->getQueryPlanMutable() != nullptr); + std::unique_ptr<quickstep::ExecutionDAGVisualizer> dag_visualizer; + if (quickstep::FLAGS_visualize_execution_dag) { + dag_visualizer.reset( + new quickstep::ExecutionDAGVisualizer(*query_handle->getQueryPlanMutable())); + } + start = std::chrono::steady_clock::now(); QueryExecutionUtil::ConstructAndSendAdmitRequestMessage( main_thread_client_id, @@ -471,6 +482,12 @@ int main(int argc, char* argv[]) { foreman.printWorkOrderProfilingResults(query_handle->query_id(), stdout); } + if (quickstep::FLAGS_visualize_execution_dag) { + const auto &profiling_stats = + foreman.getWorkOrderProfilingResults(query_handle->query_id()); + dag_visualizer->bindProfilingStats(profiling_stats); + std::cerr << "\n" << dag_visualizer->toDOT() << "\n"; + } } catch (const std::exception &e) { fprintf(stderr, "QUERY EXECUTION ERROR: %s\n", e.what()); break; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/query_execution/ForemanSingleNode.cpp ---------------------------------------------------------------------- diff --git a/query_execution/ForemanSingleNode.cpp b/query_execution/ForemanSingleNode.cpp index d2b56ae..5f3bec2 100644 --- a/query_execution/ForemanSingleNode.cpp +++ b/query_execution/ForemanSingleNode.cpp @@ -236,11 +236,15 @@ void ForemanSingleNode::sendWorkerMessage(const size_t worker_thread_index, << worker_directory_->getClientID(worker_thread_index); } +const std::vector<WorkOrderTimeEntry>& ForemanSingleNode + ::getWorkOrderProfilingResults(const std::size_t query_id) const { + return policy_enforcer_->getProfilingResults(query_id); +} + void ForemanSingleNode::printWorkOrderProfilingResults(const std::size_t query_id, std::FILE *out) const { - const std::vector< - std::tuple<std::size_t, std::size_t, std::size_t>> - &recorded_times = policy_enforcer_->getProfilingResults(query_id); + const std::vector<WorkOrderTimeEntry> &recorded_times = + policy_enforcer_->getProfilingResults(query_id); fputs("Query ID,Worker ID,NUMA Socket,Operator ID,Time (microseconds)\n", out); for (auto workorder_entry : recorded_times) { // Note: Index of the "worker thread index" in the tuple is 0. @@ -251,7 +255,7 @@ void ForemanSingleNode::printWorkOrderProfilingResults(const std::size_t query_i worker_id, worker_directory_->getNUMANode(worker_id), std::get<1>(workorder_entry), // Operator ID. - std::get<2>(workorder_entry)); // Time. + std::get<3>(workorder_entry) - std::get<2>(workorder_entry)); // Time. } } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/query_execution/ForemanSingleNode.hpp ---------------------------------------------------------------------- diff --git a/query_execution/ForemanSingleNode.hpp b/query_execution/ForemanSingleNode.hpp index caef5e0..d999095 100644 --- a/query_execution/ForemanSingleNode.hpp +++ b/query_execution/ForemanSingleNode.hpp @@ -76,6 +76,17 @@ class ForemanSingleNode final : public ForemanBase { ~ForemanSingleNode() override {} + + /** + * @brief Get the results of profiling individual work orders for a given + * query. + * + * @param query_id The ID of the query for which the results are to be printed. + * @return A vector of tuples, each being a single profiling entry. + **/ + const std::vector<WorkOrderTimeEntry>& getWorkOrderProfilingResults( + const std::size_t query_id) const; + /** * @brief Print the results of profiling individual work orders for a given * query. http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/query_execution/PolicyEnforcerBase.cpp ---------------------------------------------------------------------- diff --git a/query_execution/PolicyEnforcerBase.cpp b/query_execution/PolicyEnforcerBase.cpp index d16a502..1ddfe67 100644 --- a/query_execution/PolicyEnforcerBase.cpp +++ b/query_execution/PolicyEnforcerBase.cpp @@ -171,7 +171,8 @@ void PolicyEnforcerBase::recordTimeForWorkOrder( workorder_time_recorder_[query_id].emplace_back( proto.worker_thread_index(), proto.operator_index(), - proto.execution_time_in_microseconds()); + proto.execution_start_time(), + proto.execution_end_time()); } } // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/query_execution/PolicyEnforcerBase.hpp ---------------------------------------------------------------------- diff --git a/query_execution/PolicyEnforcerBase.hpp b/query_execution/PolicyEnforcerBase.hpp index 0482ebc..a5180e3 100644 --- a/query_execution/PolicyEnforcerBase.hpp +++ b/query_execution/PolicyEnforcerBase.hpp @@ -126,8 +126,8 @@ class PolicyEnforcerBase { * * @return A vector of tuples, each being a single profiling entry. **/ - inline const std::vector<std::tuple<std::size_t, std::size_t, std::size_t>>& - getProfilingResults(const std::size_t query_id) const { + inline const std::vector<WorkOrderTimeEntry>& getProfilingResults( + const std::size_t query_id) const { DCHECK(profile_individual_workorders_); DCHECK(workorder_time_recorder_.find(query_id) != workorder_time_recorder_.end()); @@ -164,10 +164,7 @@ class PolicyEnforcerBase { // 1st element: Logical worker ID. // 2nd element: Operator ID. // 3rd element: Time in microseconds to execute the work order. - std::unordered_map< - std::size_t, - std::vector<std::tuple<std::size_t, std::size_t, std::size_t>>> - workorder_time_recorder_; + WorkOrderTimeRecorder workorder_time_recorder_; private: /** http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/query_execution/QueryExecutionMessages.proto ---------------------------------------------------------------------- diff --git a/query_execution/QueryExecutionMessages.proto b/query_execution/QueryExecutionMessages.proto index f2219f6..5a089d2 100644 --- a/query_execution/QueryExecutionMessages.proto +++ b/query_execution/QueryExecutionMessages.proto @@ -38,7 +38,10 @@ message NormalWorkOrderCompletionMessage { required uint64 operator_index = 1; required uint64 worker_thread_index = 2; required uint64 query_id = 3; - optional uint64 execution_time_in_microseconds = 4; + + // Epoch time in microseconds. + optional uint64 execution_start_time = 4; + optional uint64 execution_end_time = 5; } // A message sent upon completion of a rebuild WorkOrder execution. @@ -46,7 +49,10 @@ message RebuildWorkOrderCompletionMessage { required uint64 operator_index = 1; required uint64 worker_thread_index = 2; required uint64 query_id = 3; - optional uint64 execution_time_in_microseconds = 4; + + // Epoch time in microseconds. + optional uint64 execution_start_time = 4; + optional uint64 execution_end_time = 5; } message CatalogRelationNewBlockMessage { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/query_execution/QueryExecutionTypedefs.hpp ---------------------------------------------------------------------- diff --git a/query_execution/QueryExecutionTypedefs.hpp b/query_execution/QueryExecutionTypedefs.hpp index b67209f..26ff6fc 100644 --- a/query_execution/QueryExecutionTypedefs.hpp +++ b/query_execution/QueryExecutionTypedefs.hpp @@ -98,6 +98,12 @@ enum QueryExecutionMessageType : message_type_id { #endif }; +// WorkOrder profiling data structures. +// A tuple of <Worker ID, Operator ID, Start Time, End Time> +typedef std::tuple<std::size_t, std::size_t, std::size_t, std::size_t> WorkOrderTimeEntry; + +typedef std::unordered_map<std::size_t, std::vector<WorkOrderTimeEntry>> WorkOrderTimeRecorder; + /** @} */ } // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/query_execution/Worker.cpp ---------------------------------------------------------------------- diff --git a/query_execution/Worker.cpp b/query_execution/Worker.cpp index 6ba27f1..a582132 100644 --- a/query_execution/Worker.cpp +++ b/query_execution/Worker.cpp @@ -120,14 +120,21 @@ void Worker::executeWorkOrderHelper(const TaggedMessage &tagged_message, worker_message.getWorkOrder()->execute(); end = std::chrono::steady_clock::now(); delete worker_message.getWorkOrder(); - const uint64_t execution_time_microseconds = - std::chrono::duration_cast<std::chrono::microseconds>(end - start) - .count(); + + // Convert the measured timestamps to epoch times in microseconds. + const uint64_t execution_start_time = + std::chrono::duration_cast<std::chrono::microseconds>( + start.time_since_epoch()).count(); + const uint64_t execution_end_time = + std::chrono::duration_cast<std::chrono::microseconds>( + end.time_since_epoch()).count(); + // Construct the proto message. proto->set_operator_index(worker_message.getRelationalOpIndex()); proto->set_query_id(query_id_for_workorder); proto->set_worker_thread_index(worker_thread_index_); - proto->set_execution_time_in_microseconds(execution_time_microseconds); + proto->set_execution_start_time(execution_start_time); + proto->set_execution_end_time(execution_end_time); } } // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/query_execution/tests/QueryManagerSingleNode_unittest.cpp ---------------------------------------------------------------------- diff --git a/query_execution/tests/QueryManagerSingleNode_unittest.cpp b/query_execution/tests/QueryManagerSingleNode_unittest.cpp index 39ca58c..7c96e7f 100644 --- a/query_execution/tests/QueryManagerSingleNode_unittest.cpp +++ b/query_execution/tests/QueryManagerSingleNode_unittest.cpp @@ -104,6 +104,10 @@ class MockOperator: public RelationalOperator { num_calls_donefeedingblocks_(0) { } + std::string getName() const override { + return "MockOperator"; + } + #define MOCK_OP_LOG(x) VLOG(x) << "Op[" << op_index_ << "]: " << __func__ << ": " // The methods below are used to check whether QueryManager calls the Relational http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/AggregationOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/AggregationOperator.hpp b/relational_operators/AggregationOperator.hpp index 4bcbcf6..0ce0462 100644 --- a/relational_operators/AggregationOperator.hpp +++ b/relational_operators/AggregationOperator.hpp @@ -68,6 +68,7 @@ class AggregationOperator : public RelationalOperator { bool input_relation_is_stored, const QueryContext::aggregation_state_id aggr_state_index) : RelationalOperator(query_id), + input_relation_(input_relation), input_relation_is_stored_(input_relation_is_stored), input_relation_block_ids_(input_relation_is_stored ? input_relation.getBlocksSnapshot() : std::vector<block_id>()), @@ -77,6 +78,14 @@ class AggregationOperator : public RelationalOperator { ~AggregationOperator() override {} + std::string getName() const override { + return "AggregationOperator"; + } + + const CatalogRelation& input_relation() const { + return input_relation_; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, @@ -103,6 +112,7 @@ class AggregationOperator : public RelationalOperator { **/ serialization::WorkOrder* createWorkOrderProto(const block_id block); + const CatalogRelation &input_relation_; const bool input_relation_is_stored_; std::vector<block_id> input_relation_block_ids_; const QueryContext::aggregation_state_id aggr_state_index_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/BuildHashOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/BuildHashOperator.hpp b/relational_operators/BuildHashOperator.hpp index 464bbf8..e0be5be 100644 --- a/relational_operators/BuildHashOperator.hpp +++ b/relational_operators/BuildHashOperator.hpp @@ -93,6 +93,14 @@ class BuildHashOperator : public RelationalOperator { ~BuildHashOperator() override {} + const CatalogRelation& input_relation() const { + return input_relation_; + } + + std::string getName() const override { + return "BuildHashOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, @@ -196,6 +204,10 @@ class BuildHashWorkOrder : public WorkOrder { ~BuildHashWorkOrder() override {} + const CatalogRelationSchema& input_relation() const { + return input_relation_; + } + void execute() override; private: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/CreateIndexOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/CreateIndexOperator.hpp b/relational_operators/CreateIndexOperator.hpp index 18ca656..4e05448 100644 --- a/relational_operators/CreateIndexOperator.hpp +++ b/relational_operators/CreateIndexOperator.hpp @@ -69,6 +69,10 @@ class CreateIndexOperator : public RelationalOperator { ~CreateIndexOperator() override {} + std::string getName() const override { + return "CreateIndexOperator"; + } + /** * @note No WorkOrder generated for this operator. **/ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/CreateTableOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/CreateTableOperator.hpp b/relational_operators/CreateTableOperator.hpp index 6d91142..b7b707b 100644 --- a/relational_operators/CreateTableOperator.hpp +++ b/relational_operators/CreateTableOperator.hpp @@ -66,6 +66,10 @@ class CreateTableOperator : public RelationalOperator { ~CreateTableOperator() override {} + std::string getName() const override { + return "CreateTableOperator"; + } + /** * @note No WorkOrder generated for this operator. **/ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/DeleteOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/DeleteOperator.hpp b/relational_operators/DeleteOperator.hpp index 74da8c1..abfe4a9 100644 --- a/relational_operators/DeleteOperator.hpp +++ b/relational_operators/DeleteOperator.hpp @@ -81,6 +81,10 @@ class DeleteOperator : public RelationalOperator { ~DeleteOperator() override {} + std::string getName() const override { + return "DeleteOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/DestroyHashOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/DestroyHashOperator.hpp b/relational_operators/DestroyHashOperator.hpp index 181386f..ae65de5 100644 --- a/relational_operators/DestroyHashOperator.hpp +++ b/relational_operators/DestroyHashOperator.hpp @@ -58,6 +58,10 @@ class DestroyHashOperator : public RelationalOperator { ~DestroyHashOperator() override {} + std::string getName() const override { + return "DestroyHashOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/DropTableOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/DropTableOperator.hpp b/relational_operators/DropTableOperator.hpp index 6c7fca3..f854b4f 100644 --- a/relational_operators/DropTableOperator.hpp +++ b/relational_operators/DropTableOperator.hpp @@ -74,6 +74,10 @@ class DropTableOperator : public RelationalOperator { ~DropTableOperator() override {} + std::string getName() const override { + return "DropTableOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/FinalizeAggregationOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/FinalizeAggregationOperator.hpp b/relational_operators/FinalizeAggregationOperator.hpp index 158a637..0dcfc9e 100644 --- a/relational_operators/FinalizeAggregationOperator.hpp +++ b/relational_operators/FinalizeAggregationOperator.hpp @@ -74,6 +74,10 @@ class FinalizeAggregationOperator : public RelationalOperator { ~FinalizeAggregationOperator() override {} + std::string getName() const override { + return "FinalizeAggregationOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/HashJoinOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp index 5d3d7da..3704ede 100644 --- a/relational_operators/HashJoinOperator.hpp +++ b/relational_operators/HashJoinOperator.hpp @@ -157,6 +157,29 @@ class HashJoinOperator : public RelationalOperator { ~HashJoinOperator() override {} + std::string getName() const override { + switch (join_type_) { + case JoinType::kInnerJoin: + return "HashJoinOperator"; + case JoinType::kLeftSemiJoin: + return "HashJoinOperator(LeftSemi)"; + case JoinType::kLeftAntiJoin: + return "HashJoinOperator(LeftAnti)"; + case JoinType::kLeftOuterJoin: + return "HashJoinOperator(LeftOuter)"; + default: break; + } + LOG(FATAL) << "Unknown join type in HashJoinOperator::getName()"; + } + + const CatalogRelation& build_relation() const { + return build_relation_; + } + + const CatalogRelation& probe_relation() const { + return probe_relation_; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/InsertOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/InsertOperator.hpp b/relational_operators/InsertOperator.hpp index 78f5199..2c6aca7 100644 --- a/relational_operators/InsertOperator.hpp +++ b/relational_operators/InsertOperator.hpp @@ -73,6 +73,10 @@ class InsertOperator : public RelationalOperator { ~InsertOperator() override {} + std::string getName() const override { + return "InsertOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/NestedLoopsJoinOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/NestedLoopsJoinOperator.hpp b/relational_operators/NestedLoopsJoinOperator.hpp index 992e76d..cf190fe 100644 --- a/relational_operators/NestedLoopsJoinOperator.hpp +++ b/relational_operators/NestedLoopsJoinOperator.hpp @@ -116,6 +116,10 @@ class NestedLoopsJoinOperator : public RelationalOperator { ~NestedLoopsJoinOperator() override {} + std::string getName() const override { + return "NestedLoopsJoinOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/RelationalOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/RelationalOperator.hpp b/relational_operators/RelationalOperator.hpp index 116727b..65cd213 100644 --- a/relational_operators/RelationalOperator.hpp +++ b/relational_operators/RelationalOperator.hpp @@ -55,6 +55,13 @@ class RelationalOperator { virtual ~RelationalOperator() {} /** + * @brief Get the name of this relational operator. + * + * @return The name of this relational operator. + */ + virtual std::string getName() const = 0; + + /** * @brief Generate all the next WorkOrders for this RelationalOperator. * * @note If a RelationalOperator has blocking dependencies, it should not @@ -226,6 +233,15 @@ class RelationalOperator { op_index_ = operator_index; } + /** + * @brief Get the index of this operator in the query plan DAG. + * + * @return The index of this operator in the query plan DAG. + */ + std::size_t getOperatorIndex() const { + return op_index_; + } + protected: /** * @brief Constructor http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/SampleOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/SampleOperator.hpp b/relational_operators/SampleOperator.hpp index f8fe5f6..08f08c8 100644 --- a/relational_operators/SampleOperator.hpp +++ b/relational_operators/SampleOperator.hpp @@ -93,6 +93,10 @@ class SampleOperator : public RelationalOperator { ~SampleOperator() override {} + std::string getName() const override { + return "SampleOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/SaveBlocksOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/SaveBlocksOperator.hpp b/relational_operators/SaveBlocksOperator.hpp index 50032b6..ebc5ffc 100644 --- a/relational_operators/SaveBlocksOperator.hpp +++ b/relational_operators/SaveBlocksOperator.hpp @@ -64,6 +64,10 @@ class SaveBlocksOperator : public RelationalOperator { ~SaveBlocksOperator() override {} + std::string getName() const override { + return "SaveBlocksOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/SelectOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/SelectOperator.hpp b/relational_operators/SelectOperator.hpp index 0c10686..d260a53 100644 --- a/relational_operators/SelectOperator.hpp +++ b/relational_operators/SelectOperator.hpp @@ -189,6 +189,14 @@ class SelectOperator : public RelationalOperator { ~SelectOperator() override {} + std::string getName() const override { + return "SelectOperator"; + } + + const CatalogRelation& input_relation() const { + return input_relation_; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/SortMergeRunOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/SortMergeRunOperator.hpp b/relational_operators/SortMergeRunOperator.hpp index 177836f..9b07ad6 100644 --- a/relational_operators/SortMergeRunOperator.hpp +++ b/relational_operators/SortMergeRunOperator.hpp @@ -129,6 +129,10 @@ class SortMergeRunOperator : public RelationalOperator { **/ ~SortMergeRunOperator() {} + std::string getName() const override { + return "SortMergeRunOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/SortRunGenerationOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/SortRunGenerationOperator.hpp b/relational_operators/SortRunGenerationOperator.hpp index 96a3ce1..54c7feb 100644 --- a/relational_operators/SortRunGenerationOperator.hpp +++ b/relational_operators/SortRunGenerationOperator.hpp @@ -109,6 +109,10 @@ class SortRunGenerationOperator : public RelationalOperator { ~SortRunGenerationOperator() {} + std::string getName() const override { + return "SortRunGenerationOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/TableGeneratorOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/TableGeneratorOperator.hpp b/relational_operators/TableGeneratorOperator.hpp index 1b791a6..15e7052 100644 --- a/relational_operators/TableGeneratorOperator.hpp +++ b/relational_operators/TableGeneratorOperator.hpp @@ -76,6 +76,10 @@ class TableGeneratorOperator : public RelationalOperator { ~TableGeneratorOperator() override {} + std::string getName() const override { + return "TableGeneratorOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/TextScanOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/TextScanOperator.hpp b/relational_operators/TextScanOperator.hpp index 1a62ded..6890d7d 100644 --- a/relational_operators/TextScanOperator.hpp +++ b/relational_operators/TextScanOperator.hpp @@ -134,6 +134,10 @@ class TextScanOperator : public RelationalOperator { ~TextScanOperator() override {} + std::string getName() const override { + return "TextScanOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/UpdateOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/UpdateOperator.hpp b/relational_operators/UpdateOperator.hpp index 4471a17..d021844 100644 --- a/relational_operators/UpdateOperator.hpp +++ b/relational_operators/UpdateOperator.hpp @@ -94,6 +94,10 @@ class UpdateOperator : public RelationalOperator { ~UpdateOperator() override {} + std::string getName() const override { + return "UpdateOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/relational_operators/WindowAggregationOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/WindowAggregationOperator.hpp b/relational_operators/WindowAggregationOperator.hpp index bd83248..ee9b9b2 100644 --- a/relational_operators/WindowAggregationOperator.hpp +++ b/relational_operators/WindowAggregationOperator.hpp @@ -78,6 +78,10 @@ class WindowAggregationOperator : public RelationalOperator { ~WindowAggregationOperator() override {} + std::string getName() const override { + return "WindowAggregationOperator"; + } + bool getAllWorkOrders(WorkOrdersContainer *container, QueryContext *query_context, StorageManager *storage_manager, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/utility/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt index 2d3db8f..803b909 100644 --- a/utility/CMakeLists.txt +++ b/utility/CMakeLists.txt @@ -167,6 +167,9 @@ add_library(quickstep_utility_Cast ../empty_src.cpp Cast.hpp) add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp) add_library(quickstep_utility_DAG ../empty_src.cpp DAG.hpp) add_library(quickstep_utility_EqualsAnyConstant ../empty_src.cpp EqualsAnyConstant.hpp) +add_library(quickstep_utility_ExecutionDAGVisualizer + ExecutionDAGVisualizer.cpp + ExecutionDAGVisualizer.hpp) add_library(quickstep_utility_Glob Glob.cpp Glob.hpp) add_library(quickstep_utility_HashPair ../empty_src.cpp HashPair.hpp) add_library(quickstep_utility_Macros ../empty_src.cpp Macros.hpp) @@ -225,6 +228,16 @@ target_link_libraries(quickstep_utility_CheckSnprintf target_link_libraries(quickstep_utility_DAG glog quickstep_utility_Macros) +target_link_libraries(quickstep_utility_ExecutionDAGVisualizer + quickstep_catalog_CatalogRelationSchema + quickstep_queryexecution_QueryExecutionTypedefs + quickstep_queryoptimizer_QueryPlan + quickstep_relationaloperators_AggregationOperator + quickstep_relationaloperators_BuildHashOperator + quickstep_relationaloperators_HashJoinOperator + quickstep_relationaloperators_SelectOperator + quickstep_utility_Macros + quickstep_utility_StringUtil) target_link_libraries(quickstep_utility_Glob glog) target_link_libraries(quickstep_utility_MemStream @@ -303,6 +316,7 @@ target_link_libraries(quickstep_utility quickstep_utility_CheckSnprintf quickstep_utility_DAG quickstep_utility_EqualsAnyConstant + quickstep_utility_ExecutionDAGVisualizer quickstep_utility_Glob quickstep_utility_HashPair quickstep_utility_Macros http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/utility/ExecutionDAGVisualizer.cpp ---------------------------------------------------------------------- diff --git a/utility/ExecutionDAGVisualizer.cpp b/utility/ExecutionDAGVisualizer.cpp new file mode 100644 index 0000000..c5af403 --- /dev/null +++ b/utility/ExecutionDAGVisualizer.cpp @@ -0,0 +1,229 @@ +/** + * Copyright 2016, Quickstep Research Group, Computer Sciences Department, + * University of WisconsinâMadison. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +#include "utility/ExecutionDAGVisualizer.hpp" + +#include <cmath> +#include <cstddef> +#include <iomanip> +#include <memory> +#include <sstream> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> + +#include "catalog/CatalogRelationSchema.hpp" +#include "query_execution/QueryExecutionTypedefs.hpp" +#include "query_optimizer/QueryPlan.hpp" +#include "relational_operators/AggregationOperator.hpp" +#include "relational_operators/BuildHashOperator.hpp" +#include "relational_operators/HashJoinOperator.hpp" +#include "relational_operators/SelectOperator.hpp" +#include "utility/StringUtil.hpp" + +#include "glog/logging.h" + +namespace quickstep { + +ExecutionDAGVisualizer::ExecutionDAGVisualizer(const QueryPlan &plan) { + // Do not display these relational operators in the graph. + std::set<std::string> no_display_op_names = + { "DestroyHashOperator", "DropTableOperator" }; + + const auto &dag = plan.getQueryPlanDAG(); + num_nodes_ = dag.size(); + + // Collect DAG vertices info. + std::vector<bool> display_ops(num_nodes_, false); + for (std::size_t node_index = 0; node_index < num_nodes_; ++node_index) { + const auto &node = dag.getNodePayload(node_index); + const std::string relop_name = node.getName(); + if (no_display_op_names.find(relop_name) == no_display_op_names.end()) { + display_ops[node_index] = true; + NodeInfo &node_info = nodes_[node_index]; + node_info.id = node_index; + node_info.labels.emplace_back( + "[" + std::to_string(node.getOperatorIndex()) + "] " + relop_name); + + std::vector<std::pair<std::string, const CatalogRelationSchema*>> input_relations; + if (relop_name == "AggregationOperator") { + const AggregationOperator &aggregation_op = + static_cast<const AggregationOperator&>(node); + input_relations.emplace_back("input", &aggregation_op.input_relation()); + } else if (relop_name == "BuildHashOperator") { + const BuildHashOperator &build_hash_op = + static_cast<const BuildHashOperator&>(node); + input_relations.emplace_back("input", &build_hash_op.input_relation()); + } else if (relop_name == "HashJoinOperator") { + const HashJoinOperator &hash_join_op = + static_cast<const HashJoinOperator&>(node); + input_relations.emplace_back("probe side", &hash_join_op.probe_relation()); + } else if (relop_name == "SelectOperator") { + const SelectOperator &select_op = + static_cast<const SelectOperator&>(node); + input_relations.emplace_back("input", &select_op.input_relation()); + } + for (const auto &rel_pair : input_relations) { + if (!rel_pair.second->isTemporary()) { + node_info.labels.emplace_back( + rel_pair.first + " stored relation [" + + rel_pair.second->getName() + "]"); + } + } + } + } + + // Collect DAG edges info. + for (std::size_t node_index = 0; node_index < num_nodes_; ++node_index) { + if (display_ops[node_index]) { + for (const auto &link : dag.getDependents(node_index)) { + if (display_ops[link.first]) { + edges_.emplace_back(); + edges_.back().src_node_id = node_index; + edges_.back().dst_node_id = link.first; + edges_.back().is_pipeline_breaker = link.second; + } + } + } + } +} + +void ExecutionDAGVisualizer::bindProfilingStats( + const std::vector<WorkOrderTimeEntry> &execution_time_records) { + std::vector<std::size_t> time_start(num_nodes_, std::numeric_limits<std::size_t>::max()); + std::vector<std::size_t> time_end(num_nodes_, 0); + std::vector<std::size_t> time_elapsed(num_nodes_, 0); + std::size_t overall_start_time = std::numeric_limits<std::size_t>::max(); + std::size_t overall_end_time = 0; + for (const auto &entry : execution_time_records) { + const std::size_t relop_index = std::get<1>(entry); + DCHECK_LT(relop_index, num_nodes_); + + const std::size_t workorder_start_time = std::get<2>(entry); + const std::size_t workorder_end_time = std::get<3>(entry); + overall_start_time = std::min(overall_start_time, workorder_start_time); + overall_end_time = std::max(overall_end_time, workorder_end_time); + + time_start[relop_index] = + std::min(time_start[relop_index], workorder_start_time); + time_end[relop_index] = + std::max(time_end[relop_index], workorder_end_time); + time_elapsed[relop_index] += (workorder_end_time - workorder_start_time); + } + + double total_time_elapsed = 0; + for (std::size_t i = 0; i < time_elapsed.size(); ++i) { + total_time_elapsed += time_elapsed[i]; + } + std::vector<double> time_percentage(num_nodes_, 0); + std::vector<double> span_percentage(num_nodes_, 0); + double overall_span = overall_end_time - overall_start_time; + double max_percentage = 0; + for (std::size_t i = 0; i < time_elapsed.size(); ++i) { + time_percentage[i] = time_elapsed[i] / total_time_elapsed * 100; + span_percentage[i] = (time_end[i] - time_start[i]) / overall_span * 100; + max_percentage = std::max(max_percentage, time_percentage[i] + span_percentage[i]); + } + + for (std::size_t node_index = 0; node_index < num_nodes_; ++node_index) { + if (nodes_.find(node_index) != nodes_.end()) { + const std::size_t relop_start_time = time_start[node_index]; + const std::size_t relop_end_time = time_end[node_index]; + const std::size_t relop_elapsed_time = time_elapsed[node_index]; + NodeInfo &node_info = nodes_[node_index]; + + const double hue = + (time_percentage[node_index] + span_percentage[node_index]) / max_percentage; + node_info.color = std::to_string(hue) + " " + std::to_string(hue) + " 1.0"; + + if (overall_start_time == 0) { + node_info.labels.emplace_back( + "span: " + + std::to_string((relop_end_time - relop_start_time) / 1000) + "ms"); + } else { + node_info.labels.emplace_back( + "span: [" + + std::to_string((relop_start_time - overall_start_time) / 1000) + "ms, " + + std::to_string((relop_end_time - overall_start_time) / 1000) + "ms] (" + + FormatDigits(span_percentage[node_index], 2) + "%)"); + } + + node_info.labels.emplace_back( + "total: " + + std::to_string(relop_elapsed_time / 1000) + "ms (" + + FormatDigits(time_percentage[node_index], 2) + "%)"); + + const double concurrency = + static_cast<double>(relop_elapsed_time) / (relop_end_time - relop_start_time); + node_info.labels.emplace_back( + "effective concurrency: " + FormatDigits(concurrency, 2)); + } + } +} + +std::string ExecutionDAGVisualizer::toDOT() { + // Format output graph + std::ostringstream graph_oss; + graph_oss << "digraph g {\n"; + graph_oss << " rankdir=BT\n"; + graph_oss << " node [penwidth=2]\n"; + graph_oss << " edge [fontsize=16 fontcolor=gray penwidth=2]\n\n"; + + // Format nodes + for (const auto &node_pair : nodes_) { + const NodeInfo &node_info = node_pair.second; + graph_oss << " " << node_info.id << " [ "; + if (!node_info.labels.empty()) { + graph_oss << "label=\"" + << EscapeSpecialChars(JoinToString(node_info.labels, " ")) + << "\" "; + } + if (!node_info.color.empty()) { + graph_oss << "style=filled fillcolor=\"" << node_info.color << "\" "; + } + graph_oss << "]\n"; + } + graph_oss << "\n"; + + // Format edges + for (const EdgeInfo &edge_info : edges_) { + graph_oss << " " << edge_info.src_node_id << " -> " + << edge_info.dst_node_id << " [ "; + if (edge_info.is_pipeline_breaker) { + graph_oss << "style=dashed "; + } + if (!edge_info.labels.empty()) { + graph_oss << "label=\"" + << EscapeSpecialChars(JoinToString(edge_info.labels, " ")) + << "\" "; + } + graph_oss << "]\n"; + } + graph_oss << "}\n"; + + return graph_oss.str(); +} + +std::string ExecutionDAGVisualizer::FormatDigits(const double value, + const int num_digits) { + std::ostringstream oss; + oss << std::fixed << std::setprecision(num_digits) << value; + return oss.str(); +} + +} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/4b944328/utility/ExecutionDAGVisualizer.hpp ---------------------------------------------------------------------- diff --git a/utility/ExecutionDAGVisualizer.hpp b/utility/ExecutionDAGVisualizer.hpp new file mode 100644 index 0000000..d6aea9b --- /dev/null +++ b/utility/ExecutionDAGVisualizer.hpp @@ -0,0 +1,112 @@ +/** + * Copyright 2016, Quickstep Research Group, Computer Sciences Department, + * University of WisconsinâMadison. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +#ifndef QUICKSTEP_UTILITY_EXECUTION_DAG_VISUALIZER_HPP_ +#define QUICKSTEP_UTILITY_EXECUTION_DAG_VISUALIZER_HPP_ + +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include "query_execution/QueryExecutionTypedefs.hpp" +#include "utility/Macros.hpp" + +namespace quickstep { + +class QueryPlan; + +/** \addtogroup Utility + * @{ + */ + +/** + * @brief A visualizer that converts an execution plan DAG into a graph in + * DOT format. Note that DOT is a plain text graph description language. + * + * @note This utility tool can be further extended to be more generic. + */ +class ExecutionDAGVisualizer { + public: + /** + * @brief Constructor + * + * @param plan The execution plan to be visualized. + */ + ExecutionDAGVisualizer(const QueryPlan &plan); + + /** + * @brief Destructor + */ + ~ExecutionDAGVisualizer() {} + + /** + * @brief Summarize the execution timing stats and bind the stats to the + * corresponding relational operators in the execution plan. + * + * @param execution_time_records The profiled timing records of execution. + */ + void bindProfilingStats( + const std::vector<WorkOrderTimeEntry> &execution_time_records); + + /** + * @brief Get the string represenation of the visualized execution plan + * in DOT format (DOT is a plain text graph description language). + * + * @return The execution plan graph in DOT format. + */ + std::string toDOT(); + + private: + /** + * @brief Format a float value to string representation with the specified + * number of decimal digits. + */ + static std::string FormatDigits(const double value, + const int num_digits); + + /** + * @brief Information of a graph node. + */ + struct NodeInfo { + std::size_t id; + std::vector<std::string> labels; + std::string color; + }; + + /** + * @brief Information of a graph edge. + */ + struct EdgeInfo { + std::size_t src_node_id; + std::size_t dst_node_id; + std::vector<std::string> labels; + bool is_pipeline_breaker; + }; + + std::size_t num_nodes_; + std::map<std::size_t, NodeInfo> nodes_; + std::vector<EdgeInfo> edges_; + + DISALLOW_COPY_AND_ASSIGN(ExecutionDAGVisualizer); +}; + +/** @} */ + +} // namespace quickstep + +#endif /* QUICKSTEP_UTILITY_EXECUTION_DAG_VISUALIZER_HPP_ */