http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/03ca4cd3/query_optimizer/tests/ExecutionHeuristics_unittest.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/tests/ExecutionHeuristics_unittest.cpp b/query_optimizer/tests/ExecutionHeuristics_unittest.cpp deleted file mode 100644 index 73b3e84..0000000 --- a/query_optimizer/tests/ExecutionHeuristics_unittest.cpp +++ /dev/null @@ -1,311 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - **/ - -#include <cstddef> -#include <memory> -#include <string> -#include <vector> - -#include "catalog/Catalog.hpp" -#include "catalog/CatalogDatabase.hpp" -#include "catalog/CatalogTypedefs.hpp" -#include "query_execution/QueryContext.hpp" -#include "query_execution/QueryContext.pb.h" -#include "query_optimizer/ExecutionHeuristics.hpp" -#include "query_optimizer/QueryPlan.hpp" -#include "relational_operators/BuildHashOperator.hpp" -#include "relational_operators/HashJoinOperator.hpp" -#include "utility/Macros.hpp" - -#include "glog/logging.h" -#include "gtest/gtest.h" - -namespace quickstep { -namespace optimizer { - -namespace { -constexpr std::size_t kQueryId = 0; -} - -class ExecutionHeuristicsTest : public ::testing::Test { - protected: - virtual void SetUp() { - db_ = cat_.getDatabaseByIdMutable(cat_.addDatabase(new CatalogDatabase(nullptr, "db"))); - execution_heuristics_.reset(new ExecutionHeuristics()); - query_plan_.reset(new QueryPlan()); - query_context_proto_.reset(new serialization::QueryContext()); - } - - CatalogRelation* createCatalogRelation(const std::string &name, bool temporary = false) { - return db_->getRelationByIdMutable(db_->addRelation(new CatalogRelation(nullptr, name, -1, temporary))); - } - - void addDummyHashJoinInfo(ExecutionHeuristics *execution_heuristics, - const QueryPlan::DAGNodeIndex build_operator_index, - const QueryPlan::DAGNodeIndex join_operator_index, - const CatalogRelation *build_relation, - const CatalogRelation *probe_relation, - const attribute_id build_attribute_id, - const attribute_id probe_attribute_id, - const QueryContext::join_hash_table_id join_hash_table_id) { - std::vector<attribute_id> build_attribute_ids(1, build_attribute_id); - std::vector<attribute_id> probe_attribute_ids(1, probe_attribute_id); - execution_heuristics->addHashJoinInfo(build_operator_index, - join_operator_index, - build_relation, - probe_relation, - std::move(build_attribute_ids), - std::move(probe_attribute_ids), - join_hash_table_id); - } - - QueryPlan::DAGNodeIndex createDummyBuildHashOperator(QueryPlan *query_plan, - const CatalogRelation *build_relation, - const attribute_id build_attribute_id, - const QueryContext::join_hash_table_id join_hash_table_index) { - std::vector<attribute_id> build_attribute_ids; - build_attribute_ids.push_back(build_attribute_id); - QueryPlan::DAGNodeIndex build_operator_index = - query_plan->addRelationalOperator(new BuildHashOperator(kQueryId, - *build_relation, - true, - build_attribute_ids, - false, - join_hash_table_index)); - return build_operator_index; - } - - QueryPlan::DAGNodeIndex createDummyHashJoinOperator(QueryPlan *query_plan, - const CatalogRelation *build_relation, - const CatalogRelation *probe_relation, - const attribute_id probe_attribute_id, - const QueryContext::join_hash_table_id join_hash_table_index) { - std::vector<attribute_id> probe_attribute_ids; - probe_attribute_ids.push_back(probe_attribute_id); - QueryPlan::DAGNodeIndex join_operator_index = - query_plan->addRelationalOperator( - new HashJoinOperator(kQueryId, - *build_relation, - *probe_relation, - true, - probe_attribute_ids, - false, - *probe_relation, - 0, - join_hash_table_index, - 0, - 0)); - return join_operator_index; - } - - Catalog cat_; - CatalogDatabase *db_; // db_ is owned by cat_. - std::unique_ptr<QueryPlan> query_plan_; - std::unique_ptr<serialization::QueryContext> query_context_proto_; - std::unique_ptr<ExecutionHeuristics> execution_heuristics_; -}; - -TEST_F(ExecutionHeuristicsTest, HashJoinOptimizedTest) { - // This test case creates three hash joins, all of which are being probed on the same relation. - // Since the probe are being made on the same relation, ExecutionHeuristics should optimize - // these hash joins using bloom filters. - - const CatalogRelation *build_relation_1 = createCatalogRelation("build_relation_1"); - const CatalogRelation *build_relation_2 = createCatalogRelation("build_relation_2"); - const CatalogRelation *build_relation_3 = createCatalogRelation("build_relation_3"); - const CatalogRelation *probe_relation_1 = createCatalogRelation("probe_relation_1"); - - const attribute_id build_attribute_id_1 = 0; - const attribute_id build_attribute_id_2 = 0; - const attribute_id build_attribute_id_3 = 0; - const attribute_id probe_attribute_id_1 = 1; - const attribute_id probe_attribute_id_2 = 2; - const attribute_id probe_attribute_id_3 = 3; - - const QueryContext::join_hash_table_id join_hash_table_index_1 = 0; - const QueryContext::join_hash_table_id join_hash_table_index_2 = 1; - const QueryContext::join_hash_table_id join_hash_table_index_3 = 2; - query_context_proto_->add_join_hash_tables(); - query_context_proto_->add_join_hash_tables(); - query_context_proto_->add_join_hash_tables(); - - const QueryPlan::DAGNodeIndex build_operator_index_1 = createDummyBuildHashOperator(query_plan_.get(), - build_relation_1, - build_attribute_id_1, - join_hash_table_index_1); - const QueryPlan::DAGNodeIndex probe_operator_index_1 = createDummyHashJoinOperator(query_plan_.get(), - build_relation_1, - probe_relation_1, - probe_attribute_id_1, - join_hash_table_index_1); - const QueryPlan::DAGNodeIndex build_operator_index_2 = createDummyBuildHashOperator(query_plan_.get(), - build_relation_2, - build_attribute_id_2, - join_hash_table_index_2); - const QueryPlan::DAGNodeIndex probe_operator_index_2 = createDummyHashJoinOperator(query_plan_.get(), - build_relation_2, - probe_relation_1, - probe_attribute_id_2, - join_hash_table_index_2); - const QueryPlan::DAGNodeIndex build_operator_index_3 = createDummyBuildHashOperator(query_plan_.get(), - build_relation_3, - build_attribute_id_3, - join_hash_table_index_3); - const QueryPlan::DAGNodeIndex probe_operator_index_3 = createDummyHashJoinOperator(query_plan_.get(), - build_relation_3, - probe_relation_1, - probe_attribute_id_3, - join_hash_table_index_3); - - addDummyHashJoinInfo(execution_heuristics_.get(), - build_operator_index_1, - probe_operator_index_1, - build_relation_1, - probe_relation_1, - build_attribute_id_1, - probe_attribute_id_1, - join_hash_table_index_1); - addDummyHashJoinInfo(execution_heuristics_.get(), - build_operator_index_2, - probe_operator_index_2, - build_relation_2, - probe_relation_1, - build_attribute_id_2, - probe_attribute_id_2, - join_hash_table_index_2); - addDummyHashJoinInfo(execution_heuristics_.get(), - build_operator_index_3, - probe_operator_index_3, - build_relation_3, - probe_relation_1, - build_attribute_id_3, - probe_attribute_id_3, - join_hash_table_index_3); - - execution_heuristics_->optimizeExecutionPlan(query_plan_.get(), query_context_proto_.get()); - - // Test whether correct number of bloom filters were added. - EXPECT_EQ(1, query_context_proto_->join_hash_tables(0).build_side_bloom_filter_id_size()); - EXPECT_EQ(1, query_context_proto_->join_hash_tables(1).build_side_bloom_filter_id_size()); - EXPECT_EQ(1, query_context_proto_->join_hash_tables(2).build_side_bloom_filter_id_size()); - EXPECT_EQ(3, query_context_proto_->join_hash_tables(0).probe_side_bloom_filters_size()); - - // Test that the DAG was modified correctly or not. - // Probe operator 1 should have now build operator 1 and build operator 2 added as dependencies. - auto const probe_node_dependencies = query_plan_->getQueryPlanDAG().getDependencies(probe_operator_index_1); - EXPECT_EQ(1u, probe_node_dependencies.count(build_operator_index_2)); - EXPECT_EQ(1u, probe_node_dependencies.count(build_operator_index_3)); -} - -TEST_F(ExecutionHeuristicsTest, HashJoinNotOptimizedTest) { - // This test case creates three hash joins, all of which are being probed on different relations. - // Since the probe are being made on the different relations, ExecutionHeuristics should optimize - // these hash joins using bloom filters. - - const CatalogRelation *build_relation_1 = createCatalogRelation("build_relation_1"); - const CatalogRelation *build_relation_2 = createCatalogRelation("build_relation_2"); - const CatalogRelation *build_relation_3 = createCatalogRelation("build_relation_3"); - const CatalogRelation *probe_relation_1 = createCatalogRelation("probe_relation_1"); - const CatalogRelation *probe_relation_2 = createCatalogRelation("probe_relation_2"); - const CatalogRelation *probe_relation_3 = createCatalogRelation("probe_relation_3"); - - const attribute_id build_attribute_id_1 = 0; - const attribute_id build_attribute_id_2 = 0; - const attribute_id build_attribute_id_3 = 0; - const attribute_id probe_attribute_id_1 = 1; - const attribute_id probe_attribute_id_2 = 2; - const attribute_id probe_attribute_id_3 = 3; - - const QueryContext::join_hash_table_id join_hash_table_index_1 = 0; - const QueryContext::join_hash_table_id join_hash_table_index_2 = 1; - const QueryContext::join_hash_table_id join_hash_table_index_3 = 2; - query_context_proto_->add_join_hash_tables(); - query_context_proto_->add_join_hash_tables(); - query_context_proto_->add_join_hash_tables(); - - const QueryPlan::DAGNodeIndex build_operator_index_1 = createDummyBuildHashOperator(query_plan_.get(), - build_relation_1, - build_attribute_id_1, - join_hash_table_index_1); - const QueryPlan::DAGNodeIndex probe_operator_index_1 = createDummyHashJoinOperator(query_plan_.get(), - build_relation_1, - probe_relation_1, - probe_attribute_id_1, - join_hash_table_index_1); - const QueryPlan::DAGNodeIndex build_operator_index_2 = createDummyBuildHashOperator(query_plan_.get(), - build_relation_2, - build_attribute_id_2, - join_hash_table_index_2); - const QueryPlan::DAGNodeIndex probe_operator_index_2 = createDummyHashJoinOperator(query_plan_.get(), - build_relation_2, - probe_relation_2, - probe_attribute_id_2, - join_hash_table_index_2); - const QueryPlan::DAGNodeIndex build_operator_index_3 = createDummyBuildHashOperator(query_plan_.get(), - build_relation_3, - build_attribute_id_3, - join_hash_table_index_3); - const QueryPlan::DAGNodeIndex probe_operator_index_3 = createDummyHashJoinOperator(query_plan_.get(), - build_relation_3, - probe_relation_3, - probe_attribute_id_3, - join_hash_table_index_3); - - addDummyHashJoinInfo(execution_heuristics_.get(), - build_operator_index_1, - probe_operator_index_1, - build_relation_1, - probe_relation_1, - build_attribute_id_1, - probe_attribute_id_1, - join_hash_table_index_1); - addDummyHashJoinInfo(execution_heuristics_.get(), - build_operator_index_2, - probe_operator_index_2, - build_relation_2, - probe_relation_2, - build_attribute_id_2, - probe_attribute_id_2, - join_hash_table_index_2); - addDummyHashJoinInfo(execution_heuristics_.get(), - build_operator_index_3, - probe_operator_index_3, - build_relation_3, - probe_relation_3, - build_attribute_id_3, - probe_attribute_id_3, - join_hash_table_index_3); - - execution_heuristics_->optimizeExecutionPlan(query_plan_.get(), query_context_proto_.get()); - - // Test that no bloom filters were added. - EXPECT_EQ(0, query_context_proto_->join_hash_tables(0).build_side_bloom_filter_id_size()); - EXPECT_EQ(0, query_context_proto_->join_hash_tables(1).build_side_bloom_filter_id_size()); - EXPECT_EQ(0, query_context_proto_->join_hash_tables(2).build_side_bloom_filter_id_size()); - EXPECT_EQ(0, query_context_proto_->join_hash_tables(0).probe_side_bloom_filters_size()); - - // Test that the DAG was not modified at all. - // Probe operator 1 should not have build operator 1 and build operator 2 added as dependencies. - auto probe_node_dependencies = query_plan_->getQueryPlanDAG().getDependencies(probe_operator_index_1); - EXPECT_EQ(0u, probe_node_dependencies.count(build_operator_index_2)); - EXPECT_EQ(0u, probe_node_dependencies.count(build_operator_index_3)); -} - -} // namespace optimizer -} // namespace quickstep
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/03ca4cd3/utility/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt index ddaae45..f3f3fa3 100644 --- a/utility/CMakeLists.txt +++ b/utility/CMakeLists.txt @@ -156,6 +156,8 @@ QS_PROTOBUF_GENERATE_CPP(quickstep_utility_SortConfiguration_proto_srcs quickstep_utility_SortConfiguration_proto_hdrs SortConfiguration.proto) +add_subdirectory(lip_filter) + # Declare micro-libs: add_library(quickstep_utility_Alignment ../empty_src.cpp Alignment.hpp) add_library(quickstep_utility_BitManipulation ../empty_src.cpp BitManipulation.hpp) @@ -168,6 +170,7 @@ add_library(quickstep_utility_CalculateInstalledMemory CalculateInstalledMemory. add_library(quickstep_utility_Cast ../empty_src.cpp Cast.hpp) add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp) add_library(quickstep_utility_DAG ../empty_src.cpp DAG.hpp) +add_library(quickstep_utility_DisjointTreeForest ../empty_src.cpp DisjointTreeForest.hpp) add_library(quickstep_utility_EqualsAnyConstant ../empty_src.cpp EqualsAnyConstant.hpp) add_library(quickstep_utility_ExecutionDAGVisualizer ExecutionDAGVisualizer.cpp @@ -230,6 +233,8 @@ target_link_libraries(quickstep_utility_CheckSnprintf target_link_libraries(quickstep_utility_DAG glog quickstep_utility_Macros) +target_link_libraries(quickstep_utility_DisjointTreeForest + glog) target_link_libraries(quickstep_utility_ExecutionDAGVisualizer quickstep_catalog_CatalogRelationSchema quickstep_queryexecution_QueryExecutionTypedefs @@ -251,11 +256,16 @@ target_link_libraries(quickstep_utility_PrimeNumber glog) target_link_libraries(quickstep_utility_PlanVisualizer quickstep_catalog_CatalogRelation + quickstep_catalog_CatalogRelationStatistics + quickstep_catalog_CatalogTypedefs quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel quickstep_queryoptimizer_expressions_AttributeReference + quickstep_queryoptimizer_physical_Aggregate quickstep_queryoptimizer_physical_HashJoin + quickstep_queryoptimizer_physical_LIPFilterConfiguration quickstep_queryoptimizer_physical_Physical quickstep_queryoptimizer_physical_PhysicalType + quickstep_queryoptimizer_physical_Selection quickstep_queryoptimizer_physical_TableReference quickstep_queryoptimizer_physical_TopLevelPlan quickstep_utility_Macros @@ -319,6 +329,7 @@ target_link_libraries(quickstep_utility quickstep_utility_Cast quickstep_utility_CheckSnprintf quickstep_utility_DAG + quickstep_utility_DisjointTreeForest quickstep_utility_EqualsAnyConstant quickstep_utility_ExecutionDAGVisualizer quickstep_utility_Glob @@ -375,6 +386,13 @@ target_link_libraries(DAG_unittest ${LIBS}) add_test(DAG_unittest DAG_unittest) +add_executable(DisjointTreeForest_unittest "${CMAKE_CURRENT_SOURCE_DIR}/tests/DisjointTreeForest_unittest.cpp") +target_link_libraries(DisjointTreeForest_unittest + gtest + gtest_main + quickstep_utility_DisjointTreeForest) +add_test(DisjointTreeForest_unittest DisjointTreeForest_unittest) + add_executable(EqualsAnyConstant_unittest "${CMAKE_CURRENT_SOURCE_DIR}/tests/EqualsAnyConstant_unittest.cpp") target_link_libraries(EqualsAnyConstant_unittest gtest http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/03ca4cd3/utility/DisjointTreeForest.hpp ---------------------------------------------------------------------- diff --git a/utility/DisjointTreeForest.hpp b/utility/DisjointTreeForest.hpp new file mode 100644 index 0000000..ce9e23c --- /dev/null +++ b/utility/DisjointTreeForest.hpp @@ -0,0 +1,151 @@ +/** + * Copyright 2016, Quickstep Research Group, Computer Sciences Department, + * University of WisconsinâMadison. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +#ifndef QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_ +#define QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_ + +#include <cstddef> +#include <limits> +#include <utility> +#include <unordered_map> +#include <vector> + +#include "glog/logging.h" + +namespace quickstep { + +/** \addtogroup Utility + * @{ + */ + +/** + * @brief Disjoint sets implemented with tree data structures so that the + * union/find operations have nearly O(1) time complexity. + */ +template <typename ElementT, + class MapperT = std::unordered_map<ElementT, std::size_t>> +class DisjointTreeForest { + public: + /** + * @brief Whether the given element is in a subset. + * + * @param element The element. + * @return True if the element is in a subset. + */ + bool hasElement(const ElementT &element) const { + return elements_map_.find(element) != elements_map_.end(); + } + + /** + * @brief If the given element is not in any subset yet, make a singleton + * subset for it. Otherwise do nothing. + * + * @param element The element. + */ + void makeSet(const ElementT &element) { + if (!hasElement(element)) { + std::size_t loc = nodes_.size(); + nodes_.emplace_back(0, loc); + elements_map_.emplace(element, loc); + } + } + + /** + * @brief Find the subset id for the given element. + * + * @param element The element. + */ + std::size_t find(const ElementT &element) { + DCHECK(hasElement(element)); + + const std::size_t node_id = elements_map_.at(element); + std::size_t root_id = node_id; + std::size_t parent_id; + while ((parent_id = nodes_[root_id].parent) != root_id) { + root_id = parent_id; + } + compress_path(node_id, root_id); + return root_id; + } + + /** + * @brief Union the two subsets that the two given elements belong to. + * + * @param element1 The first element. + * @param element2 The second element. + */ + void merge(const ElementT &element1, const ElementT &element2) { + std::size_t root_id1 = find(element1); + std::size_t root_id2 = find(element2); + if (root_id1 != root_id2) { + Node &n1 = nodes_[root_id1]; + Node &n2 = nodes_[root_id2]; + if (n1.rank > n2.rank) { + n2.parent = root_id1; + } else if (n1.rank < n2.rank) { + n1.parent = root_id2; + } else { + n1.parent = root_id2; + n2.rank += 1; + } + } + } + + /** + * @brief Whether the two given elements are in the same subset. + * + * @param element1 The first element. + * @param element2 The second element. + * @return True if the twos elements are in the same subset, false otherwise. + */ + bool isConnected(const ElementT &element1, const ElementT &element2) { + return find(element1) == find(element2); + } + + private: + struct Node { + Node(const std::size_t rank_in, const std::size_t parent_in) + : rank(rank_in), parent(parent_in) { + } + std::size_t rank; + std::size_t parent; + }; + + inline void compress_path(const std::size_t leaf_node_id, + const std::size_t root_node_id) { + std::size_t node_id = leaf_node_id; + std::size_t max_rank = 0; + while (node_id != root_node_id) { + const Node &node = nodes_[node_id]; + max_rank = std::max(max_rank, node.rank); + + const std::size_t parent_id = node.parent; + nodes_[node_id].parent = root_node_id; + node_id = parent_id; + } + nodes_[root_node_id].rank = max_rank + 1; + } + + std::vector<Node> nodes_; + MapperT elements_map_; +}; + +/** @} */ + +} // namespace quickstep + +#endif // QUICKSTEP_UTILITY_DISJOINT_TREE_FOREST_HPP_ \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/03ca4cd3/utility/PlanVisualizer.cpp ---------------------------------------------------------------------- diff --git a/utility/PlanVisualizer.cpp b/utility/PlanVisualizer.cpp index 50cf7f0..f2dba18 100644 --- a/utility/PlanVisualizer.cpp +++ b/utility/PlanVisualizer.cpp @@ -21,18 +21,22 @@ #include <cstddef> #include <memory> +#include <set> #include <sstream> #include <string> #include <unordered_map> #include <vector> #include "catalog/CatalogRelation.hpp" - +#include "catalog/CatalogRelationStatistics.hpp" +#include "catalog/CatalogTypedefs.hpp" #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" #include "query_optimizer/expressions/AttributeReference.hpp" +#include "query_optimizer/physical/Aggregate.hpp" #include "query_optimizer/physical/HashJoin.hpp" #include "query_optimizer/physical/Physical.hpp" #include "query_optimizer/physical/PhysicalType.hpp" +#include "query_optimizer/physical/Selection.hpp" #include "query_optimizer/physical/TableReference.hpp" #include "query_optimizer/physical/TopLevelPlan.hpp" #include "utility/StringUtil.hpp" @@ -47,9 +51,12 @@ namespace C = ::quickstep::optimizer::cost; std::string PlanVisualizer::visualize(const P::PhysicalPtr &input) { DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan); + const P::TopLevelPlanPtr top_level_plan = + std::static_pointer_cast<const P::TopLevelPlan>(input); cost_model_.reset( new C::StarSchemaSimpleCostModel( - std::static_pointer_cast<const P::TopLevelPlan>(input)->shared_subplans())); + top_level_plan->shared_subplans())); + lip_filter_conf_ = top_level_plan->lip_filter_configuration(); color_map_["TableReference"] = "skyblue"; color_map_["Selection"] = "#90EE90"; @@ -86,6 +93,9 @@ std::string PlanVisualizer::visualize(const P::PhysicalPtr &input) { for (const EdgeInfo &edge_info : edges_) { graph_oss << " " << edge_info.src_node_id << " -> " << edge_info.dst_node_id << " ["; + if (edge_info.dashed) { + graph_oss << "style=dashed "; + } if (!edge_info.labels.empty()) { graph_oss << "label=\"" << EscapeSpecialChars(JoinToString(edge_info.labels, " ")) @@ -103,6 +113,10 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) { int node_id = ++id_counter_; node_id_map_.emplace(input, node_id); + std::set<E::ExprId> referenced_ids; + for (const auto &attr : input->getReferencedAttributes()) { + referenced_ids.emplace(attr->id()); + } for (const auto &child : input->children()) { visit(child); @@ -112,12 +126,18 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) { EdgeInfo &edge_info = edges_.back(); edge_info.src_node_id = child_id; edge_info.dst_node_id = node_id; + edge_info.dashed = false; - // Print output attributes except for TableReference -- there are just too many - // attributes out of TableReference. - if (child->getPhysicalType() != P::PhysicalType::kTableReference) { - for (const auto &attr : child->getOutputAttributes()) { - edge_info.labels.emplace_back(attr->attribute_alias()); + if (input->getPhysicalType() == P::PhysicalType::kHashJoin && + child == input->children()[1]) { + edge_info.dashed = true; + } + + for (const auto &attr : child->getOutputAttributes()) { + if (referenced_ids.find(attr->id()) != referenced_ids.end()) { + edge_info.labels.emplace_back( + attr->attribute_alias() + ", est # distinct = " + + std::to_string(cost_model_->estimateNumDistinctValues(attr->id(), child))); } } } @@ -154,6 +174,26 @@ void PlanVisualizer::visit(const P::PhysicalPtr &input) { break; } } + + if (lip_filter_conf_ != nullptr) { + const auto &build_filters = lip_filter_conf_->getBuildInfoMap(); + const auto build_it = build_filters.find(input); + if (build_it != build_filters.end()) { + for (const auto &build_info : build_it->second) { + node_info.labels.emplace_back( + std::string("[LIP build] ") + build_info.build_attribute->attribute_alias()); + } + } + const auto &probe_filters = lip_filter_conf_->getProbeInfoMap(); + const auto probe_it = probe_filters.find(input); + if (probe_it != probe_filters.end()) { + for (const auto &probe_info : probe_it->second) { + node_info.labels.emplace_back( + std::string("[LIP probe] ") + probe_info.probe_attribute->attribute_alias()); + } + } + } + node_info.labels.emplace_back( "est. # = " + std::to_string(cost_model_->estimateCardinality(input))); node_info.labels.emplace_back( http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/03ca4cd3/utility/PlanVisualizer.hpp ---------------------------------------------------------------------- diff --git a/utility/PlanVisualizer.hpp b/utility/PlanVisualizer.hpp index 1c0df77..9b8b0db 100644 --- a/utility/PlanVisualizer.hpp +++ b/utility/PlanVisualizer.hpp @@ -26,6 +26,7 @@ #include <vector> #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" +#include "query_optimizer/physical/LIPFilterConfiguration.hpp" #include "query_optimizer/physical/Physical.hpp" #include "utility/Macros.hpp" @@ -73,6 +74,7 @@ class PlanVisualizer { int src_node_id; int dst_node_id; std::vector<std::string> labels; + bool dashed; }; void visit(const optimizer::physical::PhysicalPtr &input); @@ -85,6 +87,7 @@ class PlanVisualizer { std::vector<EdgeInfo> edges_; std::unique_ptr<optimizer::cost::StarSchemaSimpleCostModel> cost_model_; + optimizer::physical::LIPFilterConfigurationPtr lip_filter_conf_; DISALLOW_COPY_AND_ASSIGN(PlanVisualizer); }; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/03ca4cd3/utility/lip_filter/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/utility/lip_filter/CMakeLists.txt b/utility/lip_filter/CMakeLists.txt new file mode 100644 index 0000000..2232abe --- /dev/null +++ b/utility/lip_filter/CMakeLists.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Declare micro-libs: +add_library(quickstep_utility_lipfilter_LIPFilter ../../empty_src.cpp LIPFilter.hpp) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/03ca4cd3/utility/lip_filter/LIPFilter.hpp ---------------------------------------------------------------------- diff --git a/utility/lip_filter/LIPFilter.hpp b/utility/lip_filter/LIPFilter.hpp new file mode 100644 index 0000000..33165ed --- /dev/null +++ b/utility/lip_filter/LIPFilter.hpp @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#ifndef QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_ +#define QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_ + +namespace quickstep { + +/** \addtogroup Utility + * @{ + */ + +enum class LIPFilterType { + kBloomFilter, + kExactFilter, + kSingleIdentityHashFilter +}; + +/** @} */ + +} // namespace quickstep + +#endif // QUICKSTEP_UTILITY_LIP_FILTER_LIP_FILTER_HPP_ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/03ca4cd3/utility/tests/DisjointTreeForest_unittest.cpp ---------------------------------------------------------------------- diff --git a/utility/tests/DisjointTreeForest_unittest.cpp b/utility/tests/DisjointTreeForest_unittest.cpp new file mode 100644 index 0000000..f42ec40 --- /dev/null +++ b/utility/tests/DisjointTreeForest_unittest.cpp @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#include "utility/DisjointTreeForest.hpp" + +#include <cstddef> +#include <string> + +#include "gtest/gtest.h" + +namespace quickstep { + +TEST(DisjointTreeForestTest, IntTest) { + DisjointTreeForest<int> forest; + for (int i = 10; i < 20; ++i) { + forest.makeSet(i); + } + + for (int i = 10; i < 20; i += 2) { + EXPECT_NE(forest.find(i), forest.find(i+1)); + EXPECT_FALSE(forest.isConnected(i, i+1)); + + forest.merge(i, i+1); + EXPECT_EQ(forest.find(i), forest.find(i+1)); + EXPECT_TRUE(forest.isConnected(i, i+1)); + + forest.merge(i+1, i); + EXPECT_EQ(forest.find(i), forest.find(i+1)); + EXPECT_TRUE(forest.isConnected(i, i+1)); + } + + for (int i = 12; i < 20; i += 2) { + EXPECT_NE(forest.find(i), forest.find(i-1)); + EXPECT_FALSE(forest.isConnected(i, i-1)); + } + + forest.merge(10, 17); + forest.merge(11, 18); + EXPECT_EQ(forest.find(11), forest.find(16)); + EXPECT_EQ(forest.find(10), forest.find(19)); + EXPECT_NE(forest.find(10), forest.find(12)); + EXPECT_NE(forest.find(15), forest.find(17)); + + forest.merge(12, 14); + forest.merge(15, 16); + const std::size_t id = forest.find(10); + for (int i = 10; i < 20; ++i) { + EXPECT_EQ(forest.find(i), id); + } +} + +TEST(DisjointTreeForestTest, StringTest) { + DisjointTreeForest<std::string> forest; + const std::vector<std::string> elements = { "aaa", "bbb", "ccc", "ddd" }; + for (const std::string &element : elements) { + forest.makeSet(element); + } + + EXPECT_NE(forest.find("aaa"), forest.find("bbb")); + forest.merge("aaa", "bbb"); + EXPECT_EQ(forest.find("aaa"), forest.find("bbb")); + + EXPECT_NE(forest.find("ccc"), forest.find("ddd")); + forest.merge("ccc", "ddd"); + EXPECT_EQ(forest.find("ccc"), forest.find("ddd")); + + EXPECT_NE(forest.find("aaa"), forest.find("ccc")); + EXPECT_NE(forest.find("aaa"), forest.find("ddd")); + EXPECT_NE(forest.find("bbb"), forest.find("ccc")); + EXPECT_NE(forest.find("bbb"), forest.find("ddd")); + + forest.merge("aaa", "ddd"); + for (const std::string &e1 : elements) { + for (const std::string &e2 : elements) { + EXPECT_EQ(forest.find(e1), forest.find(e2)); + EXPECT_TRUE(forest.isConnected(e1, e2)); + } + } +} + +} // namespace quickstep