Repository: incubator-quickstep Updated Branches: refs/heads/adaptive-bloom-filters bb1929aa5 -> ccf55f50c
updates Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/ccf55f50 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/ccf55f50 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/ccf55f50 Branch: refs/heads/adaptive-bloom-filters Commit: ccf55f50cee3beb801f75ac12b49d4fd0c691aa1 Parents: bb1929a Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Tue Jul 5 15:50:45 2016 -0500 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Tue Jul 5 15:50:45 2016 -0500 ---------------------------------------------------------------------- query_optimizer/PhysicalGenerator.cpp | 1 + .../StarSchemaHashJoinOrderOptimization.cpp | 29 ++++++++++++- .../StarSchemaHashJoinOrderOptimization.hpp | 43 +++++++++++++++----- 3 files changed, 61 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ccf55f50/query_optimizer/PhysicalGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp index ee133b5..5f97d6f 100644 --- a/query_optimizer/PhysicalGenerator.cpp +++ b/query_optimizer/PhysicalGenerator.cpp @@ -95,6 +95,7 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan( P::PhysicalPtr PhysicalGenerator::optimizePlan() { std::vector<std::unique_ptr<Rule<P::Physical>>> rules; if (FLAGS_reorder_hash_joins) { + rules.emplace_back(new PruneColumns()); rules.emplace_back(new StarSchemaHashJoinOrderOptimization()); } rules.emplace_back(new PruneColumns()); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ccf55f50/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp index c9bd7d2..2e0b6c6 100644 --- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp +++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp @@ -73,6 +73,9 @@ P::PhysicalPtr StarSchemaHashJoinOrderOptimization::applyInternal(const P::Physi JoinGroupInfo *join_group = nullptr; if (parent_join_group == nullptr || !is_valid_cascading_hash_join) { new_join_group.reset(new JoinGroupInfo()); + for (const auto &attr : input->getReferencedAttributes()) { + new_join_group->referenced_attributes.emplace(attr->id()); + } join_group = new_join_group.get(); } else { join_group = parent_join_group; @@ -145,7 +148,9 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan( i, tables[i], cost_model_->estimateCardinality(tables[i]), - cost_model_->estimateSelectivity(tables[i])); + cost_model_->estimateSelectivity(tables[i]), + CountSharedAttributes(join_group.referenced_attributes, + tables[i]->getOutputAttributes())); } // Auxiliary mapping info. @@ -237,6 +242,11 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan( CHECK(selected_probe_table_info != nullptr); CHECK(selected_build_table_info != nullptr); + std::cerr << selected_probe_table_info->estimated_num_output_attributes + << " -- " + << selected_build_table_info->estimated_num_output_attributes + << "\n"; + remaining_tables.erase(selected_probe_table_info); remaining_tables.erase(selected_build_table_info); @@ -283,6 +293,10 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan( selected_probe_table_info->estimated_cardinality = cost_model_->estimateCardinality(output); selected_probe_table_info->estimated_selectivity = cost_model_->estimateSelectivity(output); + selected_probe_table_info->estimated_num_output_attributes = + CountSharedAttributes(join_group.referenced_attributes, + output->getOutputAttributes()); + remaining_tables.emplace(selected_probe_table_info); // Update join attribute groups. @@ -307,5 +321,18 @@ physical::PhysicalPtr StarSchemaHashJoinOrderOptimization::generatePlan( } } +std::size_t StarSchemaHashJoinOrderOptimization::CountSharedAttributes( + const std::unordered_set<expressions::ExprId> &attr_set1, + const std::vector<expressions::AttributeReferencePtr> &attr_set2) { + std::size_t cnt = 0; + for (const auto &attr : attr_set2) { + if (attr_set1.find(attr->id()) != attr_set1.end()) { + ++cnt; + } + } + return cnt; +} + + } // namespace optimizer } // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/ccf55f50/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp index 9ff89fd..6ad300c 100644 --- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp +++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp @@ -62,6 +62,7 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> { * @brief A group of tables to form a hash join tree. */ struct JoinGroupInfo { + std::unordered_set<expressions::ExprId> referenced_attributes; std::vector<physical::PhysicalPtr> tables; std::vector<std::pair<expressions::ExprId, expressions::ExprId>> join_attribute_pairs; }; @@ -70,20 +71,23 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> { * @brief Auxiliary information of a table for the optimizer. */ struct TableInfo { - TableInfo(const std::size_t in_table_info_id, - const physical::PhysicalPtr &in_table, - const std::size_t in_estimated_cardinality, - const double in_estimated_selectivity) - : table_info_id(in_table_info_id), - table(in_table), - estimated_cardinality(in_estimated_cardinality), - estimated_selectivity(in_estimated_selectivity) { + TableInfo(const std::size_t table_info_id_in, + const physical::PhysicalPtr &table_in, + const std::size_t estimated_cardinality_in, + const double estimated_selectivity_in, + const std::size_t estimated_num_output_attributes_in) + : table_info_id(table_info_id_in), + table(table_in), + estimated_cardinality(estimated_cardinality_in), + estimated_selectivity(estimated_selectivity_in), + estimated_num_output_attributes(estimated_num_output_attributes_in) { } const std::size_t table_info_id; physical::PhysicalPtr table; std::size_t estimated_cardinality; double estimated_selectivity; + std::size_t estimated_num_output_attributes; }; struct JoinPair { @@ -91,13 +95,26 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> { : probe(probe_in), build(build_in) { } - inline bool isBetterThan (const JoinPair &rhs) const { + inline bool isBetterThan(const JoinPair &rhs) const { const auto &lhs = *this; - const bool lhs_has_small_build = lhs.build->estimated_cardinality < 0x1000; - const bool rhs_has_small_build = rhs.build->estimated_cardinality < 0x1000; + const bool lhs_has_large_output = + lhs.build->estimated_num_output_attributes + + lhs.probe->estimated_num_output_attributes > 5; + const bool rhs_has_large_output = + rhs.build->estimated_num_output_attributes + + rhs.probe->estimated_num_output_attributes > 5; + if (lhs_has_large_output != rhs_has_large_output) { + return rhs_has_large_output; + } + + const bool lhs_has_small_build = + !lhs_has_large_output && lhs.build->estimated_cardinality < 0x1000; + const bool rhs_has_small_build = + !rhs_has_large_output && rhs.build->estimated_cardinality < 0x1000; if (lhs_has_small_build != rhs_has_small_build) { return lhs_has_small_build; } + if (lhs.probe->estimated_cardinality != rhs.probe->estimated_cardinality) { return lhs.probe->estimated_cardinality < rhs.probe->estimated_cardinality; } @@ -126,6 +143,10 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> { const expressions::PredicatePtr &residual_predicate, const std::vector<expressions::NamedExpressionPtr> &project_expressions); + static std::size_t CountSharedAttributes( + const std::unordered_set<expressions::ExprId> &attr_set1, + const std::vector<expressions::AttributeReferencePtr> &attr_set2); + std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_; DISALLOW_COPY_AND_ASSIGN(StarSchemaHashJoinOrderOptimization);