Repository: incubator-quickstep Updated Branches: refs/heads/estimate-num-distinct-values f9fead970 -> 62b9e205a
minor fixes Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/62b9e205 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/62b9e205 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/62b9e205 Branch: refs/heads/estimate-num-distinct-values Commit: 62b9e205a7842dad08c5bccbeedab9870b350ac8 Parents: f9fead9 Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Tue Oct 4 16:22:22 2016 -0500 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Tue Oct 4 16:22:22 2016 -0500 ---------------------------------------------------------------------- query_optimizer/cost_model/CMakeLists.txt | 2 ++ .../cost_model/StarSchemaSimpleCostModel.cpp | 20 ++++++++++---------- .../StarSchemaHashJoinOrderOptimization.hpp | 4 ++++ 3 files changed, 16 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/62b9e205/query_optimizer/cost_model/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/CMakeLists.txt b/query_optimizer/cost_model/CMakeLists.txt index abbc3da..08582f5 100644 --- a/query_optimizer/cost_model/CMakeLists.txt +++ b/query_optimizer/cost_model/CMakeLists.txt @@ -30,9 +30,11 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_SimpleCostModel glog quickstep_catalog_CatalogRelation quickstep_queryoptimizer_costmodel_CostModel + quickstep_queryoptimizer_expressions_ExpressionUtil quickstep_queryoptimizer_physical_Aggregate quickstep_queryoptimizer_physical_HashJoin quickstep_queryoptimizer_physical_NestedLoopsJoin + quickstep_queryoptimizer_physical_PatternMatcher quickstep_queryoptimizer_physical_Physical quickstep_queryoptimizer_physical_PhysicalType quickstep_queryoptimizer_physical_Selection http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/62b9e205/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp index 122485a..8344ac3 100644 --- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp +++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp @@ -114,9 +114,9 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTableReference( std::size_t StarSchemaSimpleCostModel::estimateCardinalityForSelection( const P::SelectionPtr &physical_plan) { double selectivity = - estimateSelectivityForPredicate(physical_plan->filter_predicate(), - physical_plan); - return static_cast<std::size_t>(estimateCardinality(physical_plan->input()) * selectivity); + estimateSelectivityForPredicate(physical_plan->filter_predicate(), physical_plan); + return static_cast<std::size_t>( + estimateCardinality(physical_plan->input()) * selectivity); } std::size_t StarSchemaSimpleCostModel::estimateCardinalityForSort( @@ -153,13 +153,13 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForAggregate( return 1; } - const std::size_t estimated_child_cardinality = estimateCardinality(physical_plan->input()); + std::size_t estimated_child_cardinality = estimateCardinality(physical_plan->input()); std::size_t estimated_num_groups = 1; std::size_t max_attr_num_distinct_values = 0; for (const auto &expr : physical_plan->grouping_expressions()) { E::AttributeReferencePtr attr; if (E::SomeAttributeReference::MatchesWithConditionalCast(expr, &attr)) { - const std::size_t attr_num_distinct_values = + std::size_t attr_num_distinct_values = estimateNumDistinctValues(attr->id(), physical_plan->input()); estimated_num_groups *= std::max(1uL, attr_num_distinct_values); max_attr_num_distinct_values = @@ -196,7 +196,7 @@ std::size_t StarSchemaSimpleCostModel::estimateNumDistinctValues( case P::PhysicalType::kAggregate: { const P::PhysicalPtr &child = physical_plan->children()[0]; if (E::ContainsExprId(child->getOutputAttributes(), attribute_id)) { - const std::size_t child_num_distinct_values = + std::size_t child_num_distinct_values = estimateNumDistinctValues(attribute_id, child); return static_cast<std::size_t>( child_num_distinct_values * filter_selectivity + 0.5); @@ -207,17 +207,17 @@ std::size_t StarSchemaSimpleCostModel::estimateNumDistinctValues( const P::HashJoinPtr &hash_join = std::static_pointer_cast<const P::HashJoin>(physical_plan); if (E::ContainsExprId(hash_join->left()->getOutputAttributes(), attribute_id)) { - const std::size_t left_child_num_distinct_values = + std::size_t left_child_num_distinct_values = estimateNumDistinctValues(attribute_id, hash_join->left()); - const double right_child_selectivity = + double right_child_selectivity = estimateSelectivity(hash_join->right()); return static_cast<std::size_t>( left_child_num_distinct_values * right_child_selectivity * filter_selectivity + 0.5); } if (E::ContainsExprId(hash_join->right()->getOutputAttributes(), attribute_id)) { - const std::size_t right_child_num_distinct_values = + std::size_t right_child_num_distinct_values = estimateNumDistinctValues(attribute_id, hash_join->right()); - const double left_child_selectivity = + double left_child_selectivity = estimateSelectivity(hash_join->left()); return static_cast<std::size_t>( right_child_num_distinct_values * left_child_selectivity * filter_selectivity + 0.5); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/62b9e205/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp index f832f00..c1a7bae 100644 --- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp +++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp @@ -103,6 +103,10 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> { if (lhs->estimated_selectivity < rhs->estimated_selectivity) { return !swapped; + } else if (lhs->estimated_cardinality < 100u && + rhs->estimated_cardinality > 10000u && + lhs->estimated_selectivity < rhs->estimated_selectivity * 1.5) { + return !swapped; } else if (lhs->estimated_selectivity > rhs->estimated_selectivity) { return swapped; } else if (lhs->estimated_cardinality != rhs->estimated_cardinality) {