Repository: incubator-quickstep Updated Branches: refs/heads/master 7a4644349 -> 2ee5c1cc6
Use SimpleCostModel for estimating JoinHashTable size Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/2ee5c1cc Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/2ee5c1cc Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/2ee5c1cc Branch: refs/heads/master Commit: 2ee5c1cc656ef71c99714380b1a78bbd16a74925 Parents: 7a46443 Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Tue Oct 18 11:07:51 2016 -0500 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Tue Oct 18 11:36:55 2016 -0500 ---------------------------------------------------------------------- query_optimizer/CMakeLists.txt | 1 + query_optimizer/ExecutionGenerator.cpp | 10 +++++++--- query_optimizer/ExecutionGenerator.hpp | 9 +++++++-- query_optimizer/cost_model/CMakeLists.txt | 1 + query_optimizer/cost_model/SimpleCostModel.cpp | 11 +++++++++++ query_optimizer/cost_model/SimpleCostModel.hpp | 6 ++++++ 6 files changed, 33 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt index fa9141c..8333d4b 100644 --- a/query_optimizer/CMakeLists.txt +++ b/query_optimizer/CMakeLists.txt @@ -76,6 +76,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator quickstep_queryoptimizer_QueryHandle quickstep_queryoptimizer_QueryPlan quickstep_queryoptimizer_costmodel_CostModel + quickstep_queryoptimizer_costmodel_SimpleCostModel quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel quickstep_queryoptimizer_expressions_AggregateFunction quickstep_queryoptimizer_expressions_Alias http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/ExecutionGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp index 09ef9e0..5a701b7 100644 --- a/query_optimizer/ExecutionGenerator.cpp +++ b/query_optimizer/ExecutionGenerator.cpp @@ -57,6 +57,7 @@ #include "query_optimizer/OptimizerContext.hpp" #include "query_optimizer/QueryHandle.hpp" #include "query_optimizer/QueryPlan.hpp" +#include "query_optimizer/cost_model/SimpleCostModel.hpp" #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" #include "query_optimizer/expressions/AggregateFunction.hpp" #include "query_optimizer/expressions/Alias.hpp" @@ -165,8 +166,10 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) { CHECK(P::SomeTopLevelPlan::MatchesWithConditionalCast(physical_plan, &top_level_physical_plan_)) << "The physical plan must be rooted by a TopLevelPlan"; - cost_model_.reset( + cost_model_for_aggregation_.reset( new cost::StarSchemaSimpleCostModel(top_level_physical_plan_->shared_subplans())); + cost_model_for_hash_join_.reset( + new cost::SimpleCostModel(top_level_physical_plan_->shared_subplans())); const CatalogRelation *result_relation = nullptr; @@ -594,7 +597,8 @@ void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) { std::vector<attribute_id> probe_attribute_ids; std::vector<attribute_id> build_attribute_ids; - std::size_t build_cardinality = cost_model_->estimateCardinality(build_physical); + std::size_t build_cardinality = + cost_model_for_hash_join_->estimateCardinality(build_physical); bool any_probe_attributes_nullable = false; bool any_build_attributes_nullable = false; @@ -1363,7 +1367,7 @@ void ExecutionGenerator::convertAggregate( } const std::size_t estimated_num_groups = - cost_model_->estimateNumGroupsForAggregate(physical_plan); + cost_model_for_aggregation_->estimateNumGroupsForAggregate(physical_plan); aggr_state_proto->set_estimated_num_entries(std::max(16uL, estimated_num_groups)); const QueryPlan::DAGNodeIndex aggregation_operator_index = http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/ExecutionGenerator.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp index 495955e..b7d8ef9 100644 --- a/query_optimizer/ExecutionGenerator.hpp +++ b/query_optimizer/ExecutionGenerator.hpp @@ -416,9 +416,14 @@ class ExecutionGenerator { std::vector<CatalogRelationInfo> temporary_relation_info_vec_; /** - * @brief The cost model to use for creating the execution plan. + * @brief The cost model to use for estimating aggregation hash table size. */ - std::unique_ptr<cost::CostModel> cost_model_; + std::unique_ptr<cost::CostModel> cost_model_for_aggregation_; + + /** + * @brief The cost model to use for estimating join hash table size. + */ + std::unique_ptr<cost::CostModel> cost_model_for_hash_join_; physical::TopLevelPlanPtr top_level_physical_plan_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/cost_model/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/CMakeLists.txt b/query_optimizer/cost_model/CMakeLists.txt index ba99de3..d616696 100644 --- a/query_optimizer/cost_model/CMakeLists.txt +++ b/query_optimizer/cost_model/CMakeLists.txt @@ -38,6 +38,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_SimpleCostModel quickstep_queryoptimizer_physical_PhysicalType quickstep_queryoptimizer_physical_Selection quickstep_queryoptimizer_physical_SharedSubplanReference + quickstep_queryoptimizer_physical_Sort quickstep_queryoptimizer_physical_TableGenerator quickstep_queryoptimizer_physical_TableReference quickstep_queryoptimizer_physical_TopLevelPlan http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/cost_model/SimpleCostModel.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/SimpleCostModel.cpp b/query_optimizer/cost_model/SimpleCostModel.cpp index f313c90..74b62d6 100644 --- a/query_optimizer/cost_model/SimpleCostModel.cpp +++ b/query_optimizer/cost_model/SimpleCostModel.cpp @@ -30,6 +30,7 @@ #include "query_optimizer/physical/PhysicalType.hpp" #include "query_optimizer/physical/Selection.hpp" #include "query_optimizer/physical/SharedSubplanReference.hpp" +#include "query_optimizer/physical/Sort.hpp" #include "query_optimizer/physical/TableGenerator.hpp" #include "query_optimizer/physical/TableReference.hpp" #include "query_optimizer/physical/TopLevelPlan.hpp" @@ -73,6 +74,9 @@ std::size_t SimpleCostModel::estimateCardinality( return estimateCardinality( shared_subplans_[shared_subplan_reference->subplan_id()]); } + case P::PhysicalType::kSort: + return estimateCardinalityForSort( + std::static_pointer_cast<const P::Sort>(physical_plan)); case P::PhysicalType::kWindowAggregate: return estimateCardinalityForWindowAggregate( std::static_pointer_cast<const P::WindowAggregate>(physical_plan)); @@ -96,6 +100,13 @@ std::size_t SimpleCostModel::estimateCardinalityForSelection( return estimateCardinality(physical_plan->input()); } +std::size_t SimpleCostModel::estimateCardinalityForSort( + const physical::SortPtr &physical_plan) { + std::size_t cardinality = estimateCardinality( + std::static_pointer_cast<const P::Sort>(physical_plan)->input()); + return std::min(cardinality, static_cast<std::size_t>(physical_plan->limit())); +} + std::size_t SimpleCostModel::estimateCardinalityForTableGenerator( const P::TableGeneratorPtr &physical_plan) { return physical_plan->generator_function_handle()->getEstimatedCardinality(); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/cost_model/SimpleCostModel.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/SimpleCostModel.hpp b/query_optimizer/cost_model/SimpleCostModel.hpp index 25ff8fe..16366cd 100644 --- a/query_optimizer/cost_model/SimpleCostModel.hpp +++ b/query_optimizer/cost_model/SimpleCostModel.hpp @@ -29,6 +29,7 @@ #include "query_optimizer/physical/HashJoin.hpp" #include "query_optimizer/physical/Physical.hpp" #include "query_optimizer/physical/Selection.hpp" +#include "query_optimizer/physical/Sort.hpp" #include "query_optimizer/physical/TableGenerator.hpp" #include "query_optimizer/physical/TableReference.hpp" #include "query_optimizer/physical/TopLevelPlan.hpp" @@ -74,6 +75,11 @@ class SimpleCostModel : public CostModel { std::size_t estimateCardinalityForTableGenerator( const physical::TableGeneratorPtr &physical_plan); + // Returns the estimated cardinality as K if there is a LIMIT K clause, + // otherwise returns the estimated cardinality of the input plan. + std::size_t estimateCardinalityForSort( + const physical::SortPtr &physical_plan); + // Returns the larger value of the estimated cardinalities of two // input plans. std::size_t estimateCardinalityForHashJoin(