Use SimpleCostModel for estimating JoinHashTable size

Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/2ee5c1cc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/2ee5c1cc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/2ee5c1cc

Branch: refs/heads/build-cardinality-fix
Commit: 2ee5c1cc656ef71c99714380b1a78bbd16a74925
Parents: 7a46443
Author: Jianqiao Zhu <jianq...@cs.wisc.edu>
Authored: Tue Oct 18 11:07:51 2016 -0500
Committer: Jianqiao Zhu <jianq...@cs.wisc.edu>
Committed: Tue Oct 18 11:36:55 2016 -0500

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt                 |  1 +
 query_optimizer/ExecutionGenerator.cpp         | 10 +++++++---
 query_optimizer/ExecutionGenerator.hpp         |  9 +++++++--
 query_optimizer/cost_model/CMakeLists.txt      |  1 +
 query_optimizer/cost_model/SimpleCostModel.cpp | 11 +++++++++++
 query_optimizer/cost_model/SimpleCostModel.hpp |  6 ++++++
 6 files changed, 33 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index fa9141c..8333d4b 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -76,6 +76,7 @@ 
target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator
                       quickstep_queryoptimizer_QueryHandle
                       quickstep_queryoptimizer_QueryPlan
                       quickstep_queryoptimizer_costmodel_CostModel
+                      quickstep_queryoptimizer_costmodel_SimpleCostModel
                       
quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
                       quickstep_queryoptimizer_expressions_AggregateFunction
                       quickstep_queryoptimizer_expressions_Alias

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp 
b/query_optimizer/ExecutionGenerator.cpp
index 09ef9e0..5a701b7 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -57,6 +57,7 @@
 #include "query_optimizer/OptimizerContext.hpp"
 #include "query_optimizer/QueryHandle.hpp"
 #include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/cost_model/SimpleCostModel.hpp"
 #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
 #include "query_optimizer/expressions/AggregateFunction.hpp"
 #include "query_optimizer/expressions/Alias.hpp"
@@ -165,8 +166,10 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr 
&physical_plan) {
   CHECK(P::SomeTopLevelPlan::MatchesWithConditionalCast(physical_plan, 
&top_level_physical_plan_))
       << "The physical plan must be rooted by a TopLevelPlan";
 
-  cost_model_.reset(
+  cost_model_for_aggregation_.reset(
       new 
cost::StarSchemaSimpleCostModel(top_level_physical_plan_->shared_subplans()));
+  cost_model_for_hash_join_.reset(
+      new cost::SimpleCostModel(top_level_physical_plan_->shared_subplans()));
 
   const CatalogRelation *result_relation = nullptr;
 
@@ -594,7 +597,8 @@ void ExecutionGenerator::convertHashJoin(const 
P::HashJoinPtr &physical_plan) {
   std::vector<attribute_id> probe_attribute_ids;
   std::vector<attribute_id> build_attribute_ids;
 
-  std::size_t build_cardinality = 
cost_model_->estimateCardinality(build_physical);
+  std::size_t build_cardinality =
+      cost_model_for_hash_join_->estimateCardinality(build_physical);
 
   bool any_probe_attributes_nullable = false;
   bool any_build_attributes_nullable = false;
@@ -1363,7 +1367,7 @@ void ExecutionGenerator::convertAggregate(
   }
 
   const std::size_t estimated_num_groups =
-      cost_model_->estimateNumGroupsForAggregate(physical_plan);
+      
cost_model_for_aggregation_->estimateNumGroupsForAggregate(physical_plan);
   aggr_state_proto->set_estimated_num_entries(std::max(16uL, 
estimated_num_groups));
 
   const QueryPlan::DAGNodeIndex aggregation_operator_index =

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/ExecutionGenerator.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.hpp 
b/query_optimizer/ExecutionGenerator.hpp
index 495955e..b7d8ef9 100644
--- a/query_optimizer/ExecutionGenerator.hpp
+++ b/query_optimizer/ExecutionGenerator.hpp
@@ -416,9 +416,14 @@ class ExecutionGenerator {
   std::vector<CatalogRelationInfo> temporary_relation_info_vec_;
 
   /**
-   * @brief The cost model to use for creating the execution plan.
+   * @brief The cost model to use for estimating aggregation hash table size.
    */
-  std::unique_ptr<cost::CostModel> cost_model_;
+  std::unique_ptr<cost::CostModel> cost_model_for_aggregation_;
+
+  /**
+   * @brief The cost model to use for estimating join hash table size.
+   */
+  std::unique_ptr<cost::CostModel> cost_model_for_hash_join_;
 
   physical::TopLevelPlanPtr top_level_physical_plan_;
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/cost_model/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/CMakeLists.txt 
b/query_optimizer/cost_model/CMakeLists.txt
index ba99de3..d616696 100644
--- a/query_optimizer/cost_model/CMakeLists.txt
+++ b/query_optimizer/cost_model/CMakeLists.txt
@@ -38,6 +38,7 @@ 
target_link_libraries(quickstep_queryoptimizer_costmodel_SimpleCostModel
                       quickstep_queryoptimizer_physical_PhysicalType
                       quickstep_queryoptimizer_physical_Selection
                       quickstep_queryoptimizer_physical_SharedSubplanReference
+                      quickstep_queryoptimizer_physical_Sort
                       quickstep_queryoptimizer_physical_TableGenerator
                       quickstep_queryoptimizer_physical_TableReference
                       quickstep_queryoptimizer_physical_TopLevelPlan

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/cost_model/SimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.cpp 
b/query_optimizer/cost_model/SimpleCostModel.cpp
index f313c90..74b62d6 100644
--- a/query_optimizer/cost_model/SimpleCostModel.cpp
+++ b/query_optimizer/cost_model/SimpleCostModel.cpp
@@ -30,6 +30,7 @@
 #include "query_optimizer/physical/PhysicalType.hpp"
 #include "query_optimizer/physical/Selection.hpp"
 #include "query_optimizer/physical/SharedSubplanReference.hpp"
+#include "query_optimizer/physical/Sort.hpp"
 #include "query_optimizer/physical/TableGenerator.hpp"
 #include "query_optimizer/physical/TableReference.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
@@ -73,6 +74,9 @@ std::size_t SimpleCostModel::estimateCardinality(
       return estimateCardinality(
           shared_subplans_[shared_subplan_reference->subplan_id()]);
     }
+    case P::PhysicalType::kSort:
+      return estimateCardinalityForSort(
+          std::static_pointer_cast<const P::Sort>(physical_plan));
     case P::PhysicalType::kWindowAggregate:
       return estimateCardinalityForWindowAggregate(
           std::static_pointer_cast<const P::WindowAggregate>(physical_plan));
@@ -96,6 +100,13 @@ std::size_t 
SimpleCostModel::estimateCardinalityForSelection(
   return estimateCardinality(physical_plan->input());
 }
 
+std::size_t SimpleCostModel::estimateCardinalityForSort(
+    const physical::SortPtr &physical_plan) {
+  std::size_t cardinality = estimateCardinality(
+      std::static_pointer_cast<const P::Sort>(physical_plan)->input());
+  return std::min(cardinality, 
static_cast<std::size_t>(physical_plan->limit()));
+}
+
 std::size_t SimpleCostModel::estimateCardinalityForTableGenerator(
     const P::TableGeneratorPtr &physical_plan) {
   return physical_plan->generator_function_handle()->getEstimatedCardinality();

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/2ee5c1cc/query_optimizer/cost_model/SimpleCostModel.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/SimpleCostModel.hpp 
b/query_optimizer/cost_model/SimpleCostModel.hpp
index 25ff8fe..16366cd 100644
--- a/query_optimizer/cost_model/SimpleCostModel.hpp
+++ b/query_optimizer/cost_model/SimpleCostModel.hpp
@@ -29,6 +29,7 @@
 #include "query_optimizer/physical/HashJoin.hpp"
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/Sort.hpp"
 #include "query_optimizer/physical/TableGenerator.hpp"
 #include "query_optimizer/physical/TableReference.hpp"
 #include "query_optimizer/physical/TopLevelPlan.hpp"
@@ -74,6 +75,11 @@ class SimpleCostModel : public CostModel {
   std::size_t estimateCardinalityForTableGenerator(
       const physical::TableGeneratorPtr &physical_plan);
 
+  // Returns the estimated cardinality as K if there is a LIMIT K clause,
+  // otherwise returns the estimated cardinality of the input plan.
+  std::size_t estimateCardinalityForSort(
+      const physical::SortPtr &physical_plan);
+
   // Returns the larger value of the estimated cardinalities of two
   // input plans.
   std::size_t estimateCardinalityForHashJoin(

Reply via email to