Repository: incubator-quickstep
Updated Branches:
  refs/heads/estimate-num-distinct-values f9fead970 -> 62b9e205a


minor fixes


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/62b9e205
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/62b9e205
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/62b9e205

Branch: refs/heads/estimate-num-distinct-values
Commit: 62b9e205a7842dad08c5bccbeedab9870b350ac8
Parents: f9fead9
Author: Jianqiao Zhu <jianq...@cs.wisc.edu>
Authored: Tue Oct 4 16:22:22 2016 -0500
Committer: Jianqiao Zhu <jianq...@cs.wisc.edu>
Committed: Tue Oct 4 16:22:22 2016 -0500

----------------------------------------------------------------------
 query_optimizer/cost_model/CMakeLists.txt       |  2 ++
 .../cost_model/StarSchemaSimpleCostModel.cpp    | 20 ++++++++++----------
 .../StarSchemaHashJoinOrderOptimization.hpp     |  4 ++++
 3 files changed, 16 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/62b9e205/query_optimizer/cost_model/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/CMakeLists.txt b/query_optimizer/cost_model/CMakeLists.txt
index abbc3da..08582f5 100644
--- a/query_optimizer/cost_model/CMakeLists.txt
+++ b/query_optimizer/cost_model/CMakeLists.txt
@@ -30,9 +30,11 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_SimpleCostModel
                       glog
                       quickstep_catalog_CatalogRelation
                       quickstep_queryoptimizer_costmodel_CostModel
+                      quickstep_queryoptimizer_expressions_ExpressionUtil
                       quickstep_queryoptimizer_physical_Aggregate
                       quickstep_queryoptimizer_physical_HashJoin
                       quickstep_queryoptimizer_physical_NestedLoopsJoin
+                      quickstep_queryoptimizer_physical_PatternMatcher
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_queryoptimizer_physical_PhysicalType
                       quickstep_queryoptimizer_physical_Selection

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/62b9e205/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index 122485a..8344ac3 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -114,9 +114,9 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTableReference(
 std::size_t StarSchemaSimpleCostModel::estimateCardinalityForSelection(
     const P::SelectionPtr &physical_plan) {
   double selectivity =
-      estimateSelectivityForPredicate(physical_plan->filter_predicate(),
-                                      physical_plan);
-  return static_cast<std::size_t>(estimateCardinality(physical_plan->input()) 
* selectivity);
+      estimateSelectivityForPredicate(physical_plan->filter_predicate(), 
physical_plan);
+  return static_cast<std::size_t>(
+      estimateCardinality(physical_plan->input()) * selectivity);
 }
 
 std::size_t StarSchemaSimpleCostModel::estimateCardinalityForSort(
@@ -153,13 +153,13 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForAggregate(
     return 1;
   }
 
-  const std::size_t estimated_child_cardinality = 
estimateCardinality(physical_plan->input());
+  std::size_t estimated_child_cardinality = 
estimateCardinality(physical_plan->input());
   std::size_t estimated_num_groups = 1;
   std::size_t max_attr_num_distinct_values = 0;
   for (const auto &expr : physical_plan->grouping_expressions()) {
     E::AttributeReferencePtr attr;
     if (E::SomeAttributeReference::MatchesWithConditionalCast(expr, &attr)) {
-      const std::size_t attr_num_distinct_values =
+      std::size_t attr_num_distinct_values =
           estimateNumDistinctValues(attr->id(), physical_plan->input());
       estimated_num_groups *= std::max(1uL, attr_num_distinct_values);
       max_attr_num_distinct_values =
@@ -196,7 +196,7 @@ std::size_t StarSchemaSimpleCostModel::estimateNumDistinctValues(
     case P::PhysicalType::kAggregate: {
       const P::PhysicalPtr &child = physical_plan->children()[0];
       if (E::ContainsExprId(child->getOutputAttributes(), attribute_id)) {
-        const std::size_t child_num_distinct_values =
+        std::size_t child_num_distinct_values =
             estimateNumDistinctValues(attribute_id, child);
         return static_cast<std::size_t>(
             child_num_distinct_values * filter_selectivity + 0.5);
@@ -207,17 +207,17 @@ std::size_t StarSchemaSimpleCostModel::estimateNumDistinctValues(
       const P::HashJoinPtr &hash_join =
           std::static_pointer_cast<const P::HashJoin>(physical_plan);
       if (E::ContainsExprId(hash_join->left()->getOutputAttributes(), 
attribute_id)) {
-        const std::size_t left_child_num_distinct_values =
+        std::size_t left_child_num_distinct_values =
             estimateNumDistinctValues(attribute_id, hash_join->left());
-        const double right_child_selectivity =
+        double right_child_selectivity =
             estimateSelectivity(hash_join->right());
         return static_cast<std::size_t>(
             left_child_num_distinct_values * right_child_selectivity * 
filter_selectivity + 0.5);
       }
       if (E::ContainsExprId(hash_join->right()->getOutputAttributes(), 
attribute_id)) {
-        const std::size_t right_child_num_distinct_values =
+        std::size_t right_child_num_distinct_values =
             estimateNumDistinctValues(attribute_id, hash_join->right());
-        const double left_child_selectivity =
+        double left_child_selectivity =
             estimateSelectivity(hash_join->left());
         return static_cast<std::size_t>(
             right_child_num_distinct_values * left_child_selectivity * 
filter_selectivity + 0.5);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/62b9e205/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index f832f00..c1a7bae 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -103,6 +103,10 @@ class StarSchemaHashJoinOrderOptimization : public Rule<physical::Physical> {
 
       if (lhs->estimated_selectivity < rhs->estimated_selectivity) {
         return !swapped;
+      } else if (lhs->estimated_cardinality < 100u &&
+                 rhs->estimated_cardinality > 10000u &&
+                 lhs->estimated_selectivity < rhs->estimated_selectivity * 
1.5) {
+        return !swapped;
       } else if (lhs->estimated_selectivity > rhs->estimated_selectivity) {
         return swapped;
       } else if (lhs->estimated_cardinality != rhs->estimated_cardinality) {

Reply via email to