Repository: incubator-quickstep
Updated Branches:
  refs/heads/lip-refactor c4bf1e62d -> 969b02f57


Updates


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/969b02f5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/969b02f5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/969b02f5

Branch: refs/heads/lip-refactor
Commit: 969b02f577aa09122170e129b56f952d416ad9c8
Parents: c4bf1e6
Author: Jianqiao Zhu <jianq...@cs.wisc.edu>
Authored: Wed Oct 12 15:53:05 2016 -0500
Committer: Jianqiao Zhu <jianq...@cs.wisc.edu>
Committed: Wed Oct 12 15:53:05 2016 -0500

----------------------------------------------------------------------
 .../StarSchemaHashJoinOrderOptimization.cpp     | 66 +++++++++++++-------
 .../StarSchemaHashJoinOrderOptimization.hpp     | 29 ++++-----
 relational_operators/HashJoinOperator.cpp       | 40 +++++++-----
 utility/lip_filter/LIPFilter.hpp                |  2 +
 utility/lip_filter/SingleIdentityHashFilter.hpp | 14 ++++-
 5 files changed, 96 insertions(+), 55 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/969b02f5/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp 
b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index 150d9d7..78a1680 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -28,6 +28,7 @@
 #include "query_optimizer/expressions/AttributeReference.hpp"
 #include "query_optimizer/expressions/NamedExpression.hpp"
 #include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
 #include "query_optimizer/physical/HashJoin.hpp"
 #include "query_optimizer/physical/PatternMatcher.hpp"
 #include "query_optimizer/physical/Physical.hpp"
@@ -75,7 +76,7 @@ P::PhysicalPtr 
StarSchemaHashJoinOrderOptimization::applyInternal(const P::Physi
     JoinGroupInfo *join_group = nullptr;
     if (parent_join_group == nullptr || !is_valid_cascading_hash_join) {
       new_join_group.reset(new JoinGroupInfo());
-      for (const auto &attr : input->getReferencedAttributes()) {
+      for (const auto &attr : input->getOutputAttributes()) {
         new_join_group->referenced_attributes.emplace(attr->id());
       }
       join_group = new_join_group.get();
@@ -211,14 +212,14 @@ physical::PhysicalPtr 
StarSchemaHashJoinOrderOptimization::generatePlan(
         if (probe_table_info != build_table_info) {
           const std::size_t probe_table_id = probe_table_info->table_info_id;
           const std::size_t build_table_id = build_table_info->table_info_id;
-          bool has_join_attribute = false;
+          std::size_t num_join_attributes = 0;
           double build_side_uniqueness = 1.0;
           for (const auto &attr_group_pair : join_attribute_groups) {
             const auto &attr_group = attr_group_pair.second;
             auto probe_it = attr_group.find(probe_table_id);
             auto build_it = attr_group.find(build_table_id);
             if (probe_it != attr_group.end() && build_it != attr_group.end()) {
-              has_join_attribute = true;
+              ++num_join_attributes;
               build_side_uniqueness *= std::max(
                   1uL,
                   cost_model_->estimateNumDistinctValues(
@@ -227,27 +228,29 @@ physical::PhysicalPtr 
StarSchemaHashJoinOrderOptimization::generatePlan(
           }
           build_side_uniqueness /= build_table_info->estimated_cardinality;
 
-          if (has_join_attribute) {
+          if (num_join_attributes > 0) {
+//            std::cerr << "Unqueness = " << build_side_uniqueness << "\n";
             std::unique_ptr<JoinPair> new_join(
                 new JoinPair(probe_table_info,
                              build_table_info,
-                             build_side_uniqueness >= 0.9));
+                             build_side_uniqueness >= 0.9,
+                             num_join_attributes));
             if (best_join == nullptr || new_join->isBetterThan(*best_join)) {
-              if (best_join != nullptr) {
-                std::cerr << "(" << best_join->probe->estimated_selectivity
-                          << ", " << best_join->probe->estimated_cardinality 
<< ")"
-                          << " -- "
-                          << "(" << best_join->build->estimated_selectivity
-                          << ", " << best_join->build->estimated_cardinality 
<< ")"
-                          << "\n";
-                std::cerr << "REPLACED WITH\n";
-              }
-              std::cerr << "(" << new_join->probe->estimated_selectivity
-                        << ", " << new_join->probe->estimated_cardinality << 
")"
-                        << " -- "
-                        << "(" << new_join->build->estimated_selectivity
-                        << ", " << new_join->build->estimated_cardinality << 
")"
-                        << "\n****\n";
+//              if (best_join != nullptr) {
+//                std::cerr << "(" << best_join->probe->estimated_selectivity
+//                          << ", " << best_join->probe->estimated_cardinality 
<< ")"
+//                          << " -- "
+//                          << "(" << best_join->build->estimated_selectivity
+//                          << ", " << best_join->build->estimated_cardinality 
<< ")"
+//                          << "\n";
+//                std::cerr << "REPLACED WITH\n";
+//              }
+//              std::cerr << "(" << new_join->probe->estimated_selectivity
+//                        << ", " << new_join->probe->estimated_cardinality << 
")"
+//                        << " -- "
+//                        << "(" << new_join->build->estimated_selectivity
+//                        << ", " << new_join->build->estimated_cardinality << 
")"
+//                        << "\n****\n";
               best_join.reset(new_join.release());
             }
           }
@@ -261,11 +264,19 @@ physical::PhysicalPtr 
StarSchemaHashJoinOrderOptimization::generatePlan(
     TableInfo *selected_build_table_info = best_join->build;
 //    std::cerr << "card: " << 
selected_probe_table_info->estimated_cardinality << "\n";
 //    std::cerr << "card: " << 
selected_build_table_info->estimated_cardinality << "\n";
-    std::cerr << "--------\n\n";
-    if (!best_join->build_side_unique &&
+    const std::size_t probe_num_groups_as_agg =
+        getEstimatedNumGroups(selected_probe_table_info->table);
+    const std::size_t build_num_groups_as_agg =
+        getEstimatedNumGroups(selected_build_table_info->table);
+    if (build_num_groups_as_agg > 1000000 || probe_num_groups_as_agg > 
1000000) {
+      if (build_num_groups_as_agg > probe_num_groups_as_agg) {
+        std::swap(selected_probe_table_info, selected_build_table_info);
+      }
+    } else if ((!best_join->build_side_unique || 
best_join->num_join_attributes > 1) &&
         selected_probe_table_info->estimated_cardinality < 
selected_build_table_info->estimated_cardinality) {
       std::swap(selected_probe_table_info, selected_build_table_info);
     }
+//    std::cerr << "--------\n\n";
 
 //    std::cerr << selected_probe_table_info->estimated_selectivity
 //              << " -- "
@@ -326,7 +337,6 @@ physical::PhysicalPtr 
StarSchemaHashJoinOrderOptimization::generatePlan(
       selected_probe_table_info->estimated_num_output_attributes =
           CountSharedAttributes(join_group.referenced_attributes,
                                 output->getOutputAttributes());
-      selected_probe_table_info->is_aggregation = false;
 
       remaining_tables.emplace(selected_probe_table_info);
 
@@ -364,6 +374,16 @@ std::size_t 
StarSchemaHashJoinOrderOptimization::CountSharedAttributes(
   return cnt;
 }
 
+std::size_t StarSchemaHashJoinOrderOptimization::getEstimatedNumGroups(
+    const physical::PhysicalPtr &input) {
+  P::AggregatePtr aggregate;
+  if (P::SomeAggregate::MatchesWithConditionalCast(input, &aggregate)) {
+    return cost_model_->estimateNumGroupsForAggregate(aggregate);
+  } else {
+    return 0;
+  }
+}
+
 
 }  // namespace optimizer
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/969b02f5/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp 
b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index b81921d..413fc18 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -83,8 +83,7 @@ class StarSchemaHashJoinOrderOptimization : public 
Rule<physical::Physical> {
           table(table_in),
           estimated_cardinality(estimated_cardinality_in),
           estimated_selectivity(estimated_selectivity_in),
-          estimated_num_output_attributes(estimated_num_output_attributes_in),
-          is_aggregation(is_aggregation_in) {
+          estimated_num_output_attributes(estimated_num_output_attributes_in) {
     }
 
     const std::size_t table_info_id;
@@ -92,22 +91,26 @@ class StarSchemaHashJoinOrderOptimization : public 
Rule<physical::Physical> {
     std::size_t estimated_cardinality;
     double estimated_selectivity;
     std::size_t estimated_num_output_attributes;
-    bool is_aggregation;
   };
 
   struct JoinPair {
     JoinPair(TableInfo *probe_in,
              TableInfo *build_in,
-             const bool build_side_unique_in)
+             const bool build_side_unique_in,
+             const std::size_t num_join_attributes_in)
         : probe(probe_in),
           build(build_in),
-          build_side_unique(build_side_unique_in) {
+          build_side_unique(build_side_unique_in),
+          num_join_attributes(num_join_attributes_in) {
     }
 
     inline bool isBetterThan(const JoinPair &rhs) const {
       const auto &lhs = *this;
 
-      std::cerr << "Check 1\n";
+//      std::cerr << "Check 1\n";
+//      std::cerr << lhs.build->estimated_num_output_attributes + 
lhs.probe->estimated_num_output_attributes
+//                << " vs "
+//                << rhs.build->estimated_num_output_attributes + 
rhs.probe->estimated_num_output_attributes << "\n";
       const bool lhs_has_large_output =
           lhs.build->estimated_num_output_attributes
               + lhs.probe->estimated_num_output_attributes > 5;
@@ -118,24 +121,19 @@ class StarSchemaHashJoinOrderOptimization : public 
Rule<physical::Physical> {
         return rhs_has_large_output;
       }
 
-      std::cerr << "Check 2\n";
+//      std::cerr << "Check 2\n";
       if (lhs.build_side_unique != rhs.build_side_unique) {
         return lhs.build_side_unique;
       }
 
-      std::cerr << "Check 3\n";
+//      std::cerr << "Check 3\n";
       const bool lhs_has_small_build = lhs.build->estimated_cardinality < 
0x100;
       const bool rhs_has_small_build = rhs.build->estimated_cardinality < 
0x100;
       if (lhs_has_small_build != rhs_has_small_build) {
         return lhs_has_small_build;
       }
 
-      std::cerr << "Check 4\n";
-      if (lhs.probe->is_aggregation != rhs.probe->is_aggregation) {
-        return lhs.probe->is_aggregation;
-      }
-
-      std::cerr << "Check 5\n";
+//      std::cerr << "Check 4\n";
       if (lhs.probe->estimated_cardinality != 
rhs.probe->estimated_cardinality) {
         return lhs.probe->estimated_cardinality < 
rhs.probe->estimated_cardinality;
       }
@@ -155,6 +153,7 @@ class StarSchemaHashJoinOrderOptimization : public 
Rule<physical::Physical> {
     TableInfo *probe;
     TableInfo *build;
     const bool build_side_unique;
+    const std::size_t num_join_attributes;
   };
 
   physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input,
@@ -165,6 +164,8 @@ class StarSchemaHashJoinOrderOptimization : public 
Rule<physical::Physical> {
       const expressions::PredicatePtr &residual_predicate,
       const std::vector<expressions::NamedExpressionPtr> &project_expressions);
 
+  std::size_t getEstimatedNumGroups(const physical::PhysicalPtr &input);
+
   static std::size_t CountSharedAttributes(
       const std::unordered_set<expressions::ExprId> &attr_set1,
       const std::vector<expressions::AttributeReferencePtr> &attr_set2);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/969b02f5/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp 
b/relational_operators/HashJoinOperator.cpp
index 449a118..ddc2a40 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -96,8 +96,8 @@ class MapBasedJoinedTupleCollector {
 
 class SemiAntiJoinTupleCollector {
  public:
-  explicit SemiAntiJoinTupleCollector(const TupleStorageSubBlock &tuple_store) 
{
-    filter_.reset(tuple_store.getExistenceMap());
+  explicit SemiAntiJoinTupleCollector(TupleIdSequence *existence_map) {
+    filter_ = existence_map;
   }
 
   template <typename ValueAccessorT>
@@ -105,12 +105,8 @@ class SemiAntiJoinTupleCollector {
     filter_->set(accessor.getCurrentPosition(), false);
   }
 
-  const TupleIdSequence* filter() const {
-    return filter_.get();
-  }
-
  private:
-  std::unique_ptr<TupleIdSequence> filter_;
+  TupleIdSequence *filter_;
 };
 
 class OuterJoinTupleCollector {
@@ -585,7 +581,6 @@ void HashSemiJoinWorkOrder::executeWithResidualPredicate() {
 
   // Get a filter for tuples in the given probe block.
   TupleIdSequence filter(probe_store.getMaxTupleID() + 1);
-  filter.setRange(0, filter.length(), false);
   for (const std::pair<const block_id,
                        std::vector<std::pair<tuple_id, tuple_id>>>
            &build_block_entry : *collector.getJoinedTuples()) {
@@ -646,18 +641,22 @@ void 
HashSemiJoinWorkOrder::executeWithoutResidualPredicate() {
   const TupleStorageSubBlock &probe_store = 
probe_block->getTupleStorageSubBlock();
 
   std::unique_ptr<ValueAccessor> 
probe_accessor(probe_store.createValueAccessor());
+  std::unique_ptr<TupleIdSequence> existence_map;
 
-  std::unique_ptr<TupleIdSequence> lip_filter_existence_map;
   std::unique_ptr<ValueAccessor> base_accessor;
   if (lip_filter_adaptive_prober_ != nullptr) {
     base_accessor.reset(probe_accessor.release());
-    lip_filter_existence_map.reset(
+    existence_map.reset(
         lip_filter_adaptive_prober_->filterValueAccessor(base_accessor.get()));
     probe_accessor.reset(
-        
base_accessor->createSharedTupleIdSequenceAdapterVirtual(*lip_filter_existence_map));
+        
base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+  }
+
+  if (existence_map == nullptr) {
+    existence_map.reset(probe_store.getExistenceMap());
   }
 
-  SemiAntiJoinTupleCollector collector(probe_store);
+  SemiAntiJoinTupleCollector collector(existence_map.get());
   // We collect all the probe relation tuples which have at least one matching
   // tuple in the build relation. As a performance optimization, the hash table
   // just looks for the existence of the probing key in the hash table and sets
@@ -684,8 +683,15 @@ void 
HashSemiJoinWorkOrder::executeWithoutResidualPredicate() {
                                     probe_block->getIndices(),
                                     probe_block->getIndicesConsistent());
 
-  std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
-      probe_store.createValueAccessor(collector.filter()));
+  std::unique_ptr<ValueAccessor> probe_accessor_with_filter;
+  if (base_accessor != nullptr) {
+    probe_accessor_with_filter.reset(
+      
base_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+  } else {
+    probe_accessor_with_filter.reset(
+      
probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
+  }
+
   ColumnVectorsValueAccessor temp_result;
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = 
selection_.begin();
        selection_it != selection_.end(); ++selection_it) {
@@ -704,7 +710,9 @@ void 
HashAntiJoinWorkOrder::executeWithoutResidualPredicate() {
   const TupleStorageSubBlock &probe_store = 
probe_block->getTupleStorageSubBlock();
 
   std::unique_ptr<ValueAccessor> 
probe_accessor(probe_store.createValueAccessor());
-  SemiAntiJoinTupleCollector collector(probe_store);
+  std::unique_ptr<TupleIdSequence> 
existence_map(probe_store.getExistenceMap());
+
+  SemiAntiJoinTupleCollector collector(existence_map.get());
   // We probe the hash table to find the keys which have an entry in the
   // hash table.
   if (join_key_attributes_.size() == 1) {
@@ -728,7 +736,7 @@ void 
HashAntiJoinWorkOrder::executeWithoutResidualPredicate() {
                                     probe_block->getIndicesConsistent());
 
   std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
-      probe_store.createValueAccessor(collector.filter()));
+      
probe_accessor->createSharedTupleIdSequenceAdapterVirtual(*existence_map));
   ColumnVectorsValueAccessor temp_result;
   for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = 
selection_.begin();
        selection_it != selection_.end(); ++selection_it) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/969b02f5/utility/lip_filter/LIPFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/LIPFilter.hpp b/utility/lip_filter/LIPFilter.hpp
index dd72e48..0df3c18 100644
--- a/utility/lip_filter/LIPFilter.hpp
+++ b/utility/lip_filter/LIPFilter.hpp
@@ -60,6 +60,8 @@ class LIPFilter {
                                   std::vector<tuple_id> *batch,
                                   const std::size_t batch_size) const = 0;
 
+  virtual std::size_t onesCount() const = 0;
+
  protected:
   LIPFilter(const LIPFilterType &type)
       : type_(type) {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/969b02f5/utility/lip_filter/SingleIdentityHashFilter.hpp
----------------------------------------------------------------------
diff --git a/utility/lip_filter/SingleIdentityHashFilter.hpp 
b/utility/lip_filter/SingleIdentityHashFilter.hpp
index 40ef14b..0258c24 100644
--- a/utility/lip_filter/SingleIdentityHashFilter.hpp
+++ b/utility/lip_filter/SingleIdentityHashFilter.hpp
@@ -33,8 +33,9 @@
 #include "storage/ValueAccessor.hpp"
 #include "storage/ValueAccessorUtil.hpp"
 #include "types/Type.hpp"
-#include "utility/lip_filter/LIPFilter.hpp"
+#include "utility/BitManipulation.hpp"
 #include "utility/Macros.hpp"
+#include "utility/lip_filter/LIPFilter.hpp"
 
 #include "glog/logging.h"
 
@@ -86,6 +87,13 @@ class SingleIdentityHashFilter : public LIPFilter {
     });
   }
 
+  std::size_t onesCount() const override {
+    std::size_t count = 0;
+    for (std::size_t i = 0; i < bit_array_.size(); ++i) {
+      count += 
population_count<std::uint8_t>(bit_array_[i].load(std::memory_order_relaxed));
+    }
+    return count;
+  }
 
   /**
    * @brief Inserts a given value into the hash filter.
@@ -136,6 +144,9 @@ class SingleIdentityHashFilter : public LIPFilter {
       const tuple_id tid = batch->at(i);
       const void *value =
           accessor->template getUntypedValueAtAbsolutePosition(attr_id, tid);
+      if (is_attr_nullable && value == nullptr) {
+        continue;
+      }
       if (contains(value)) {
         batch->at(out_size) = tid;
         ++out_size;
@@ -144,7 +155,6 @@ class SingleIdentityHashFilter : public LIPFilter {
     return out_size;
   }
 
-
   std::size_t filter_cardinality_;
   alignas(kCacheLineBytes) std::vector<std::atomic<std::uint8_t>> bit_array_;
 

Reply via email to