Repository: incubator-quickstep
Updated Branches:
  refs/heads/reorder-attrs c2743f724 -> 9c14e86e3


Updates


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/9c14e86e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/9c14e86e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/9c14e86e

Branch: refs/heads/reorder-attrs
Commit: 9c14e86e3567a9db642be357e3007c09b5e8cb5a
Parents: c2743f7
Author: Jianqiao Zhu <jianq...@cs.wisc.edu>
Authored: Sun Jan 29 00:56:30 2017 -0600
Committer: Jianqiao Zhu <jianq...@cs.wisc.edu>
Committed: Sun Jan 29 00:56:30 2017 -0600

----------------------------------------------------------------------
 query_optimizer/PhysicalGenerator.cpp     |  4 +--
 relational_operators/HashJoinOperator.cpp | 39 ++++++++++++++------------
 2 files changed, 23 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9c14e86e/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index dc2763d..e12f8be 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -46,7 +46,7 @@ namespace quickstep {
 namespace optimizer {
 
 DEFINE_bool(reorder_columns, true,
-            "Adjust the ordering of intermediate relations' attributes to improve "
+            "Adjust the ordering of intermediate relations' columns to improve "
             "copy performance.");
 
 DEFINE_bool(reorder_hash_joins, true,
@@ -115,7 +115,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
     rules.emplace_back(new SwapProbeBuild());
   }
   if (FLAGS_reorder_columns) {
-    // NOTE(jianqiao): This optimization is based on the fact that the intermediate
+    // NOTE(jianqiao): This optimization relies on the fact that the intermediate
     // relations all have SPLIT_ROW_STORE layouts. If this fact gets changed, the
     // optimization algorithm may need to be updated and the performance impact
     // should be re-evaluated.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9c14e86e/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index b72096c..64f73ea 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -547,7 +547,7 @@ void HashInnerJoinWorkOrder::executeWithoutCopyElision(ValueAccessor *probe_acce
 }
 
void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accessor) {
-  VectorsOfPairsJoinedTuplesCollector collector;
+  PairsOfVectorsJoinedTuplesCollector collector;
   if (join_key_attributes_.size() == 1) {
     hash_table_.getAllFromValueAccessor(
         probe_accessor,
@@ -596,14 +596,14 @@ void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accesso
     ++dest_attr;
   }
 
-  std::vector<tuple_id> build_tids;
-  std::vector<tuple_id> probe_tids;
-  for (std::pair<const block_id, VectorOfPairs>
+  for (std::pair<const block_id, PairOfVectors>
            &build_block_entry : *collector.getJoinedTuples()) {
     BlockReference build_block =
         storage_manager_->getBlock(build_block_entry.first, build_relation_);
     const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
     std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
+    const std::vector<tuple_id> &build_tids = build_block_entry.second.first;
+    const std::vector<tuple_id> &probe_tids = build_block_entry.second.second;
 
     // Evaluate '*residual_predicate_', if any.
     //
@@ -616,17 +616,17 @@ void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accesso
     // hash join is below a reasonable threshold so that we don't blow up
     // temporary memory requirements to an unreasonable degree.
     if (residual_predicate_ != nullptr) {
-      VectorOfPairs filtered_matches;
+      PairOfVectors filtered_matches;
 
-      for (const std::pair<tuple_id, tuple_id> &hash_match
-           : build_block_entry.second) {
+      for (std::size_t i = 0; i < build_tids.size(); ++i) {
         if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
                                                         build_relation_id,
-                                                        hash_match.first,
+                                                        build_tids[i],
                                                         *probe_accessor,
                                                         probe_relation_id,
-                                                        hash_match.second)) {
-          filtered_matches.emplace_back(hash_match);
+                                                        probe_tids[i])) {
+          filtered_matches.first.emplace_back(build_tids[i]);
+          filtered_matches.second.emplace_back(probe_tids[i]);
         }
       }
 
@@ -641,22 +641,25 @@ void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accesso
     // We also need a temp value accessor to store results of any scalar expressions.
     ColumnVectorsValueAccessor temp_result;
     if (!non_trivial_expressions.empty()) {
+      // The getAllValuesForJoin function below needs joined tuple IDs as a
+      // vector of pair of (build-tuple-ID, probe-tuple-ID), and we have a pair
+      // of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So we'll have to
+      // zip our two vectors together.
+      VectorOfPairs zipped_joined_tuple_ids;
+      zipped_joined_tuple_ids.reserve(build_tids.size());
+      for (std::size_t i = 0; i < build_tids.size(); ++i) {
+        zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
+      }
+
       for (const Scalar *scalar : non_trivial_expressions) {
         temp_result.addColumn(scalar->getAllValuesForJoin(build_relation_id,
                                                           build_accessor.get(),
                                                           probe_relation_id,
                                                           probe_accessor,
-                                                          build_block_entry.second));
+                                                          zipped_joined_tuple_ids));
       }
     }
 
-    build_tids.clear();
-    probe_tids.clear();
-    for (const auto &pair : build_block_entry.second) {
-      build_tids.emplace_back(pair.first);
-      probe_tids.emplace_back(pair.second);
-    }
-
     // We now create ordered value accessors for both build and probe side,
     // using the joined tuple TIDs.
     std::unique_ptr<ValueAccessor> ordered_build_accessor(

Reply via email to