Repository: incubator-quickstep
Updated Branches:
  refs/heads/reorder-attrs c2743f724 -> 9c14e86e3
Updates

Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/9c14e86e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/9c14e86e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/9c14e86e

Branch: refs/heads/reorder-attrs
Commit: 9c14e86e3567a9db642be357e3007c09b5e8cb5a
Parents: c2743f7
Author: Jianqiao Zhu <jianq...@cs.wisc.edu>
Authored: Sun Jan 29 00:56:30 2017 -0600
Committer: Jianqiao Zhu <jianq...@cs.wisc.edu>
Committed: Sun Jan 29 00:56:30 2017 -0600

----------------------------------------------------------------------
 query_optimizer/PhysicalGenerator.cpp     |  4 +--
 relational_operators/HashJoinOperator.cpp | 39 ++++++++++++++------------
 2 files changed, 23 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9c14e86e/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index dc2763d..e12f8be 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -46,7 +46,7 @@ namespace quickstep {
 namespace optimizer {
 
 DEFINE_bool(reorder_columns, true,
-            "Adjust the ordering of intermediate relations' attributes to improve "
+            "Adjust the ordering of intermediate relations' columns to improve "
             "copy performance.");
 
 DEFINE_bool(reorder_hash_joins, true,
@@ -115,7 +115,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
     rules.emplace_back(new SwapProbeBuild());
   }
   if (FLAGS_reorder_columns) {
-    // NOTE(jianqiao): This optimization is based on the fact that the intermediate
+    // NOTE(jianqiao): This optimization relies on the fact that the intermediate
     // relations all have SPLIT_ROW_STORE layouts. If this fact gets changed, the
     // optimization algorithm may need to be updated and the performance impact
     // should be re-evaluated.
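Since reorder_columns is declared with gflags' DEFINE_bool, the rule can be toggled per run without rebuilding. A hypothetical invocation (the CLI binary name is assumed here, not taken from this commit):

  # Disable column reordering while keeping hash-join reordering enabled
  # (binary name assumed to be the Quickstep CLI shell).
  ./quickstep_cli_shell --reorder_columns=false --reorder_hash_joins=true
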
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9c14e86e/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index b72096c..64f73ea 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -547,7 +547,7 @@ void HashInnerJoinWorkOrder::executeWithoutCopyElision(ValueAccessor *probe_acce
 }
 
 void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accessor) {
-  VectorsOfPairsJoinedTuplesCollector collector;
+  PairsOfVectorsJoinedTuplesCollector collector;
   if (join_key_attributes_.size() == 1) {
     hash_table_.getAllFromValueAccessor(
         probe_accessor,
@@ -596,14 +596,14 @@ void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accesso
     ++dest_attr;
   }
 
-  std::vector<tuple_id> build_tids;
-  std::vector<tuple_id> probe_tids;
-  for (std::pair<const block_id, VectorOfPairs>
+  for (std::pair<const block_id, PairOfVectors>
            &build_block_entry : *collector.getJoinedTuples()) {
     BlockReference build_block =
         storage_manager_->getBlock(build_block_entry.first, build_relation_);
     const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
     std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
+    const std::vector<tuple_id> &build_tids = build_block_entry.second.first;
+    const std::vector<tuple_id> &probe_tids = build_block_entry.second.second;
 
     // Evaluate '*residual_predicate_', if any.
     //
@@ -616,17 +616,17 @@ void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accesso
     // hash join is below a reasonable threshold so that we don't blow up
     // temporary memory requirements to an unreasonable degree.
     if (residual_predicate_ != nullptr) {
-      VectorOfPairs filtered_matches;
+      PairOfVectors filtered_matches;
 
-      for (const std::pair<tuple_id, tuple_id> &hash_match
-               : build_block_entry.second) {
+      for (std::size_t i = 0; i < build_tids.size(); ++i) {
         if (residual_predicate_->matchesForJoinedTuples(*build_accessor,
                                                         build_relation_id,
-                                                        hash_match.first,
+                                                        build_tids[i],
                                                         *probe_accessor,
                                                         probe_relation_id,
-                                                        hash_match.second)) {
-          filtered_matches.emplace_back(hash_match);
+                                                        probe_tids[i])) {
+          filtered_matches.first.emplace_back(build_tids[i]);
+          filtered_matches.second.emplace_back(probe_tids[i]);
         }
       }
 
@@ -641,22 +641,25 @@ void HashInnerJoinWorkOrder::executeWithCopyElision(ValueAccessor *probe_accesso
     // We also need a temp value accessor to store results of any scalar expressions.
     ColumnVectorsValueAccessor temp_result;
     if (!non_trivial_expressions.empty()) {
+      // The getAllValuesForJoin function below needs joined tuple IDs as a
+      // vector of pair of (build-tuple-ID, probe-tuple-ID), and we have a pair
+      // of (build-tuple-IDs-vector, probe-tuple-IDs-vector). So we'll have to
+      // zip our two vectors together.
+      VectorOfPairs zipped_joined_tuple_ids;
+      zipped_joined_tuple_ids.reserve(build_tids.size());
+      for (std::size_t i = 0; i < build_tids.size(); ++i) {
+        zipped_joined_tuple_ids.emplace_back(build_tids[i], probe_tids[i]);
+      }
+
       for (const Scalar *scalar : non_trivial_expressions) {
         temp_result.addColumn(scalar->getAllValuesForJoin(build_relation_id,
                                                           build_accessor.get(),
                                                           probe_relation_id,
                                                           probe_accessor,
-                                                          build_block_entry.second));
+                                                          zipped_joined_tuple_ids));
       }
     }
 
-    build_tids.clear();
-    probe_tids.clear();
-    for (const auto &pair : build_block_entry.second) {
-      build_tids.emplace_back(pair.first);
-      probe_tids.emplace_back(pair.second);
-    }
-
     // We now create ordered value accessors for both build and probe side,
     // using the joined tuple TIDs.
     std::unique_ptr<ValueAccessor> ordered_build_accessor(
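
As a minimal standalone sketch (not Quickstep code) of the layout change above: the collector now stores, per build block, a pair of parallel tuple-ID vectors (PairOfVectors) instead of one vector of (build, probe) pairs (VectorOfPairs), and the IDs are zipped back into pairs only where getAllValuesForJoin() still expects the pair-per-match form. The tuple_id alias and the Zip helper below are illustrative stand-ins, simplified from the Quickstep originals.

// Illustrative sketch only; types simplified from the Quickstep originals.
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

using tuple_id = std::int64_t;  // stand-in for Quickstep's tuple_id

// Old per-block layout: one (build-TID, probe-TID) pair per matched tuple.
using VectorOfPairs = std::vector<std::pair<tuple_id, tuple_id>>;
// New per-block layout: two parallel vectors of build TIDs and probe TIDs.
using PairOfVectors = std::pair<std::vector<tuple_id>, std::vector<tuple_id>>;

// Zip the parallel vectors back into pairs, mirroring the
// zipped_joined_tuple_ids loop added for getAllValuesForJoin().
VectorOfPairs Zip(const PairOfVectors &matches) {
  VectorOfPairs zipped;
  zipped.reserve(matches.first.size());
  for (std::size_t i = 0; i < matches.first.size(); ++i) {
    zipped.emplace_back(matches.first[i], matches.second[i]);
  }
  return zipped;
}

int main() {
  const PairOfVectors matches{{10, 11, 12}, {100, 101, 102}};
  for (const auto &match : Zip(matches)) {
    std::cout << "build TID " << match.first
              << " joins probe TID " << match.second << "\n";
  }
  return 0;
}

Keeping the two vectors separate lets them be handed directly to the ordered value accessors for the build and probe sides, which removes the per-block unzip loop that the previous version of executeWithCopyElision() ran.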