Multiple build attributes
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/7d868b0d Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/7d868b0d Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/7d868b0d Branch: refs/heads/adaptive-bloom-filters Commit: 7d868b0ddc9c59e535bade506439d9ab0b873c66 Parents: 91e4982 Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Fri Jul 29 20:11:32 2016 -0500 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Fri Jul 29 20:11:32 2016 -0500 ---------------------------------------------------------------------- query_optimizer/ExecutionHeuristics.cpp | 4 +- storage/HashTable.hpp | 66 +++++++++++++++++++--------- storage/HashTable.proto | 6 +-- storage/HashTableFactory.hpp | 10 ++++- 4 files changed, 60 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7d868b0d/query_optimizer/ExecutionHeuristics.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp index 26c4378..372f6f3 100644 --- a/query_optimizer/ExecutionHeuristics.cpp +++ b/query_optimizer/ExecutionHeuristics.cpp @@ -65,7 +65,9 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan, bloom_filter_config.builder), std::make_pair(bloom_filter_id, info.build_operator_index_)); - hash_table_proto->add_build_side_bloom_filter_id(bloom_filter_id); + auto *build_side_bloom_filter = hash_table_proto->add_build_side_bloom_filters(); + build_side_bloom_filter->set_bloom_filter_id(bloom_filter_id); + build_side_bloom_filter->set_attr_id(info.build_side_bloom_filter_ids_[i]); std::cout << "Build " << build_side_bf.attribute->toString() << " @" << bloom_filter_config.builder << "\n"; } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7d868b0d/storage/HashTable.hpp ---------------------------------------------------------------------- diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp index 04c2ca8..bb06ace 100644 --- a/storage/HashTable.hpp +++ b/storage/HashTable.hpp @@ -1020,8 +1020,12 @@ class HashTable : public HashTableBase<resizable, * * @param bloom_filter The pointer to the bloom filter. **/ - inline void setBuildSideBloomFilter(BloomFilter *bloom_filter) { - build_bloom_filter_ = bloom_filter; + inline void addBuildSideBloomFilter(BloomFilter *bloom_filter) { + build_bloom_filters_.emplace_back(bloom_filter); + } + + inline void addBuildSideAttributeId(const attribute_id build_attribute_id) { + build_attribute_ids_.push_back(build_attribute_id); } /** @@ -1333,7 +1337,8 @@ class HashTable : public HashTableBase<resizable, // Data structures used for bloom filter optimized semi-joins. bool has_build_side_bloom_filter_ = false; bool has_probe_side_bloom_filter_ = false; - BloomFilter *build_bloom_filter_; + std::vector<BloomFilter *> build_bloom_filters_; + std::vector<attribute_id> build_attribute_ids_; std::vector<const BloomFilter*> probe_bloom_filters_; std::vector<attribute_id> probe_attribute_ids_; @@ -1481,11 +1486,26 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al &prealloc_state); } } - std::unique_ptr<BloomFilter> thread_local_bloom_filter; + if (has_build_side_bloom_filter_) { - thread_local_bloom_filter.reset(new BloomFilter(build_bloom_filter_->getNumberOfHashes(), - build_bloom_filter_->getBitArraySize())); + for (std::size_t i = 0; i < build_bloom_filters_.size(); ++i) { + auto *build_bloom_filter = build_bloom_filters_[i]; + std::unique_ptr<BloomFilter> thread_local_bloom_filter( + new BloomFilter(build_bloom_filter->getNumberOfHashes(), + build_bloom_filter->getBitArraySize())); + const auto &build_attr = build_attribute_ids_[i]; + const std::size_t attr_size = + accessor->template getUntypedValueAndByteLengthAtAbsolutePosition<false>(0, build_attr).second; + while (accessor->next()) { + thread_local_bloom_filter->insertUnSafe( + static_cast<const std::uint8_t *>(accessor->getUntypedValue(build_attr)), + attr_size); + } + build_bloom_filter->bitwiseOr(thread_local_bloom_filter.get()); + accessor->beginIteration(); + } } + if (resizable) { while (result == HashTablePutResult::kOutOfSpace) { { @@ -1501,11 +1521,6 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al variable_size, (*functor)(*accessor), using_prealloc ? &prealloc_state : nullptr); - // Insert into bloom filter, if enabled. - if (has_build_side_bloom_filter_) { - thread_local_bloom_filter->insertUnSafe(static_cast<const std::uint8_t *>(key.getDataPtr()), - key.getDataSize()); - } if (result == HashTablePutResult::kDuplicateKey) { DEBUG_ASSERT(!using_prealloc); return result; @@ -1531,20 +1546,11 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al variable_size, (*functor)(*accessor), using_prealloc ? &prealloc_state : nullptr); - // Insert into bloom filter, if enabled. - if (has_build_side_bloom_filter_) { - thread_local_bloom_filter->insertUnSafe(static_cast<const std::uint8_t *>(key.getDataPtr()), - key.getDataSize()); - } if (result != HashTablePutResult::kOK) { return result; } } } - // Update the build side bloom filter with thread local copy, if available. - if (has_build_side_bloom_filter_) { - build_bloom_filter_->bitwiseOr(thread_local_bloom_filter.get()); - } return HashTablePutResult::kOK; }); @@ -1610,6 +1616,26 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al &prealloc_state); } } + + if (has_build_side_bloom_filter_) { + for (std::size_t i = 0; i < build_bloom_filters_.size(); ++i) { + auto *build_bloom_filter = build_bloom_filters_[i]; + std::unique_ptr<BloomFilter> thread_local_bloom_filter( + new BloomFilter(build_bloom_filter->getNumberOfHashes(), + build_bloom_filter->getBitArraySize())); + const auto &build_attr = build_attribute_ids_[i]; + const std::size_t attr_size = + accessor->template getUntypedValueAndByteLengthAtAbsolutePosition<false>(0, build_attr).second; + while (accessor->next()) { + thread_local_bloom_filter->insertUnSafe( + static_cast<const std::uint8_t *>(accessor->getUntypedValue(build_attr)), + attr_size); + } + build_bloom_filter->bitwiseOr(thread_local_bloom_filter.get()); + accessor->beginIteration(); + } + } + if (resizable) { while (result == HashTablePutResult::kOutOfSpace) { { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7d868b0d/storage/HashTable.proto ---------------------------------------------------------------------- diff --git a/storage/HashTable.proto b/storage/HashTable.proto index 90bc9f7..6eabf60 100644 --- a/storage/HashTable.proto +++ b/storage/HashTable.proto @@ -34,10 +34,10 @@ message HashTable { required HashTableImplType hash_table_impl_type = 1; repeated Type key_types = 2; required uint64 estimated_num_entries = 3; - repeated uint32 build_side_bloom_filter_id = 4; - message ProbeSideBloomFilter { + message BloomFilter { required uint32 bloom_filter_id = 1; required uint32 attr_id = 2; } - repeated ProbeSideBloomFilter probe_side_bloom_filters = 6; + repeated BloomFilter probe_side_bloom_filters = 4; + repeated BloomFilter build_side_bloom_filters = 5; } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7d868b0d/storage/HashTableFactory.hpp ---------------------------------------------------------------------- diff --git a/storage/HashTableFactory.hpp b/storage/HashTableFactory.hpp index df2962a..fbb3d41 100644 --- a/storage/HashTableFactory.hpp +++ b/storage/HashTableFactory.hpp @@ -318,9 +318,15 @@ class HashTableFactory { // individual implementations of the hash table constructors. // Check if there are any build side bloom filter defined on the hash table. - if (proto.build_side_bloom_filter_id_size() > 0) { + if (proto.build_side_bloom_filters_size() > 0) { hash_table->enableBuildSideBloomFilter(); - hash_table->setBuildSideBloomFilter(bloom_filters[proto.build_side_bloom_filter_id(0)].get()); + for (int j = 0; j < proto.build_side_bloom_filters_size(); ++j) { + const auto build_side_bloom_filter = proto.build_side_bloom_filters(j); + hash_table->addBuildSideBloomFilter( + bloom_filters[build_side_bloom_filter.bloom_filter_id()].get()); + + hash_table->addBuildSideAttributeId(build_side_bloom_filter.attr_id()); + } } // Check if there are any probe side bloom filters defined on the hash table.