Multiple build attributes

Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/7d868b0d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/7d868b0d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/7d868b0d

Branch: refs/heads/adaptive-bloom-filters
Commit: 7d868b0ddc9c59e535bade506439d9ab0b873c66
Parents: 91e4982
Author: Jianqiao Zhu <jianq...@cs.wisc.edu>
Authored: Fri Jul 29 20:11:32 2016 -0500
Committer: Jianqiao Zhu <jianq...@cs.wisc.edu>
Committed: Fri Jul 29 20:11:32 2016 -0500

----------------------------------------------------------------------
 query_optimizer/ExecutionHeuristics.cpp |  4 +-
 storage/HashTable.hpp                   | 66 +++++++++++++++++++---------
 storage/HashTable.proto                 |  6 +--
 storage/HashTableFactory.hpp            | 10 ++++-
 4 files changed, 60 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7d868b0d/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp b/query_optimizer/ExecutionHeuristics.cpp
index 26c4378..372f6f3 100644
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ b/query_optimizer/ExecutionHeuristics.cpp
@@ -65,7 +65,9 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
                          bloom_filter_config.builder),
           std::make_pair(bloom_filter_id, info.build_operator_index_));
 
-      hash_table_proto->add_build_side_bloom_filter_id(bloom_filter_id);
+      auto *build_side_bloom_filter = 
hash_table_proto->add_build_side_bloom_filters();
+      build_side_bloom_filter->set_bloom_filter_id(bloom_filter_id);
+      
build_side_bloom_filter->set_attr_id(info.build_side_bloom_filter_ids_[i]);
       std::cout << "Build " << build_side_bf.attribute->toString()
                 << " @" << bloom_filter_config.builder << "\n";
     }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7d868b0d/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index 04c2ca8..bb06ace 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -1020,8 +1020,12 @@ class HashTable : public HashTableBase<resizable,
    *
    * @param bloom_filter The pointer to the bloom filter.
    **/
-  inline void setBuildSideBloomFilter(BloomFilter *bloom_filter) {
-    build_bloom_filter_ = bloom_filter;
+  inline void addBuildSideBloomFilter(BloomFilter *bloom_filter) {
+    build_bloom_filters_.emplace_back(bloom_filter);
+  }
+
+  inline void addBuildSideAttributeId(const attribute_id build_attribute_id) {
+    build_attribute_ids_.push_back(build_attribute_id);
   }
 
   /**
@@ -1333,7 +1337,8 @@ class HashTable : public HashTableBase<resizable,
   // Data structures used for bloom filter optimized semi-joins.
   bool has_build_side_bloom_filter_ = false;
   bool has_probe_side_bloom_filter_ = false;
-  BloomFilter *build_bloom_filter_;
+  std::vector<BloomFilter *> build_bloom_filters_;
+  std::vector<attribute_id> build_attribute_ids_;
   std::vector<const BloomFilter*> probe_bloom_filters_;
   std::vector<attribute_id> probe_attribute_ids_;
 
@@ -1481,11 +1486,26 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
                                                         &prealloc_state);
       }
     }
-    std::unique_ptr<BloomFilter> thread_local_bloom_filter;
+
     if (has_build_side_bloom_filter_) {
-      thread_local_bloom_filter.reset(new 
BloomFilter(build_bloom_filter_->getNumberOfHashes(),
-                                                      
build_bloom_filter_->getBitArraySize()));
+      for (std::size_t i = 0; i < build_bloom_filters_.size(); ++i) {
+        auto *build_bloom_filter = build_bloom_filters_[i];
+        std::unique_ptr<BloomFilter> thread_local_bloom_filter(
+            new BloomFilter(build_bloom_filter->getNumberOfHashes(),
+                            build_bloom_filter->getBitArraySize()));
+        const auto &build_attr = build_attribute_ids_[i];
+        const std::size_t attr_size =
+            accessor->template 
getUntypedValueAndByteLengthAtAbsolutePosition<false>(0, build_attr).second;
+        while (accessor->next()) {
+          thread_local_bloom_filter->insertUnSafe(
+              static_cast<const std::uint8_t 
*>(accessor->getUntypedValue(build_attr)),
+              attr_size);
+        }
+        build_bloom_filter->bitwiseOr(thread_local_bloom_filter.get());
+        accessor->beginIteration();
+      }
     }
+
     if (resizable) {
       while (result == HashTablePutResult::kOutOfSpace) {
         {
@@ -1501,11 +1521,6 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
                                        variable_size,
                                        (*functor)(*accessor),
                                        using_prealloc ? &prealloc_state : 
nullptr);
-            // Insert into bloom filter, if enabled.
-            if (has_build_side_bloom_filter_) {
-              thread_local_bloom_filter->insertUnSafe(static_cast<const 
std::uint8_t *>(key.getDataPtr()),
-                                                      key.getDataSize());
-            }
             if (result == HashTablePutResult::kDuplicateKey) {
               DEBUG_ASSERT(!using_prealloc);
               return result;
@@ -1531,20 +1546,11 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
                                    variable_size,
                                    (*functor)(*accessor),
                                    using_prealloc ? &prealloc_state : nullptr);
-        // Insert into bloom filter, if enabled.
-        if (has_build_side_bloom_filter_) {
-          thread_local_bloom_filter->insertUnSafe(static_cast<const 
std::uint8_t *>(key.getDataPtr()),
-                                                  key.getDataSize());
-        }
         if (result != HashTablePutResult::kOK) {
           return result;
         }
       }
     }
-    // Update the build side bloom filter with thread local copy, if available.
-    if (has_build_side_bloom_filter_) {
-      build_bloom_filter_->bitwiseOr(thread_local_bloom_filter.get());
-    }
 
     return HashTablePutResult::kOK;
   });
@@ -1610,6 +1616,26 @@ HashTablePutResult HashTable<ValueT, resizable, serializable, force_key_copy, al
                                                         &prealloc_state);
       }
     }
+
+    if (has_build_side_bloom_filter_) {
+      for (std::size_t i = 0; i < build_bloom_filters_.size(); ++i) {
+        auto *build_bloom_filter = build_bloom_filters_[i];
+        std::unique_ptr<BloomFilter> thread_local_bloom_filter(
+            new BloomFilter(build_bloom_filter->getNumberOfHashes(),
+                            build_bloom_filter->getBitArraySize()));
+        const auto &build_attr = build_attribute_ids_[i];
+        const std::size_t attr_size =
+            accessor->template 
getUntypedValueAndByteLengthAtAbsolutePosition<false>(0, build_attr).second;
+        while (accessor->next()) {
+          thread_local_bloom_filter->insertUnSafe(
+              static_cast<const std::uint8_t 
*>(accessor->getUntypedValue(build_attr)),
+              attr_size);
+        }
+        build_bloom_filter->bitwiseOr(thread_local_bloom_filter.get());
+        accessor->beginIteration();
+      }
+    }
+
     if (resizable) {
       while (result == HashTablePutResult::kOutOfSpace) {
         {

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7d868b0d/storage/HashTable.proto
----------------------------------------------------------------------
diff --git a/storage/HashTable.proto b/storage/HashTable.proto
index 90bc9f7..6eabf60 100644
--- a/storage/HashTable.proto
+++ b/storage/HashTable.proto
@@ -34,10 +34,10 @@ message HashTable {
   required HashTableImplType hash_table_impl_type = 1;
   repeated Type key_types = 2;
   required uint64 estimated_num_entries = 3;
-  repeated uint32 build_side_bloom_filter_id = 4;
-  message ProbeSideBloomFilter {
+  message BloomFilter {
     required uint32 bloom_filter_id = 1;
     required uint32 attr_id = 2;
   }
-  repeated ProbeSideBloomFilter probe_side_bloom_filters = 6;
+  repeated BloomFilter probe_side_bloom_filters = 4;
+  repeated BloomFilter build_side_bloom_filters = 5;  
 }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/7d868b0d/storage/HashTableFactory.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTableFactory.hpp b/storage/HashTableFactory.hpp
index df2962a..fbb3d41 100644
--- a/storage/HashTableFactory.hpp
+++ b/storage/HashTableFactory.hpp
@@ -318,9 +318,15 @@ class HashTableFactory {
     //                 individual implementations of the hash table constructors.
 
     // Check if there are any build side bloom filter defined on the hash table.
-    if (proto.build_side_bloom_filter_id_size() > 0) {
+    if (proto.build_side_bloom_filters_size() > 0) {
       hash_table->enableBuildSideBloomFilter();
-      
hash_table->setBuildSideBloomFilter(bloom_filters[proto.build_side_bloom_filter_id(0)].get());
+      for (int j = 0; j < proto.build_side_bloom_filters_size(); ++j) {
+        const auto build_side_bloom_filter = proto.build_side_bloom_filters(j);
+        hash_table->addBuildSideBloomFilter(
+            bloom_filters[build_side_bloom_filter.bloom_filter_id()].get());
+
+        hash_table->addBuildSideAttributeId(build_side_bloom_filter.attr_id());
+      }
     }
 
     // Check if there are any probe side bloom filters defined on the hash table.

Reply via email to