Repository: incubator-quickstep
Updated Branches:
  refs/heads/fix-vs-adaptive-bloom-filters [created] a0bca3501


Updates


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/a0bca350
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/a0bca350
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/a0bca350

Branch: refs/heads/fix-vs-adaptive-bloom-filters
Commit: a0bca3501977264590ee5d1a529400d8572b873b
Parents: bb90613
Author: Jianqiao Zhu <jianq...@cs.wisc.edu>
Authored: Fri Jul 22 00:05:06 2016 -0500
Committer: Jianqiao Zhu <jianq...@cs.wisc.edu>
Committed: Fri Jul 22 00:05:06 2016 -0500

----------------------------------------------------------------------
 CMakeLists.txt                            |   1 +
 cli/QuickstepCli.cpp                      |   9 ++
 query_optimizer/ExecutionHeuristics.cpp   |  18 ++-
 relational_operators/HashJoinOperator.cpp |   2 +
 storage/HashTable.hpp                     |   1 +
 utility/BloomFilterAdapter.hpp            |  18 ++-
 utility/CMakeLists.txt                    |   4 +
 utility/EventProfiler.cpp                 |  29 ++++
 utility/EventProfiler.hpp                 | 188 +++++++++++++++++++++++++
 9 files changed, 259 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0bca350/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20e1fb9..b4728a1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -760,6 +760,7 @@ target_link_libraries(quickstep_cli_shell
                       quickstep_queryoptimizer_QueryProcessor
                       quickstep_storage_PreloaderThread
                       quickstep_threading_ThreadIDBasedMap
+                      quickstep_utility_EventProfiler
                       quickstep_utility_Macros
                       quickstep_utility_PtrVector
                       quickstep_utility_SqlError

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0bca350/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index 02a55a0..4f88d74 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -75,6 +75,7 @@ typedef quickstep::LineReaderDumb LineReaderImpl;
 
 #include "storage/PreloaderThread.hpp"
 #include "threading/ThreadIDBasedMap.hpp"
+#include "utility/EventProfiler.hpp"
 #include "utility/Macros.hpp"
 #include "utility/PtrVector.hpp"
 #include "utility/SqlError.hpp"
@@ -185,6 +186,8 @@ DEFINE_string(profile_file_name, "",
               // To put things in perspective, the first run is, in my 
experiments, about 5-10
               // times more expensive than the average run. That means the 
query needs to be
               // run at least a hundred times to make the impact of the first 
run small (< 5 %).
+DEFINE_string(profile_output, "",
+              "Output file name for writing the profiled events.");
 
 }  // namespace quickstep
 
@@ -433,6 +436,7 @@ int main(int argc, char* argv[]) {
         }
 
         DCHECK(query_handle->getQueryPlanMutable() != nullptr);
+        quickstep::simple_profiler.clear();
         start = std::chrono::steady_clock::now();
         QueryExecutionUtil::ConstructAndSendAdmitRequestMessage(
             main_thread_client_id,
@@ -470,6 +474,11 @@ int main(int argc, char* argv[]) {
             foreman.printWorkOrderProfilingResults(query_handle->query_id(),
                                                    stdout);
           }
+          if (!quickstep::FLAGS_profile_output.empty()) {
+            std::ofstream ofs(quickstep::FLAGS_profile_output, std::ios::out);
+            quickstep::simple_profiler.writeToStream(ofs);
+            ofs.close();
+          }
         } catch (const std::exception &e) {
           fprintf(stderr, "QUERY EXECUTION ERROR: %s\n", e.what());
           break;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0bca350/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp 
b/query_optimizer/ExecutionHeuristics.cpp
index 2833066..ed49881 100644
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ b/query_optimizer/ExecutionHeuristics.cpp
@@ -17,6 +17,7 @@
 
 #include "query_optimizer/ExecutionHeuristics.hpp"
 
+#include <algorithm>
 #include <cstddef>
 #include <utility>
 #include <unordered_map>
@@ -39,6 +40,9 @@ DEFINE_int32(bloom_num_bits_per_tuple, kNumBitsPerByte,
 DEFINE_int32(bloom_num_hash_fns, 3,
     "Number of hash functions used in the Bloom filter.");
 
+DEFINE_bool(reverse_bloom_filter_order, false,
+    "Reverse the initial ordering of bloom filters.");
+
 void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
                                                 serialization::QueryContext 
*query_context_proto) {
   // Currently this only optimizes left deep joins using bloom filters.
@@ -72,6 +76,7 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan 
*query_plan,
     // Only chains of length greater than one are suitable candidates for 
semi-join optimization.
     if (chained_nodes.size() > 1) {
       std::unordered_map<QueryContext::bloom_filter_id, 
std::vector<attribute_id>> probe_bloom_filter_info;
+      std::vector<QueryContext::bloom_filter_id> bloom_filter_ids;
       for (const std::size_t node : chained_nodes) {
         if (!hash_joins_[node].is_selection_) {
           continue;
@@ -89,16 +94,21 @@ void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan 
*query_plan,
             ->add_build_side_bloom_filter_id(bloom_filter_id);
 
         probe_bloom_filter_info.insert(std::make_pair(bloom_filter_id, 
hash_joins_[node].probe_attributes_));
+        bloom_filter_ids.emplace_back(bloom_filter_id);
       }
 
       // Add probe-side bloom filter information to the corresponding hash 
table proto for each build-side bloom filter.
-      for (const std::pair<QueryContext::bloom_filter_id, 
std::vector<attribute_id>>
-               &bloom_filter_info : probe_bloom_filter_info) {
+      std::sort(bloom_filter_ids.begin(), bloom_filter_ids.end());
+      if (FLAGS_reverse_bloom_filter_order) {
+        std::reverse(bloom_filter_ids.begin(), bloom_filter_ids.end());
+      }
+      for (const auto bf_id : bloom_filter_ids) {
+        const auto &bloom_filter_info = probe_bloom_filter_info[bf_id];
         auto *probe_side_bloom_filter =
             
query_context_proto->mutable_join_hash_tables(hash_joins_[origin_node].join_hash_table_id_)
                                   ->add_probe_side_bloom_filters();
-        
probe_side_bloom_filter->set_probe_side_bloom_filter_id(bloom_filter_info.first);
-        for (const attribute_id &probe_attribute_id : 
bloom_filter_info.second) {
+        probe_side_bloom_filter->set_probe_side_bloom_filter_id(bf_id);
+        for (const attribute_id &probe_attribute_id : bloom_filter_info) {
           probe_side_bloom_filter->add_probe_side_attr_ids(probe_attribute_id);
         }
       }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0bca350/relational_operators/HashJoinOperator.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/HashJoinOperator.cpp 
b/relational_operators/HashJoinOperator.cpp
index 5a36d65..adf39bb 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -61,6 +61,8 @@ namespace quickstep {
 
 DEFINE_int64(bloom_adapter_batch_size, 4000,
              "Number of tuples to probe in bulk in Bloom filter adapter.");
+DEFINE_bool(adapt_bloom_filter, true,
+             "Whether to adaptively adjust the ordering of bloom filters.");
 
 namespace {
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0bca350/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index a74d71c..f1aeda4 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -47,6 +47,7 @@
 namespace quickstep {
 
 DECLARE_int64(bloom_adapter_batch_size);
+DECLARE_bool(adapt_bloom_filters);
 
 /** \addtogroup Storage
  *  @{

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0bca350/utility/BloomFilterAdapter.hpp
----------------------------------------------------------------------
diff --git a/utility/BloomFilterAdapter.hpp b/utility/BloomFilterAdapter.hpp
index f1916dc..9ed4065 100644
--- a/utility/BloomFilterAdapter.hpp
+++ b/utility/BloomFilterAdapter.hpp
@@ -59,13 +59,17 @@ class BloomFilterAdapter {
   }
 
   template <typename ValueAccessorT, bool adapt_filters = true>
-  std::size_t bulkProbe(
+  inline std::size_t bulkProbe(
       const ValueAccessorT *accessor,
       std::vector<tuple_id> &batch) {
     std::size_t out_size = batch.size();
-    for (auto &entry : bloom_filter_entries_)
-      out_size = bulkProbeBloomFilterEntry(*entry, accessor, batch, out_size);
-    adaptEntryOrder();
+    for (auto &entry : bloom_filter_entries_) {
+      out_size = bulkProbeBloomFilterEntry<ValueAccessorT, adapt_filters>(
+          *entry, accessor, batch, out_size);
+    }
+    if (adapt_filters) {
+      adaptEntryOrder();
+    }
     return out_size;
   }
 
@@ -81,7 +85,7 @@ class BloomFilterAdapter {
           cnt(0) {
     }
 
-    static bool isBetterThan(const BloomFilterEntry *a,
+    static inline bool isBetterThan(const BloomFilterEntry *a,
                              const BloomFilterEntry *b) {
       return a->miss_rate > b->miss_rate;
       // return static_cast<std::uint64_t>(a.miss) * b.cnt
@@ -97,7 +101,7 @@ class BloomFilterAdapter {
   };
 
   template <typename ValueAccessorT, bool adapt_filters = true>
-  std::size_t bulkProbeBloomFilterEntry(
+  inline std::size_t bulkProbeBloomFilterEntry(
       BloomFilterEntry &entry,
       const ValueAccessorT *accessor,
       std::vector<tuple_id> &batch,
@@ -125,7 +129,7 @@ class BloomFilterAdapter {
     return out_size;
   }
 
-  void adaptEntryOrder() {
+  inline void adaptEntryOrder() {
     for (auto &entry : bloom_filter_entries_)
       entry->miss_rate = static_cast<float>(entry->miss) / entry->cnt;
     std::sort(bloom_filter_entries_.begin(),

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0bca350/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index 378c4c9..61b1866 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -168,6 +168,7 @@ add_library(quickstep_utility_Cast ../empty_src.cpp 
Cast.hpp)
 add_library(quickstep_utility_CheckSnprintf ../empty_src.cpp CheckSnprintf.hpp)
 add_library(quickstep_utility_DAG ../empty_src.cpp DAG.hpp)
 add_library(quickstep_utility_EqualsAnyConstant ../empty_src.cpp 
EqualsAnyConstant.hpp)
+add_library(quickstep_utility_EventProfiler EventProfiler.cpp 
EventProfiler.hpp)
 add_library(quickstep_utility_Glob Glob.cpp Glob.hpp)
 add_library(quickstep_utility_HashPair ../empty_src.cpp HashPair.hpp)
 add_library(quickstep_utility_Macros ../empty_src.cpp Macros.hpp)
@@ -230,6 +231,8 @@ target_link_libraries(quickstep_utility_CheckSnprintf
 target_link_libraries(quickstep_utility_DAG
                       glog
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_utility_EventProfiler
+                      quickstep_threading_Mutex)
 target_link_libraries(quickstep_utility_Glob
                       glog)
 target_link_libraries(quickstep_utility_MemStream
@@ -309,6 +312,7 @@ target_link_libraries(quickstep_utility
                       quickstep_utility_CheckSnprintf
                       quickstep_utility_DAG
                       quickstep_utility_EqualsAnyConstant
+                      quickstep_utility_EventProfiler
                       quickstep_utility_Glob
                       quickstep_utility_HashPair
                       quickstep_utility_Macros

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0bca350/utility/EventProfiler.cpp
----------------------------------------------------------------------
diff --git a/utility/EventProfiler.cpp b/utility/EventProfiler.cpp
new file mode 100644
index 0000000..728ebff
--- /dev/null
+++ b/utility/EventProfiler.cpp
@@ -0,0 +1,29 @@
+/**
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#include "utility/EventProfiler.hpp"
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+namespace quickstep {
+
+EventProfiler<int, std::size_t> simple_profiler;
+EventProfiler<std::size_t> relop_profiler;
+
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/a0bca350/utility/EventProfiler.hpp
----------------------------------------------------------------------
diff --git a/utility/EventProfiler.hpp b/utility/EventProfiler.hpp
new file mode 100644
index 0000000..70024e6
--- /dev/null
+++ b/utility/EventProfiler.hpp
@@ -0,0 +1,188 @@
+/**
+ *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
+ *     University of Wisconsin—Madison.
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ **/
+
+#ifndef QUICKSTEP_UTILITY_EVENT_PROFILER_HPP_
+#define QUICKSTEP_UTILITY_EVENT_PROFILER_HPP_
+
+#include <chrono>
+#include <cstddef>
+#include <cstring>
+#include <ctime>
+#include <iomanip>
+#include <map>
+#include <ostream>
+#include <thread>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "threading/Mutex.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+
+/** \addtogroup Utility
+ *  @{
+ */
+
+using clock = std::chrono::steady_clock;
+
+template <typename TagT, typename ...PayloadT>
+class EventProfiler {
+
+ public:
+  EventProfiler()
+      : zero_time_(clock::now()) {
+  }
+
+  struct EventInfo {
+    clock::time_point start_time;
+    clock::time_point end_time;
+    bool is_finished;
+    std::tuple<PayloadT...> payload;
+
+    explicit EventInfo(const clock::time_point &start_time_in)
+        : start_time(start_time_in),
+          is_finished(false) {
+    }
+
+    EventInfo()
+        : start_time(clock::now()),
+          is_finished(false) {
+    }
+
+    inline void setPayload(PayloadT &&...in_payload) {
+      payload = std::make_tuple(in_payload...);
+    }
+
+    inline void endEvent() {
+      end_time = clock::now();
+      is_finished = true;
+    }
+  };
+
+  struct EventContainer {
+    EventContainer()
+        : context(0) {}
+
+    inline void startEvent(const TagT &tag) {
+      events[tag].emplace_back(clock::now());
+    }
+
+    inline void endEvent(const TagT &tag) {
+      auto &event_info = events.at(tag).back();
+      event_info.is_finished = true;
+      event_info.end_time = clock::now();
+    }
+
+    inline std::vector<EventInfo> *getEventLine(const TagT &tag) {
+      return &events[tag];
+    }
+
+    inline void setContext(int context_in) {
+      context = context_in;
+    }
+
+    inline int getContext() const {
+      return context;
+    }
+
+    std::map<TagT, std::vector<EventInfo>> events;
+    int context;
+  };
+
+  EventContainer *getContainer() {
+    MutexLock lock(mutex_);
+    return &thread_map_[std::this_thread::get_id()];
+  }
+
+  void writeToStream(std::ostream &os) const {
+    time_t rawtime;
+    time(&rawtime);
+    char event_id[32];
+    strftime(event_id, sizeof event_id, "%Y-%m-%d %H:%M:%S", 
localtime(&rawtime));
+
+    int thread_id = 0;
+    for (const auto &thread_ctx : thread_map_) {
+      for (const auto &event_group : thread_ctx.second.events) {
+        for (const auto &event_info : event_group.second) {
+          CHECK(event_info.is_finished) << "Unfinished profiling event";
+
+          os << std::setprecision(12)
+             << event_id << ","
+             << thread_id << "," << event_group.first << ",";
+
+          PrintTuple(os, event_info.payload, ",");
+
+          os << std::chrono::duration<double>(event_info.start_time - 
zero_time_).count()
+             << ","
+             << std::chrono::duration<double>(event_info.end_time - 
zero_time_).count()
+             << "\n";
+        }
+      }
+      ++thread_id;
+    }
+  }
+
+  void clear() {
+    zero_time_ = clock::now();
+    thread_map_.clear();
+  }
+
+  const std::map<std::thread::id, EventContainer> &containers() {
+    return thread_map_;
+  }
+
+  const clock::time_point &zero_time() {
+    return zero_time_;
+  }
+
+ private:
+  template<class Tuple, std::size_t N>
+  struct TuplePrinter {
+    static void Print(std::ostream &os, const Tuple &t, const std::string 
&sep) {
+      TuplePrinter<Tuple, N-1>::Print(os, t, sep);
+      os << std::get<N-1>(t) << sep;
+    }
+  };
+
+  template<class Tuple>
+  struct TuplePrinter<Tuple, 1> {
+    static void Print(std::ostream &os, const Tuple &t, const std::string 
&sep) {
+      os << std::get<0>(t) << sep;
+    }
+  };
+
+  template<class... Args>
+  static void PrintTuple(std::ostream &os, const std::tuple<Args...>& t, const 
std::string &sep) {
+    TuplePrinter<decltype(t), sizeof...(Args)>::Print(os, t, sep);
+  }
+
+  clock::time_point zero_time_;
+  std::map<std::thread::id, EventContainer> thread_map_;
+  Mutex mutex_;
+};
+
+extern EventProfiler<int, std::size_t> simple_profiler;
+extern EventProfiler<std::size_t> relop_profiler;
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_UTILITY_EVENT_PROFILER_HPP_

Reply via email to