This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 1c60956335675ea0b6ba87a5548cce37634e8306
Author: Bankim Bhavsar <[email protected]>
AuthorDate: Fri Mar 13 23:04:22 2020 -0700

    [util] Minor changes in BlockBloomFilter requested by Impala
    
    1) Do not include glog/logging.h and gflags/gflags.h in public header file
    for utilities like block_bloom_filter.h and hash_util.h as it causes 
problems
    with code-gen/IR in Impala.
    
    2) Public OrEqualArray() static function in BlockBloomFilter to avoid
    a copy.
    This change forced using static function pointers that are initialized
    only once.
    
    Change-Id: I5944f80f4c071ce787eded3f5b41d3bc56560cd0
    Reviewed-on: http://gerrit.cloudera.org:8080/15450
    Tested-by: Adar Dembo <[email protected]>
    Reviewed-by: Adar Dembo <[email protected]>
---
 src/kudu/util/block_bloom_filter-test.cc | 11 ++++++
 src/kudu/util/block_bloom_filter.cc      | 59 +++++++++++++++++++++++++-------
 src/kudu/util/block_bloom_filter.h       | 47 +++++++++++++++++--------
 src/kudu/util/block_bloom_filter_avx2.cc |  5 ++-
 src/kudu/util/hash_util.h                |  8 +++--
 5 files changed, 100 insertions(+), 30 deletions(-)

diff --git a/src/kudu/util/block_bloom_filter-test.cc 
b/src/kudu/util/block_bloom_filter-test.cc
index 8e12099..fa9b989 100644
--- a/src/kudu/util/block_bloom_filter-test.cc
+++ b/src/kudu/util/block_bloom_filter-test.cc
@@ -20,6 +20,7 @@
 #include <cmath> // IWYU pragma: keep
 #include <cstdint>
 #include <cstdlib>
+#include <cstring>
 #include <iosfwd>
 #include <memory>
 #include <unordered_set>
@@ -60,6 +61,7 @@ class BlockBloomFilterTest : public KuduTest {
 
   BlockBloomFilter* CreateBloomFilter(size_t log_space_bytes) {
     FLAGS_disable_blockbloomfilter_avx2 = (MakeRand() & 0x1) == 0;
+    BlockBloomFilter::InitializeFunctionPtrs();
 
     unique_ptr<BlockBloomFilter> bf(new BlockBloomFilter(allocator_));
     CHECK_OK(bf->Init(log_space_bytes, FAST_HASH, 0));
@@ -318,5 +320,14 @@ TEST_F(BlockBloomFilterTest, Or) {
   Status s = bf4->Or(*bf5);
   ASSERT_TRUE(s.IsInvalidArgument());
   ASSERT_STR_CONTAINS(s.ToString(), "Directory size don't match");
+
+  // Test the public OrEqualArray() function.
+  static constexpr size_t kNumBytes = 64;
+  unique_ptr<uint8_t[]> a_ptr(new uint8_t[kNumBytes]);
+  unique_ptr<uint8_t[]> b_ptr(new uint8_t[kNumBytes]);
+  memset(a_ptr.get(), 0xDE, kNumBytes);
+  memset(b_ptr.get(), 0, kNumBytes);
+  ASSERT_OK(BlockBloomFilter::OrEqualArray(kNumBytes, a_ptr.get(), 
b_ptr.get()));
+  ASSERT_EQ(0, memcmp(a_ptr.get(), b_ptr.get(), kNumBytes));
 }
 }  // namespace kudu
diff --git a/src/kudu/util/block_bloom_filter.cc 
b/src/kudu/util/block_bloom_filter.cc
index 40a3d64..bb48aa0 100644
--- a/src/kudu/util/block_bloom_filter.cc
+++ b/src/kudu/util/block_bloom_filter.cc
@@ -24,10 +24,12 @@
 #include <cmath>
 #include <cstdlib>
 #include <cstring>
+#include <mutex>
 #include <string>
 
 #include <gflags/gflags.h>
 
+#include "kudu/gutil/cpu.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/block_bloom_filter.pb.h"
 #include "kudu/util/flag_tags.h"
@@ -43,22 +45,24 @@ DEFINE_bool(disable_blockbloomfilter_avx2, false,
             "that doesn't support AVX2.");
 TAG_FLAG(disable_blockbloomfilter_avx2, hidden);
 
+// Flag used to initialize the static function pointers for the 
BlockBloomFilter class.
+static std::once_flag g_init_func_ptrs_flag;
+
 namespace kudu {
 
+// Initialize the static member variables from BlockBloomFilter class.
 constexpr uint32_t BlockBloomFilter::kRehash[8] __attribute__((aligned(32)));
 const base::CPU BlockBloomFilter::kCpu = base::CPU();
 // constexpr data member requires initialization in the class declaration.
 // Hence no duplicate initialization in the definition here.
 constexpr BlockBloomFilter* const BlockBloomFilter::kAlwaysTrueFilter;
 
-BlockBloomFilter::BlockBloomFilter(BlockBloomFilterBufferAllocatorIf* 
buffer_allocator) :
-  always_false_(true),
-  buffer_allocator_(buffer_allocator),
-  log_num_buckets_(0),
-  directory_mask_(0),
-  directory_(nullptr),
-  hash_algorithm_(UNKNOWN_HASH),
-  hash_seed_(0) {
+decltype(&BlockBloomFilter::BucketInsert) 
BlockBloomFilter::bucket_insert_func_ptr_ = nullptr;
+decltype(&BlockBloomFilter::BucketFind) 
BlockBloomFilter::bucket_find_func_ptr_ = nullptr;
+decltype(&BlockBloomFilter::OrEqualArrayNoAVX2) 
BlockBloomFilter::or_equal_array_func_ptr_ =
+    nullptr;
+
+void BlockBloomFilter::InitializeFunctionPtrs() {
 #ifdef USE_AVX2
   if (has_avx2()) {
     bucket_insert_func_ptr_ = &BlockBloomFilter::BucketInsertAVX2;
@@ -67,15 +71,30 @@ 
BlockBloomFilter::BlockBloomFilter(BlockBloomFilterBufferAllocatorIf* buffer_all
   } else {
     bucket_insert_func_ptr_ = &BlockBloomFilter::BucketInsert;
     bucket_find_func_ptr_ = &BlockBloomFilter::BucketFind;
-    or_equal_array_func_ptr_ = &BlockBloomFilter::OrEqualArray;
+    or_equal_array_func_ptr_ = &BlockBloomFilter::OrEqualArrayNoAVX2;
   }
 #else
   bucket_insert_func_ptr_ = &BlockBloomFilter::BucketInsert;
   bucket_find_func_ptr_ = &BlockBloomFilter::BucketFind;
-  or_equal_array_func_ptr_ = &BlockBloomFilter::OrEqualArray;
+  or_equal_array_func_ptr_ = &BlockBloomFilter::OrEqualArrayNoAVX2;
 #endif
 }
 
+BlockBloomFilter::BlockBloomFilter(BlockBloomFilterBufferAllocatorIf* 
buffer_allocator) :
+  always_false_(true),
+  buffer_allocator_(buffer_allocator),
+  log_num_buckets_(0),
+  directory_mask_(0),
+  directory_(nullptr),
+  hash_algorithm_(UNKNOWN_HASH),
+  hash_seed_(0) {
+  std::call_once(g_init_func_ptrs_flag, InitializeFunctionPtrs);
+
+  DCHECK_NOTNULL(bucket_insert_func_ptr_);
+  DCHECK_NOTNULL(bucket_find_func_ptr_);
+  DCHECK_NOTNULL(or_equal_array_func_ptr_);
+}
+
 BlockBloomFilter::~BlockBloomFilter() {
   Close();
 }
@@ -273,8 +292,20 @@ bool BlockBloomFilter::operator!=(const BlockBloomFilter& 
rhs) const {
   return !(rhs == *this);
 }
 
-void BlockBloomFilter::OrEqualArray(size_t n, const uint8_t* __restrict__ in,
-                                    uint8_t* __restrict__ out) {
+Status BlockBloomFilter::OrEqualArray(size_t n, const uint8_t* __restrict__ in,
+                                      uint8_t* __restrict__ out) {
+  if ((n % kBucketByteSize) != 0) {
+    return Status::InvalidArgument(Substitute("Input size $0 not a multiple of 
32-bytes", n));
+  }
+
+  std::call_once(g_init_func_ptrs_flag, InitializeFunctionPtrs);
+  DCHECK_NOTNULL(or_equal_array_func_ptr_);
+  (*or_equal_array_func_ptr_)(n, in, out);
+  return Status::OK();
+}
+
+void BlockBloomFilter::OrEqualArrayNoAVX2(size_t n, const uint8_t* 
__restrict__ in,
+                                          uint8_t* __restrict__ out) {
   // The trivial loop out[i] |= in[i] should auto-vectorize with gcc at -O3, 
but it is not
   // written in a way that is very friendly to auto-vectorization. Instead, we 
manually
   // vectorize, increasing the speed by up to 56x.
@@ -323,6 +354,10 @@ Status BlockBloomFilter::Or(const BlockBloomFilter& other) 
{
   return Status::OK();
 }
 
+bool BlockBloomFilter::has_avx2() {
+  return !FLAGS_disable_blockbloomfilter_avx2 && kCpu.has_avx2();
+}
+
 shared_ptr<DefaultBlockBloomFilterBufferAllocator>
     DefaultBlockBloomFilterBufferAllocator::GetSingletonSharedPtr() {
   // Meyer's Singleton.
diff --git a/src/kudu/util/block_bloom_filter.h 
b/src/kudu/util/block_bloom_filter.h
index 6a9a991..35b2166 100644
--- a/src/kudu/util/block_bloom_filter.h
+++ b/src/kudu/util/block_bloom_filter.h
@@ -23,10 +23,9 @@
 #include <limits>
 #include <memory>
 
-#include <gflags/gflags_declare.h>
-#include <glog/logging.h>
+// Including glog/logging.h causes problems while compiling in Apache Impala 
for codegen.
+// IWYU pragma: no_include <glog/logging.h>
 
-#include "kudu/gutil/cpu.h"
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/port.h"
 #include "kudu/util/hash.pb.h"
@@ -35,13 +34,15 @@
 #include "kudu/util/slice.h"
 #include "kudu/util/status.h"
 
+namespace base {
+class CPU;
+}  // namespace base
+
 namespace kudu {
 class Arena;
 class BlockBloomFilterPB;
 }  // namespace kudu
 
-DECLARE_bool(disable_blockbloomfilter_avx2);
-
 namespace kudu {
 
 // Forward declaration.
@@ -159,6 +160,11 @@ class BlockBloomFilter {
   // - Or'ing with kAlwaysTrueFilter is disallowed.
   Status Or(const BlockBloomFilter& other);
 
+  // Computes out[i] |= in[i] for the arrays 'in' and 'out' of length 'n' 
bytes where 'n'
+  // is multiple of 32-bytes.
+  static Status OrEqualArray(size_t n, const uint8_t* __restrict__ in,
+                             uint8_t* __restrict__ out);
+
   // Returns whether the Bloom filter is empty and hence would return false 
for all lookups.
   bool always_false() const {
     return always_false_;
@@ -195,6 +201,8 @@ class BlockBloomFilter {
 
   // log2(number of bytes in a bucket)
   static constexpr int kLogBucketByteSize = 5;
+  // Bucket size in bytes.
+  static constexpr size_t kBucketByteSize = 1UL << kLogBucketByteSize;
 
   static_assert((1 << kLogBucketWordBits) == 
std::numeric_limits<BucketWord>::digits,
       "BucketWord must have a bit-width that is be a power of 2, like 64 for 
uint64_t.");
@@ -229,8 +237,10 @@ class BlockBloomFilter {
 
   bool BucketFind(uint32_t bucket_idx, uint32_t hash) const noexcept;
 
-  // Computes out[i] |= in[i] for the arrays 'in' and 'out' of length 'n'.
-  static void OrEqualArray(size_t n, const uint8_t* __restrict__ in, uint8_t* 
__restrict__ out);
+  // Computes out[i] |= in[i] for the arrays 'in' and 'out' of length 'n' 
without using AVX2
+  // operations.
+  static void OrEqualArrayNoAVX2(size_t n, const uint8_t* __restrict__ in,
+                                 uint8_t* __restrict__ out);
 
 #ifdef USE_AVX2
   // Same as Insert(), but skips the CPU check and assumes that AVX2 is 
available.
@@ -250,11 +260,16 @@ class BlockBloomFilter {
                                uint8_t* __restrict__ out) 
__attribute__((target("avx2")));
 #endif
 
-  // Function pointers initialized in constructor to avoid run-time cost
-  // in hot-path of Find and Insert operations.
-  decltype(&BlockBloomFilter::BucketInsert) bucket_insert_func_ptr_;
-  decltype(&BlockBloomFilter::BucketFind) bucket_find_func_ptr_;
-  decltype(&BlockBloomFilter::OrEqualArray) or_equal_array_func_ptr_;
+  // Function pointers initialized just once to avoid run-time cost
+  // in hot-path of Find, Insert and Or operations.
+  static decltype(&BlockBloomFilter::BucketInsert) bucket_insert_func_ptr_;
+  static decltype(&BlockBloomFilter::BucketFind) bucket_find_func_ptr_;
+  static decltype(&BlockBloomFilter::OrEqualArrayNoAVX2) 
or_equal_array_func_ptr_;
+
+  // Helper function to initialize the function pointers above based on whether
+  // compiler and/or CPU at run-time supports AVX2.
+  // It also helps testing both AVX2 and non-AVX2 code paths.
+  static void InitializeFunctionPtrs();
 
   // Size of the internal directory structure in bytes.
   int64_t directory_size() const {
@@ -262,9 +277,7 @@ class BlockBloomFilter {
   }
 
   // Detect at run-time whether CPU supports AVX2
-  static bool has_avx2() {
-    return !FLAGS_disable_blockbloomfilter_avx2 && kCpu.has_avx2();
-  }
+  static bool has_avx2();
 
   // Some constants used in hashing. #defined for efficiency reasons.
 #define BLOOM_HASH_CONSTANTS                                             \
@@ -292,6 +305,10 @@ class BlockBloomFilter {
   }
 
   DISALLOW_COPY_AND_ASSIGN(BlockBloomFilter);
+
+  // Allow BlockBloomFilterTest unit test to invoke the private 
InitializeFunctionPtrs()
+  // function to test both AVX2 and non-AVX2 code paths.
+  friend class BlockBloomFilterTest;
 };
 
 // Generic interface to allocate and de-allocate memory for the 
BlockBloomFilter.
diff --git a/src/kudu/util/block_bloom_filter_avx2.cc 
b/src/kudu/util/block_bloom_filter_avx2.cc
index 93c3ff6..551c75d 100644
--- a/src/kudu/util/block_bloom_filter_avx2.cc
+++ b/src/kudu/util/block_bloom_filter_avx2.cc
@@ -82,8 +82,11 @@ void BlockBloomFilter::InsertAvx2(const uint32_t hash) 
noexcept {
 
 void BlockBloomFilter::OrEqualArrayAVX2(size_t n, const uint8_t* __restrict__ 
in,
                                         uint8_t* __restrict__ out) {
-  constexpr size_t kAVXRegisterBytes = sizeof(__m256d);
+  static constexpr size_t kAVXRegisterBytes = sizeof(__m256d);
+  static_assert(kAVXRegisterBytes == kBucketByteSize,
+      "Unexpected AVX register bytes");
   DCHECK_EQ(n % kAVXRegisterBytes, 0) << "Invalid Bloom filter directory size";
+
   const uint8_t* const in_end = in + n;
   for (; in != in_end; (in += kAVXRegisterBytes), (out += kAVXRegisterBytes)) {
     const double* double_in = reinterpret_cast<const double*>(in);
diff --git a/src/kudu/util/hash_util.h b/src/kudu/util/hash_util.h
index f9ac3f7..71055e1 100644
--- a/src/kudu/util/hash_util.h
+++ b/src/kudu/util/hash_util.h
@@ -19,7 +19,9 @@
 #define KUDU_UTIL_HASH_UTIL_H
 
 #include <cstdint>
-#include <glog/logging.h>
+
+// Including glog/logging.h causes problems while compiling in Apache Impala 
for codegen.
+// IWYU pragma: no_include <glog/logging.h>
 
 #include "kudu/gutil/port.h"
 #include "kudu/util/hash.pb.h"
@@ -134,7 +136,9 @@ class HashUtil {
       case FAST_HASH:
         return FastHash32(data.data(), data.size(), seed);
       default:
-        LOG(FATAL) << "Not implemented 32-bit hash function: " << 
hash_algorithm;
+        // Can't use LOG(FATAL)/CHECK() since including glog/logging.h causes 
problems
+        // with code-gen in Impala.
+        abort();
     }
   }
 

Reply via email to