[ 
https://issues.apache.org/jira/browse/ARROW-1928?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16446796#comment-16446796
 ] 

ASF GitHub Bot commented on ARROW-1928:
---------------------------------------

xhochy closed pull request #1915: ARROW-1928: [C++] Add 
BitmapReader/BitmapWriter benchmarks
URL: https://github.com/apache/arrow/pull/1915
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc 
b/cpp/src/arrow/util/bit-util-benchmark.cc
index 8969dd80b1..06aa2ed773 100644
--- a/cpp/src/arrow/util/bit-util-benchmark.cc
+++ b/cpp/src/arrow/util/bit-util-benchmark.cc
@@ -28,13 +28,147 @@
 namespace arrow {
 namespace BitUtil {
 
-static void BM_CopyBitmap(benchmark::State& state) {  // NOLINT non-const 
reference
-  const int kBufferSize = state.range(0);
+// A naive bitmap reader implementation, meant as a baseline against
+// internal::BitmapReader
+
+class NaiveBitmapReader {
+ public:
+  NaiveBitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t 
length)
+      : bitmap_(bitmap), position_(0) {}
+
+  bool IsSet() const {
+    const int64_t byte_offset = position_ / 8;
+    const int64_t bit_offset = position_ % 8;
+    return (bitmap_[byte_offset] & (1 << bit_offset)) == 0;
+  }
+
+  bool IsNotSet() const { return !IsSet(); }
+
+  void Next() { ++position_; }
 
+ private:
+  const uint8_t* bitmap_;
+  int64_t position_;
+};
+
+// A naive bitmap writer implementation, meant as a baseline against
+// internal::BitmapWriter
+
+class NaiveBitmapWriter {
+ public:
+  NaiveBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
+      : bitmap_(bitmap), position_(0) {}
+
+  void Set() {
+    const int64_t byte_offset = position_ / 8;
+    const int64_t bit_offset = position_ % 8;
+    bitmap_[byte_offset] |= static_cast<uint8_t>(1 << bit_offset);
+  }
+
+  void Clear() {
+    const int64_t byte_offset = position_ / 8;
+    const int64_t bit_offset = position_ % 8;
+    bitmap_[byte_offset] &= 0xFF ^ static_cast<uint8_t>(1 << bit_offset);
+  }
+
+  void Next() { ++position_; }
+
+  void Finish() {}
+
+  int64_t position() const { return position_; }
+
+ private:
+  uint8_t* bitmap_;
+  int64_t position_;
+};
+
+static std::shared_ptr<Buffer> CreateRandomBuffer(int64_t nbytes) {
   std::shared_ptr<Buffer> buffer;
-  ASSERT_OK(AllocateBuffer(default_memory_pool(), kBufferSize, &buffer));
-  memset(buffer->mutable_data(), 0, kBufferSize);
-  test::random_bytes(kBufferSize, 0, buffer->mutable_data());
+  ABORT_NOT_OK(AllocateBuffer(default_memory_pool(), nbytes, &buffer));
+  memset(buffer->mutable_data(), 0, nbytes);
+  test::random_bytes(nbytes, 0, buffer->mutable_data());
+  return buffer;
+}
+
+template <typename BitmapReaderType>
+static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) {
+  std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
+
+  const int64_t num_bits = nbytes * 8;
+  const uint8_t* bitmap = buffer->data();
+
+  while (state.KeepRunning()) {
+    {
+      BitmapReaderType reader(bitmap, 0, num_bits);
+      int64_t total = 0;
+      for (int64_t i = 0; i < num_bits; i++) {
+        total += reader.IsSet();
+        reader.Next();
+      }
+      benchmark::DoNotOptimize(total);
+    }
+    {
+      BitmapReaderType reader(bitmap, 0, num_bits);
+      int64_t total = 0;
+      for (int64_t i = 0; i < num_bits; i++) {
+        total += !reader.IsNotSet();
+        reader.Next();
+      }
+      benchmark::DoNotOptimize(total);
+    }
+  }
+  state.SetBytesProcessed(2 * int64_t(state.iterations()) * nbytes);
+}
+
+template <typename BitmapWriterType>
+static void BenchmarkBitmapWriter(benchmark::State& state, int64_t nbytes) {
+  std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
+
+  const int64_t num_bits = nbytes * 8;
+  uint8_t* bitmap = buffer->mutable_data();
+
+  while (state.KeepRunning()) {
+    {
+      BitmapWriterType writer(bitmap, 0, num_bits);
+      for (int64_t i = 0; i < num_bits; i++) {
+        writer.Set();
+        writer.Next();
+      }
+      writer.Finish();
+      benchmark::ClobberMemory();
+    }
+    {
+      BitmapWriterType writer(bitmap, 0, num_bits);
+      for (int64_t i = 0; i < num_bits; i++) {
+        writer.Clear();
+        writer.Next();
+      }
+      writer.Finish();
+      benchmark::ClobberMemory();
+    }
+  }
+  state.SetBytesProcessed(2 * int64_t(state.iterations()) * nbytes);
+}
+
+static void BM_NaiveBitmapReader(benchmark::State& state) {
+  BenchmarkBitmapReader<NaiveBitmapReader>(state, state.range(0));
+}
+
+static void BM_BitmapReader(benchmark::State& state) {
+  BenchmarkBitmapReader<internal::BitmapReader>(state, state.range(0));
+}
+
+static void BM_NaiveBitmapWriter(benchmark::State& state) {
+  BenchmarkBitmapWriter<NaiveBitmapWriter>(state, state.range(0));
+}
+
+static void BM_BitmapWriter(benchmark::State& state) {
+  BenchmarkBitmapWriter<internal::BitmapWriter>(state, state.range(0));
+}
+
+static void BM_CopyBitmap(benchmark::State& state) {  // NOLINT non-const 
reference
+  const int kBufferSize = state.range(0);
+  std::shared_ptr<Buffer> buffer = CreateRandomBuffer(kBufferSize);
 
   const int num_bits = kBufferSize * 8;
   const uint8_t* src = buffer->data();
@@ -54,5 +188,19 @@ BENCHMARK(BM_CopyBitmap)
     ->MinTime(1.0)
     ->Unit(benchmark::kMicrosecond);
 
+BENCHMARK(BM_NaiveBitmapReader)
+    ->Args({100000})
+    ->MinTime(1.0)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_BitmapReader)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_NaiveBitmapWriter)
+    ->Args({100000})
+    ->MinTime(1.0)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_BitmapWriter)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
 }  // namespace BitUtil
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 5f171a07ce..3255ead081 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -428,7 +428,7 @@ class BitmapReader {
   void Next() {
     ++bit_offset_;
     ++position_;
-    if (bit_offset_ == 8) {
+    if (ARROW_PREDICT_FALSE(bit_offset_ == 8)) {
       bit_offset_ = 0;
       ++byte_offset_;
       if (ARROW_PREDICT_TRUE(position_ < length_)) {
@@ -453,23 +453,23 @@ class BitmapWriter {
       : bitmap_(bitmap), position_(0), length_(length) {
     current_byte_ = 0;
     byte_offset_ = start_offset / 8;
-    bit_offset_ = start_offset % 8;
+    bit_mask_ = static_cast<uint8_t>(1 << (start_offset % 8));
     if (length > 0) {
       current_byte_ = bitmap[byte_offset_];
     }
   }
 
-  void Set() { current_byte_ |= BitUtil::kBitmask[bit_offset_]; }
+  void Set() { current_byte_ |= bit_mask_; }
 
-  void Clear() { current_byte_ &= BitUtil::kFlippedBitmask[bit_offset_]; }
+  void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; }
 
   void Next() {
-    ++bit_offset_;
+    bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
     ++position_;
-    bitmap_[byte_offset_] = current_byte_;
-    if (bit_offset_ == 8) {
-      bit_offset_ = 0;
-      ++byte_offset_;
+    if (bit_mask_ == 0) {
+      // Finished this byte, need advancing
+      bit_mask_ = 0x01;
+      bitmap_[byte_offset_++] = current_byte_;
       if (ARROW_PREDICT_TRUE(position_ < length_)) {
         current_byte_ = bitmap_[byte_offset_];
       }
@@ -477,10 +477,9 @@ class BitmapWriter {
   }
 
   void Finish() {
-    if (ARROW_PREDICT_TRUE(position_ < length_)) {
-      if (bit_offset_ != 0) {
-        bitmap_[byte_offset_] = current_byte_;
-      }
+    // Store current byte if we didn't went past bitmap storage
+    if (bit_mask_ != 0x01 || position_ < length_) {
+      bitmap_[byte_offset_] = current_byte_;
     }
   }
 
@@ -492,8 +491,8 @@ class BitmapWriter {
   int64_t length_;
 
   uint8_t current_byte_;
+  uint8_t bit_mask_;
   int64_t byte_offset_;
-  int64_t bit_offset_;
 };
 
 }  // namespace internal


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> [C++] Add benchmarks comparing performance of internal::BitmapReader/Writer 
> with naive approaches
> -------------------------------------------------------------------------------------------------
>
>                 Key: ARROW-1928
>                 URL: https://issues.apache.org/jira/browse/ARROW-1928
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: C++
>            Reporter: Wes McKinney
>            Assignee: Antoine Pitrou
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 0.10.0
>
>
> The performance may also vary across platforms/compilers. This would be 
> helpful to know how much they help



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to