This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 27b869a  ARROW-2649: [C++] Add GenerateBits() function to improve 
bitmap writing performance
27b869a is described below

commit 27b869ae5df31f3be61e76e99999d96ea7d9b557
Author: Antoine Pitrou <[email protected]>
AuthorDate: Fri Jun 8 15:47:36 2018 -0400

    ARROW-2649: [C++] Add GenerateBits() function to improve bitmap writing 
performance
    
    Also a GenerateBitsUnrolled() for higher performance where warranted.
    
    Benchmarks:
    - GenerateBits is 1.8x faster than BitmapWriter
    - GenerateBitsUnrolled is 2.9x faster than BitmapWriter
    - BooleanBuilder is now 3x faster than with BitmapWriter
      (and around 9x faster than it was with SetBitTo calls)
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #2093 from pitrou/ARROW-2649-generate-bits and squashes the 
following commits:
    
    0ef0d12a <Antoine Pitrou> ARROW-2649:  Add GenerateBits() function
---
 cpp/src/arrow/builder.cc                 | 54 +++++--------------
 cpp/src/arrow/compute/kernels/cast.cc    | 15 ++----
 cpp/src/arrow/util/bit-util-benchmark.cc | 92 ++++++++++++++++++++++++++------
 cpp/src/arrow/util/bit-util-test.cc      | 74 +++++++++++++++++++++++++
 cpp/src/arrow/util/bit-util.h            | 85 +++++++++++++++++++++++++++--
 5 files changed, 249 insertions(+), 71 deletions(-)

diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 65018de..9a8fc7d 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -210,7 +210,7 @@ void ArrayBuilder::UnsafeSetNotNull(int64_t length) {
   memset(null_bitmap_data_ + ((length_ + pad_to_byte) / 8), 0xFF,
          static_cast<size_t>(fast_length));
 
-  // Trailing bytes
+  // Trailing bits
   for (int64_t i = length_ + pad_to_byte + (fast_length * 8); i < new_length; 
++i) {
     BitUtil::SetBit(null_bitmap_data_, i);
   }
@@ -775,16 +775,9 @@ Status BooleanBuilder::AppendValues(const uint8_t* values, 
int64_t length,
                                     const uint8_t* valid_bytes) {
   RETURN_NOT_OK(Reserve(length));
 
-  internal::FirstTimeBitmapWriter bit_writer(raw_data_, length_, length);
-  for (int64_t i = 0; i < length; ++i) {
-    if (values[i] != 0) {
-      bit_writer.Set();
-    } else {
-      bit_writer.Clear();
-    }
-    bit_writer.Next();
-  }
-  bit_writer.Finish();
+  int64_t i = 0;
+  internal::GenerateBitsUnrolled(raw_data_, length_, length,
+                                 [values, &i]() -> bool { return values[i++] 
!= 0; });
 
   // this updates length_
   ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
@@ -801,16 +794,9 @@ Status BooleanBuilder::AppendValues(const uint8_t* values, 
int64_t length,
   RETURN_NOT_OK(Reserve(length));
   DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
 
-  internal::FirstTimeBitmapWriter bit_writer(raw_data_, length_, length);
-  for (int64_t i = 0; i < length; ++i) {
-    if (values[i]) {
-      bit_writer.Set();
-    } else {
-      bit_writer.Clear();
-    }
-    bit_writer.Next();
-  }
-  bit_writer.Finish();
+  int64_t i = 0;
+  internal::GenerateBitsUnrolled(raw_data_, length_, length,
+                                 [values, &i]() -> bool { return values[i++]; 
});
 
   // this updates length_
   ArrayBuilder::UnsafeAppendToBitmap(is_valid);
@@ -846,16 +832,9 @@ Status BooleanBuilder::AppendValues(const 
std::vector<bool>& values,
   RETURN_NOT_OK(Reserve(length));
   DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
 
-  internal::FirstTimeBitmapWriter bit_writer(raw_data_, length_, length);
-  for (int64_t i = 0; i < length; ++i) {
-    if (values[i]) {
-      bit_writer.Set();
-    } else {
-      bit_writer.Clear();
-    }
-    bit_writer.Next();
-  }
-  bit_writer.Finish();
+  int64_t i = 0;
+  internal::GenerateBitsUnrolled(raw_data_, length_, length,
+                                 [values, &i]() -> bool { return values[i++]; 
});
 
   // this updates length_
   ArrayBuilder::UnsafeAppendToBitmap(is_valid);
@@ -871,16 +850,9 @@ Status BooleanBuilder::AppendValues(const 
std::vector<bool>& values) {
   const int64_t length = static_cast<int64_t>(values.size());
   RETURN_NOT_OK(Reserve(length));
 
-  internal::FirstTimeBitmapWriter bit_writer(raw_data_, length_, length);
-  for (int64_t i = 0; i < length; ++i) {
-    if (values[i]) {
-      bit_writer.Set();
-    } else {
-      bit_writer.Clear();
-    }
-    bit_writer.Next();
-  }
-  bit_writer.Finish();
+  int64_t i = 0;
+  internal::GenerateBitsUnrolled(raw_data_, length_, length,
+                                 [values, &i]() -> bool { return values[i++]; 
});
 
   // this updates length_
   ArrayBuilder::UnsafeSetNotNull(length);
diff --git a/cpp/src/arrow/compute/kernels/cast.cc 
b/cpp/src/arrow/compute/kernels/cast.cc
index 7987a1c..39925d7 100644
--- a/cpp/src/arrow/compute/kernels/cast.cc
+++ b/cpp/src/arrow/compute/kernels/cast.cc
@@ -200,18 +200,9 @@ struct CastFunctor<O, I,
   void operator()(FunctionContext* ctx, const CastOptions& options,
                   const ArrayData& input, ArrayData* output) {
     auto in_data = GetValues<typename I::c_type>(input, 1);
-    internal::FirstTimeBitmapWriter writer(output->buffers[1]->mutable_data(),
-                                           output->offset, input.length);
-
-    for (int64_t i = 0; i < input.length; ++i) {
-      if (*in_data++ != 0) {
-        writer.Set();
-      } else {
-        writer.Clear();
-      }
-      writer.Next();
-    }
-    writer.Finish();
+    const auto generate = [&in_data]() -> bool { return *in_data++ != 0; };
+    internal::GenerateBitsUnrolled(output->buffers[1]->mutable_data(), 
output->offset,
+                                   input.length, generate);
   }
 };
 
diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc 
b/cpp/src/arrow/util/bit-util-benchmark.cc
index ace19e9..183e43f 100644
--- a/cpp/src/arrow/util/bit-util-benchmark.cc
+++ b/cpp/src/arrow/util/bit-util-benchmark.cc
@@ -126,26 +126,48 @@ static void BenchmarkBitmapWriter(benchmark::State& 
state, int64_t nbytes) {
 
   const int64_t num_bits = nbytes * 8;
   uint8_t* bitmap = buffer->mutable_data();
+  const bool pattern[] = {false, false, false, true, true, true};
 
   while (state.KeepRunning()) {
-    {
-      BitmapWriterType writer(bitmap, 0, num_bits);
-      for (int64_t i = 0; i < num_bits; i++) {
+    int64_t pattern_index = 0;
+    BitmapWriterType writer(bitmap, 0, num_bits);
+    for (int64_t i = 0; i < num_bits; i++) {
+      if (pattern[pattern_index++]) {
         writer.Set();
-        writer.Next();
-      }
-      writer.Finish();
-      benchmark::ClobberMemory();
-    }
-    {
-      BitmapWriterType writer(bitmap, 0, num_bits);
-      for (int64_t i = 0; i < num_bits; i++) {
+      } else {
         writer.Clear();
-        writer.Next();
       }
-      writer.Finish();
-      benchmark::ClobberMemory();
+      if (pattern_index == sizeof(pattern) / sizeof(bool)) {
+        pattern_index = 0;
+      }
+      writer.Next();
     }
+    writer.Finish();
+    benchmark::ClobberMemory();
+  }
+  state.SetBytesProcessed(int64_t(state.iterations()) * nbytes);
+}
+
+template <typename GenerateBitsFunctorType>
+static void BenchmarkGenerateBits(benchmark::State& state, int64_t nbytes) {
+  std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
+
+  const int64_t num_bits = nbytes * 8;
+  uint8_t* bitmap = buffer->mutable_data();
+  // pattern should be the same as in BenchmarkBitmapWriter
+  const bool pattern[] = {false, false, false, true, true, true};
+
+  while (state.KeepRunning()) {
+    int64_t pattern_index = 0;
+    const auto generate = [&]() -> bool {
+      bool b = pattern[pattern_index++];
+      if (pattern_index == sizeof(pattern) / sizeof(bool)) {
+        pattern_index = 0;
+      }
+      return b;
+    };
+    GenerateBitsFunctorType()(bitmap, 0, num_bits, generate);
+    benchmark::ClobberMemory();
   }
   state.SetBytesProcessed(2 * int64_t(state.iterations()) * nbytes);
 }
@@ -170,6 +192,28 @@ static void BM_FirstTimeBitmapWriter(benchmark::State& 
state) {
   BenchmarkBitmapWriter<internal::FirstTimeBitmapWriter>(state, 
state.range(0));
 }
 
+struct GenerateBitsFunctor {
+  template <class Generator>
+  void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length, 
Generator&& g) {
+    return internal::GenerateBits(bitmap, start_offset, length, g);
+  }
+};
+
+struct GenerateBitsUnrolledFunctor {
+  template <class Generator>
+  void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length, 
Generator&& g) {
+    return internal::GenerateBitsUnrolled(bitmap, start_offset, length, g);
+  }
+};
+
+static void BM_GenerateBits(benchmark::State& state) {
+  BenchmarkGenerateBits<GenerateBitsFunctor>(state, state.range(0));
+}
+
+static void BM_GenerateBitsUnrolled(benchmark::State& state) {
+  BenchmarkGenerateBits<GenerateBitsUnrolledFunctor>(state, state.range(0));
+}
+
 static void BM_CopyBitmap(benchmark::State& state) {  // NOLINT non-const 
reference
   const int kBufferSize = static_cast<int>(state.range(0));
   std::shared_ptr<Buffer> buffer = CreateRandomBuffer(kBufferSize);
@@ -201,13 +245,31 @@ 
BENCHMARK(BM_BitmapReader)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicro
 
 BENCHMARK(BM_NaiveBitmapWriter)
     ->Args({100000})
+    ->Repetitions(2)
     ->MinTime(1.0)
     ->Unit(benchmark::kMicrosecond);
 
-BENCHMARK(BM_BitmapWriter)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_BitmapWriter)
+    ->Args({100000})
+    ->Repetitions(2)
+    ->MinTime(1.0)
+    ->Unit(benchmark::kMicrosecond);
 
 BENCHMARK(BM_FirstTimeBitmapWriter)
     ->Args({100000})
+    ->Repetitions(2)
+    ->MinTime(1.0)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_GenerateBits)
+    ->Args({100000})
+    ->Repetitions(2)
+    ->MinTime(1.0)
+    ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_GenerateBitsUnrolled)
+    ->Args({100000})
+    ->Repetitions(2)
     ->MinTime(1.0)
     ->Unit(benchmark::kMicrosecond);
 
diff --git a/cpp/src/arrow/util/bit-util-test.cc 
b/cpp/src/arrow/util/bit-util-test.cc
index 5e0b696..ae8923d 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -282,6 +282,80 @@ TEST(FirstTimeBitmapWriter, NormalOperation) {
   }
 }
 
+// Tests for GenerateBits and GenerateBitsUnrolled
+
+struct GenerateBitsFunctor {
+  template <class Generator>
+  void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length, 
Generator&& g) {
+    return internal::GenerateBits(bitmap, start_offset, length, g);
+  }
+};
+
+struct GenerateBitsUnrolledFunctor {
+  template <class Generator>
+  void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length, 
Generator&& g) {
+    return internal::GenerateBitsUnrolled(bitmap, start_offset, length, g);
+  }
+};
+
+template <typename T>
+class TestGenerateBits : public ::testing::Test {};
+
+typedef ::testing::Types<GenerateBitsFunctor, GenerateBitsUnrolledFunctor>
+    GenerateBitsTypes;
+TYPED_TEST_CASE(TestGenerateBits, GenerateBitsTypes);
+
+TYPED_TEST(TestGenerateBits, NormalOperation) {
+  const int kSourceSize = 256;
+  uint8_t source[kSourceSize];
+  test::random_bytes(kSourceSize, 0, source);
+
+  const int64_t start_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 21, 31, 32};
+  const int64_t lengths[] = {0,  1,  2,  3,  4,   5,   6,   7,   8,   9,   12, 
 16,
+                             17, 21, 31, 32, 100, 201, 202, 203, 204, 205, 
206, 207};
+  const uint8_t fill_bytes[] = {0x00, 0xff};
+
+  for (const int64_t start_offset : start_offsets) {
+    for (const int64_t length : lengths) {
+      for (const uint8_t fill_byte : fill_bytes) {
+        uint8_t bitmap[kSourceSize];
+        memset(bitmap, fill_byte, kSourceSize);
+        // First call GenerateBits
+        {
+          int64_t ncalled = 0;
+          internal::BitmapReader reader(source, 0, length);
+          TypeParam()(bitmap, start_offset, length, [&]() -> bool {
+            bool b = reader.IsSet();
+            reader.Next();
+            ++ncalled;
+            return b;
+          });
+          ASSERT_EQ(ncalled, length);
+        }
+        // Then check generated contents
+        {
+          internal::BitmapReader source_reader(source, 0, length);
+          internal::BitmapReader result_reader(bitmap, start_offset, length);
+          for (int64_t i = 0; i < length; ++i) {
+            ASSERT_EQ(source_reader.IsSet(), result_reader.IsSet())
+                << "mismatch at bit #" << i;
+            source_reader.Next();
+            result_reader.Next();
+          }
+        }
+        // Check bits preceding and following generated contents weren't 
clobbered
+        {
+          internal::BitmapReader reader_before(bitmap, 0, start_offset);
+          for (int64_t i = 0; i < start_offset; ++i) {
+            ASSERT_EQ(reader_before.IsSet(), fill_byte == 0xff)
+                << "mismatch at preceding bit #" << start_offset - i;
+          }
+        }
+      }
+    }
+  }
+}
+
 TEST(BitmapAnd, Aligned) {
   std::shared_ptr<Buffer> left, right, out;
   int64_t length;
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index c012f7c..6c06d9c 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -99,6 +99,9 @@ static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 
247, 239, 223, 191,
 // Bitmask selecting the (k - 1) preceding bits in a byte
 static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
 
+// the bitwise complement version of kPrecedingBitmask
+static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 
192, 128};
+
 static inline int64_t CeilByte(int64_t size) { return (size + 7) & ~7; }
 
 static inline int64_t BytesForBits(int64_t size) { return CeilByte(size) / 8; }
@@ -465,7 +468,7 @@ class BitmapWriter {
   BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
       : bitmap_(bitmap), position_(0), length_(length) {
     byte_offset_ = start_offset / 8;
-    bit_mask_ = static_cast<uint8_t>(1 << (start_offset % 8));
+    bit_mask_ = BitUtil::kBitmask[start_offset % 8];
     if (length > 0) {
       current_byte_ = bitmap[byte_offset_];
     } else {
@@ -519,7 +522,7 @@ class FirstTimeBitmapWriter {
       : bitmap_(bitmap), position_(0), length_(length) {
     current_byte_ = 0;
     byte_offset_ = start_offset / 8;
-    bit_mask_ = static_cast<uint8_t>(1 << (start_offset % 8));
+    bit_mask_ = BitUtil::kBitmask[start_offset % 8];
     if (length > 0) {
       current_byte_ = bitmap[byte_offset_] & 
BitUtil::kPrecedingBitmask[start_offset % 8];
     } else {
@@ -561,7 +564,83 @@ class FirstTimeBitmapWriter {
   int64_t byte_offset_;
 };
 
-// TODO: add a std::generate-like function for writing bitmaps?
+// A std::generate() like function to write sequential bits into a bitmap area.
+// Bits preceding the bitmap area are preserved, bits following the bitmap
+// area may be clobbered.
+
+template <class Generator>
+void GenerateBits(uint8_t* bitmap, int64_t start_offset, int64_t length, 
Generator&& g) {
+  if (length == 0) {
+    return;
+  }
+  uint8_t* cur = bitmap + start_offset / 8;
+  uint8_t bit_mask = BitUtil::kBitmask[start_offset % 8];
+  uint8_t current_byte = *cur & BitUtil::kPrecedingBitmask[start_offset % 8];
+
+  for (int64_t index = 0; index < length; ++index) {
+    const bool bit = g();
+    current_byte = bit ? (current_byte | bit_mask) : current_byte;
+    bit_mask = static_cast<uint8_t>(bit_mask << 1);
+    if (bit_mask == 0) {
+      bit_mask = 1;
+      *cur++ = current_byte;
+      current_byte = 0;
+    }
+  }
+  if (bit_mask != 1) {
+    *cur++ = current_byte;
+  }
+}
+
+// Like GenerateBits(), but unrolls its main loop for higher performance.
+
+template <class Generator>
+void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t 
length,
+                          Generator&& g) {
+  if (length == 0) {
+    return;
+  }
+  uint8_t current_byte;
+  uint8_t* cur = bitmap + start_offset / 8;
+  const uint64_t start_bit_offset = start_offset % 8;
+  uint8_t bit_mask = BitUtil::kBitmask[start_bit_offset];
+  int64_t remaining = length;
+
+  if (bit_mask != 0x01) {
+    current_byte = *cur & BitUtil::kPrecedingBitmask[start_bit_offset];
+    while (bit_mask != 0 && remaining > 0) {
+      current_byte = g() ? (current_byte | bit_mask) : current_byte;
+      bit_mask = static_cast<uint8_t>(bit_mask << 1);
+      --remaining;
+    }
+    *cur++ = current_byte;
+  }
+
+  int64_t remaining_bytes = remaining / 8;
+  while (remaining_bytes-- > 0) {
+    current_byte = 0;
+    current_byte = g() ? current_byte | 0x01 : current_byte;
+    current_byte = g() ? current_byte | 0x02 : current_byte;
+    current_byte = g() ? current_byte | 0x04 : current_byte;
+    current_byte = g() ? current_byte | 0x08 : current_byte;
+    current_byte = g() ? current_byte | 0x10 : current_byte;
+    current_byte = g() ? current_byte | 0x20 : current_byte;
+    current_byte = g() ? current_byte | 0x40 : current_byte;
+    current_byte = g() ? current_byte | 0x80 : current_byte;
+    *cur++ = current_byte;
+  }
+
+  int64_t remaining_bits = remaining % 8;
+  if (remaining_bits) {
+    current_byte = 0;
+    bit_mask = 0x01;
+    while (remaining_bits-- > 0) {
+      current_byte = g() ? (current_byte | bit_mask) : current_byte;
+      bit_mask = static_cast<uint8_t>(bit_mask << 1);
+    }
+    *cur++ = current_byte;
+  }
+}
 
 }  // namespace internal
 

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to