This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 27b869a ARROW-2649: [C++] Add GenerateBits() function to improve
bitmap writing performance
27b869a is described below
commit 27b869ae5df31f3be61e76e99999d96ea7d9b557
Author: Antoine Pitrou <[email protected]>
AuthorDate: Fri Jun 8 15:47:36 2018 -0400
ARROW-2649: [C++] Add GenerateBits() function to improve bitmap writing
performance
Also a GenerateBitsUnrolled() for higher performance where warranted.
Benchmarks:
- GenerateBits is 1.8x faster than BitmapWriter
- GenerateBitsUnrolled is 2.9x faster than BitmapWriter
- BooleanBuilder is now 3x faster than with BitmapWriter
(and around 9x faster than it was with SetBitTo calls)
Author: Antoine Pitrou <[email protected]>
Closes #2093 from pitrou/ARROW-2649-generate-bits and squashes the
following commits:
0ef0d12a <Antoine Pitrou> ARROW-2649: Add GenerateBits() function
---
cpp/src/arrow/builder.cc | 54 +++++--------------
cpp/src/arrow/compute/kernels/cast.cc | 15 ++----
cpp/src/arrow/util/bit-util-benchmark.cc | 92 ++++++++++++++++++++++++++------
cpp/src/arrow/util/bit-util-test.cc | 74 +++++++++++++++++++++++++
cpp/src/arrow/util/bit-util.h | 85 +++++++++++++++++++++++++++--
5 files changed, 249 insertions(+), 71 deletions(-)
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 65018de..9a8fc7d 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -210,7 +210,7 @@ void ArrayBuilder::UnsafeSetNotNull(int64_t length) {
memset(null_bitmap_data_ + ((length_ + pad_to_byte) / 8), 0xFF,
static_cast<size_t>(fast_length));
- // Trailing bytes
+ // Trailing bits
for (int64_t i = length_ + pad_to_byte + (fast_length * 8); i < new_length;
++i) {
BitUtil::SetBit(null_bitmap_data_, i);
}
@@ -775,16 +775,9 @@ Status BooleanBuilder::AppendValues(const uint8_t* values,
int64_t length,
const uint8_t* valid_bytes) {
RETURN_NOT_OK(Reserve(length));
- internal::FirstTimeBitmapWriter bit_writer(raw_data_, length_, length);
- for (int64_t i = 0; i < length; ++i) {
- if (values[i] != 0) {
- bit_writer.Set();
- } else {
- bit_writer.Clear();
- }
- bit_writer.Next();
- }
- bit_writer.Finish();
+ int64_t i = 0;
+ internal::GenerateBitsUnrolled(raw_data_, length_, length,
+ [values, &i]() -> bool { return values[i++]
!= 0; });
// this updates length_
ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
@@ -801,16 +794,9 @@ Status BooleanBuilder::AppendValues(const uint8_t* values,
int64_t length,
RETURN_NOT_OK(Reserve(length));
DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
- internal::FirstTimeBitmapWriter bit_writer(raw_data_, length_, length);
- for (int64_t i = 0; i < length; ++i) {
- if (values[i]) {
- bit_writer.Set();
- } else {
- bit_writer.Clear();
- }
- bit_writer.Next();
- }
- bit_writer.Finish();
+ int64_t i = 0;
+ internal::GenerateBitsUnrolled(raw_data_, length_, length,
+ [values, &i]() -> bool { return values[i++];
});
// this updates length_
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
@@ -846,16 +832,9 @@ Status BooleanBuilder::AppendValues(const
std::vector<bool>& values,
RETURN_NOT_OK(Reserve(length));
DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
- internal::FirstTimeBitmapWriter bit_writer(raw_data_, length_, length);
- for (int64_t i = 0; i < length; ++i) {
- if (values[i]) {
- bit_writer.Set();
- } else {
- bit_writer.Clear();
- }
- bit_writer.Next();
- }
- bit_writer.Finish();
+ int64_t i = 0;
+ internal::GenerateBitsUnrolled(raw_data_, length_, length,
+ [values, &i]() -> bool { return values[i++];
});
// this updates length_
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
@@ -871,16 +850,9 @@ Status BooleanBuilder::AppendValues(const
std::vector<bool>& values) {
const int64_t length = static_cast<int64_t>(values.size());
RETURN_NOT_OK(Reserve(length));
- internal::FirstTimeBitmapWriter bit_writer(raw_data_, length_, length);
- for (int64_t i = 0; i < length; ++i) {
- if (values[i]) {
- bit_writer.Set();
- } else {
- bit_writer.Clear();
- }
- bit_writer.Next();
- }
- bit_writer.Finish();
+ int64_t i = 0;
+ internal::GenerateBitsUnrolled(raw_data_, length_, length,
+ [values, &i]() -> bool { return values[i++];
});
// this updates length_
ArrayBuilder::UnsafeSetNotNull(length);
diff --git a/cpp/src/arrow/compute/kernels/cast.cc
b/cpp/src/arrow/compute/kernels/cast.cc
index 7987a1c..39925d7 100644
--- a/cpp/src/arrow/compute/kernels/cast.cc
+++ b/cpp/src/arrow/compute/kernels/cast.cc
@@ -200,18 +200,9 @@ struct CastFunctor<O, I,
void operator()(FunctionContext* ctx, const CastOptions& options,
const ArrayData& input, ArrayData* output) {
auto in_data = GetValues<typename I::c_type>(input, 1);
- internal::FirstTimeBitmapWriter writer(output->buffers[1]->mutable_data(),
- output->offset, input.length);
-
- for (int64_t i = 0; i < input.length; ++i) {
- if (*in_data++ != 0) {
- writer.Set();
- } else {
- writer.Clear();
- }
- writer.Next();
- }
- writer.Finish();
+ const auto generate = [&in_data]() -> bool { return *in_data++ != 0; };
+ internal::GenerateBitsUnrolled(output->buffers[1]->mutable_data(),
output->offset,
+ input.length, generate);
}
};
diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc
b/cpp/src/arrow/util/bit-util-benchmark.cc
index ace19e9..183e43f 100644
--- a/cpp/src/arrow/util/bit-util-benchmark.cc
+++ b/cpp/src/arrow/util/bit-util-benchmark.cc
@@ -126,26 +126,48 @@ static void BenchmarkBitmapWriter(benchmark::State&
state, int64_t nbytes) {
const int64_t num_bits = nbytes * 8;
uint8_t* bitmap = buffer->mutable_data();
+ const bool pattern[] = {false, false, false, true, true, true};
while (state.KeepRunning()) {
- {
- BitmapWriterType writer(bitmap, 0, num_bits);
- for (int64_t i = 0; i < num_bits; i++) {
+ int64_t pattern_index = 0;
+ BitmapWriterType writer(bitmap, 0, num_bits);
+ for (int64_t i = 0; i < num_bits; i++) {
+ if (pattern[pattern_index++]) {
writer.Set();
- writer.Next();
- }
- writer.Finish();
- benchmark::ClobberMemory();
- }
- {
- BitmapWriterType writer(bitmap, 0, num_bits);
- for (int64_t i = 0; i < num_bits; i++) {
+ } else {
writer.Clear();
- writer.Next();
}
- writer.Finish();
- benchmark::ClobberMemory();
+ if (pattern_index == sizeof(pattern) / sizeof(bool)) {
+ pattern_index = 0;
+ }
+ writer.Next();
}
+ writer.Finish();
+ benchmark::ClobberMemory();
+ }
+ state.SetBytesProcessed(int64_t(state.iterations()) * nbytes);
+}
+
+template <typename GenerateBitsFunctorType>
+static void BenchmarkGenerateBits(benchmark::State& state, int64_t nbytes) {
+ std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
+
+ const int64_t num_bits = nbytes * 8;
+ uint8_t* bitmap = buffer->mutable_data();
+ // pattern should be the same as in BenchmarkBitmapWriter
+ const bool pattern[] = {false, false, false, true, true, true};
+
+ while (state.KeepRunning()) {
+ int64_t pattern_index = 0;
+ const auto generate = [&]() -> bool {
+ bool b = pattern[pattern_index++];
+ if (pattern_index == sizeof(pattern) / sizeof(bool)) {
+ pattern_index = 0;
+ }
+ return b;
+ };
+ GenerateBitsFunctorType()(bitmap, 0, num_bits, generate);
+ benchmark::ClobberMemory();
}
state.SetBytesProcessed(2 * int64_t(state.iterations()) * nbytes);
}
@@ -170,6 +192,28 @@ static void BM_FirstTimeBitmapWriter(benchmark::State&
state) {
BenchmarkBitmapWriter<internal::FirstTimeBitmapWriter>(state,
state.range(0));
}
+struct GenerateBitsFunctor {
+ template <class Generator>
+ void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length,
Generator&& g) {
+ return internal::GenerateBits(bitmap, start_offset, length, g);
+ }
+};
+
+struct GenerateBitsUnrolledFunctor {
+ template <class Generator>
+ void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length,
Generator&& g) {
+ return internal::GenerateBitsUnrolled(bitmap, start_offset, length, g);
+ }
+};
+
+static void BM_GenerateBits(benchmark::State& state) {
+ BenchmarkGenerateBits<GenerateBitsFunctor>(state, state.range(0));
+}
+
+static void BM_GenerateBitsUnrolled(benchmark::State& state) {
+ BenchmarkGenerateBits<GenerateBitsUnrolledFunctor>(state, state.range(0));
+}
+
static void BM_CopyBitmap(benchmark::State& state) { // NOLINT non-const
reference
const int kBufferSize = static_cast<int>(state.range(0));
std::shared_ptr<Buffer> buffer = CreateRandomBuffer(kBufferSize);
@@ -201,13 +245,31 @@
BENCHMARK(BM_BitmapReader)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicro
BENCHMARK(BM_NaiveBitmapWriter)
->Args({100000})
+ ->Repetitions(2)
->MinTime(1.0)
->Unit(benchmark::kMicrosecond);
-BENCHMARK(BM_BitmapWriter)->Args({100000})->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_BitmapWriter)
+ ->Args({100000})
+ ->Repetitions(2)
+ ->MinTime(1.0)
+ ->Unit(benchmark::kMicrosecond);
BENCHMARK(BM_FirstTimeBitmapWriter)
->Args({100000})
+ ->Repetitions(2)
+ ->MinTime(1.0)
+ ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_GenerateBits)
+ ->Args({100000})
+ ->Repetitions(2)
+ ->MinTime(1.0)
+ ->Unit(benchmark::kMicrosecond);
+
+BENCHMARK(BM_GenerateBitsUnrolled)
+ ->Args({100000})
+ ->Repetitions(2)
->MinTime(1.0)
->Unit(benchmark::kMicrosecond);
diff --git a/cpp/src/arrow/util/bit-util-test.cc
b/cpp/src/arrow/util/bit-util-test.cc
index 5e0b696..ae8923d 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -282,6 +282,80 @@ TEST(FirstTimeBitmapWriter, NormalOperation) {
}
}
+// Tests for GenerateBits and GenerateBitsUnrolled
+
+struct GenerateBitsFunctor {
+ template <class Generator>
+ void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length,
Generator&& g) {
+ return internal::GenerateBits(bitmap, start_offset, length, g);
+ }
+};
+
+struct GenerateBitsUnrolledFunctor {
+ template <class Generator>
+ void operator()(uint8_t* bitmap, int64_t start_offset, int64_t length,
Generator&& g) {
+ return internal::GenerateBitsUnrolled(bitmap, start_offset, length, g);
+ }
+};
+
+template <typename T>
+class TestGenerateBits : public ::testing::Test {};
+
+typedef ::testing::Types<GenerateBitsFunctor, GenerateBitsUnrolledFunctor>
+ GenerateBitsTypes;
+TYPED_TEST_CASE(TestGenerateBits, GenerateBitsTypes);
+
+TYPED_TEST(TestGenerateBits, NormalOperation) {
+ const int kSourceSize = 256;
+ uint8_t source[kSourceSize];
+ test::random_bytes(kSourceSize, 0, source);
+
+ const int64_t start_offsets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 21, 31, 32};
+ const int64_t lengths[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12,
16,
+ 17, 21, 31, 32, 100, 201, 202, 203, 204, 205,
206, 207};
+ const uint8_t fill_bytes[] = {0x00, 0xff};
+
+ for (const int64_t start_offset : start_offsets) {
+ for (const int64_t length : lengths) {
+ for (const uint8_t fill_byte : fill_bytes) {
+ uint8_t bitmap[kSourceSize];
+ memset(bitmap, fill_byte, kSourceSize);
+ // First call GenerateBits
+ {
+ int64_t ncalled = 0;
+ internal::BitmapReader reader(source, 0, length);
+ TypeParam()(bitmap, start_offset, length, [&]() -> bool {
+ bool b = reader.IsSet();
+ reader.Next();
+ ++ncalled;
+ return b;
+ });
+ ASSERT_EQ(ncalled, length);
+ }
+ // Then check generated contents
+ {
+ internal::BitmapReader source_reader(source, 0, length);
+ internal::BitmapReader result_reader(bitmap, start_offset, length);
+ for (int64_t i = 0; i < length; ++i) {
+ ASSERT_EQ(source_reader.IsSet(), result_reader.IsSet())
+ << "mismatch at bit #" << i;
+ source_reader.Next();
+ result_reader.Next();
+ }
+ }
+ // Check bits preceding and following generated contents weren't
clobbered
+ {
+ internal::BitmapReader reader_before(bitmap, 0, start_offset);
+ for (int64_t i = 0; i < start_offset; ++i) {
+ ASSERT_EQ(reader_before.IsSet(), fill_byte == 0xff)
+ << "mismatch at preceding bit #" << start_offset - i;
+ }
+ }
+ }
+ }
+ }
+}
+
TEST(BitmapAnd, Aligned) {
std::shared_ptr<Buffer> left, right, out;
int64_t length;
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index c012f7c..6c06d9c 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -99,6 +99,9 @@ static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251,
247, 239, 223, 191,
// Bitmask selecting the (k - 1) preceding bits in a byte
static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
+// the bitwise complement version of kPrecedingBitmask
+static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224,
192, 128};
+
static inline int64_t CeilByte(int64_t size) { return (size + 7) & ~7; }
static inline int64_t BytesForBits(int64_t size) { return CeilByte(size) / 8; }
@@ -465,7 +468,7 @@ class BitmapWriter {
BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(bitmap), position_(0), length_(length) {
byte_offset_ = start_offset / 8;
- bit_mask_ = static_cast<uint8_t>(1 << (start_offset % 8));
+ bit_mask_ = BitUtil::kBitmask[start_offset % 8];
if (length > 0) {
current_byte_ = bitmap[byte_offset_];
} else {
@@ -519,7 +522,7 @@ class FirstTimeBitmapWriter {
: bitmap_(bitmap), position_(0), length_(length) {
current_byte_ = 0;
byte_offset_ = start_offset / 8;
- bit_mask_ = static_cast<uint8_t>(1 << (start_offset % 8));
+ bit_mask_ = BitUtil::kBitmask[start_offset % 8];
if (length > 0) {
current_byte_ = bitmap[byte_offset_] &
BitUtil::kPrecedingBitmask[start_offset % 8];
} else {
@@ -561,7 +564,83 @@ class FirstTimeBitmapWriter {
int64_t byte_offset_;
};
-// TODO: add a std::generate-like function for writing bitmaps?
+// A std::generate() like function to write sequential bits into a bitmap area.
+// Bits preceding the bitmap area are preserved, bits following the bitmap
+// area may be clobbered.
+
+template <class Generator>
+void GenerateBits(uint8_t* bitmap, int64_t start_offset, int64_t length,
Generator&& g) {
+ if (length == 0) {
+ return;
+ }
+ uint8_t* cur = bitmap + start_offset / 8;
+ uint8_t bit_mask = BitUtil::kBitmask[start_offset % 8];
+ uint8_t current_byte = *cur & BitUtil::kPrecedingBitmask[start_offset % 8];
+
+ for (int64_t index = 0; index < length; ++index) {
+ const bool bit = g();
+ current_byte = bit ? (current_byte | bit_mask) : current_byte;
+ bit_mask = static_cast<uint8_t>(bit_mask << 1);
+ if (bit_mask == 0) {
+ bit_mask = 1;
+ *cur++ = current_byte;
+ current_byte = 0;
+ }
+ }
+ if (bit_mask != 1) {
+ *cur++ = current_byte;
+ }
+}
+
+// Like GenerateBits(), but unrolls its main loop for higher performance.
+
+template <class Generator>
+void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t
length,
+ Generator&& g) {
+ if (length == 0) {
+ return;
+ }
+ uint8_t current_byte;
+ uint8_t* cur = bitmap + start_offset / 8;
+ const uint64_t start_bit_offset = start_offset % 8;
+ uint8_t bit_mask = BitUtil::kBitmask[start_bit_offset];
+ int64_t remaining = length;
+
+ if (bit_mask != 0x01) {
+ current_byte = *cur & BitUtil::kPrecedingBitmask[start_bit_offset];
+ while (bit_mask != 0 && remaining > 0) {
+ current_byte = g() ? (current_byte | bit_mask) : current_byte;
+ bit_mask = static_cast<uint8_t>(bit_mask << 1);
+ --remaining;
+ }
+ *cur++ = current_byte;
+ }
+
+ int64_t remaining_bytes = remaining / 8;
+ while (remaining_bytes-- > 0) {
+ current_byte = 0;
+ current_byte = g() ? current_byte | 0x01 : current_byte;
+ current_byte = g() ? current_byte | 0x02 : current_byte;
+ current_byte = g() ? current_byte | 0x04 : current_byte;
+ current_byte = g() ? current_byte | 0x08 : current_byte;
+ current_byte = g() ? current_byte | 0x10 : current_byte;
+ current_byte = g() ? current_byte | 0x20 : current_byte;
+ current_byte = g() ? current_byte | 0x40 : current_byte;
+ current_byte = g() ? current_byte | 0x80 : current_byte;
+ *cur++ = current_byte;
+ }
+
+ int64_t remaining_bits = remaining % 8;
+ if (remaining_bits) {
+ current_byte = 0;
+ bit_mask = 0x01;
+ while (remaining_bits-- > 0) {
+ current_byte = g() ? (current_byte | bit_mask) : current_byte;
+ bit_mask = static_cast<uint8_t>(bit_mask << 1);
+ }
+ *cur++ = current_byte;
+ }
+}
} // namespace internal
--
To stop receiving notification emails like this one, please contact
[email protected].