Repository: arrow Updated Branches: refs/heads/master 1f26040f5 -> ad0157547
ARROW-553: C++: Faster valid bitmap building Author: Uwe L. Korn <[email protected]> Closes #338 from xhochy/ARROW-553 and squashes the following commits: 1c1ee3d [Uwe L. Korn] ARROW-553: C++: Faster valid bitmap building Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ad015754 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ad015754 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ad015754 Branch: refs/heads/master Commit: ad0157547a4f5e6e51fa2f712c2ed9477489a20c Parents: 1f26040 Author: Uwe L. Korn <[email protected]> Authored: Mon Feb 13 09:03:46 2017 -0500 Committer: Wes McKinney <[email protected]> Committed: Mon Feb 13 09:03:46 2017 -0500 ---------------------------------------------------------------------- cpp/src/arrow/builder.cc | 41 ++++++++++++++++++-- .../jemalloc/jemalloc-builder-benchmark.cc | 2 +- 2 files changed, 38 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/ad015754/cpp/src/arrow/builder.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index dddadee..f5c13f9 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -114,18 +114,51 @@ void ArrayBuilder::UnsafeAppendToBitmap(const uint8_t* valid_bytes, int32_t leng UnsafeSetNotNull(length); return; } + + int byte_offset = length_ / 8; + int bit_offset = length_ % 8; + uint8_t bitset = null_bitmap_data_[byte_offset]; + for (int32_t i = 0; i < length; ++i) { - // TODO(emkornfield) Optimize for large values of length? - UnsafeAppendToBitmap(valid_bytes[i] > 0); + if (valid_bytes[i]) { + bitset |= (1 << bit_offset); + } else { + bitset &= ~(1 << bit_offset); + ++null_count_; + } + + bit_offset++; + if (bit_offset == 8) { + bit_offset = 0; + null_bitmap_data_[byte_offset] = bitset; + byte_offset++; + // TODO: Except for the last byte, this shouldn't be needed + bitset = null_bitmap_data_[byte_offset]; + } } + if (bit_offset != 0) { null_bitmap_data_[byte_offset] = bitset; } + length_ += length; } void ArrayBuilder::UnsafeSetNotNull(int32_t length) { const int32_t new_length = length + length_; - // TODO(emkornfield) Optimize for large values of length? - for (int32_t i = length_; i < new_length; ++i) { + + // Fill up the bytes until we have a byte alignment + int32_t pad_to_byte = 8 - (length_ % 8); + if (pad_to_byte == 8) { pad_to_byte = 0; } + for (int32_t i = 0; i < pad_to_byte; ++i) { + BitUtil::SetBit(null_bitmap_data_, i); + } + + // Fast bitsetting + int32_t fast_length = (length - pad_to_byte) / 8; + memset(null_bitmap_data_ + ((length_ + pad_to_byte) / 8), 255, fast_length); + + // Trailing bytes + for (int32_t i = length_ + pad_to_byte + (fast_length * 8); i < new_length; ++i) { BitUtil::SetBit(null_bitmap_data_, i); } + length_ = new_length; } http://git-wip-us.apache.org/repos/asf/arrow/blob/ad015754/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc b/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc index 58dbaa3..d69c304 100644 --- a/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc +++ b/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc @@ -42,6 +42,6 @@ static void BM_BuildPrimitiveArrayNoNulls( state.iterations() * data.size() * sizeof(int64_t) * kFinalSize); } -BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(3)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(5)->Unit(benchmark::kMillisecond); } // namespace arrow
