This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new bbca717  ARROW-4341: [C++] Refactor Primitive builders and 
BooleanBuilder to use TypedBufferBuilder<T>
bbca717 is described below

commit bbca7178adf1214425f95164a887989b018707f8
Author: Benjamin Kietzman <[email protected]>
AuthorDate: Sat Feb 16 17:44:46 2019 -0600

    ARROW-4341: [C++] Refactor Primitive builders and BooleanBuilder to use 
TypedBufferBuilder<T>
    
    This reduces code duplication.
    
    Author: Benjamin Kietzman <[email protected]>
    Author: Wes McKinney <[email protected]>
    
    Closes #3575 from bkietz/ARROW-4341-primitive-builders-use-bufferbuilder 
and squashes the following commits:
    
    3ef29721b <Wes McKinney> Fix BooleanBuilder::AppendNulls, remove 
valid_bytes argument from AppendNulls methods
    40c4d8d5c <Benjamin Kietzman> TypedBufferBuilder<bool>'s output was not 
correctly sized
    b389c1308 <Wes McKinney> Revert changes to arrow/util/logging.h
    daf524423 <Wes McKinney> Revert change to UnsafeAppend that broke Python 
unit test
    3cc5a0c46 <Wes McKinney> Restore memory zeroing. Add missing override
    21ce28558 <Wes McKinney> Fix RETURN_NOT_OK usages
    d4ab3b539 <Wes McKinney> Move NumericBuilder implementation to headers to 
avoid symbol visibility concerns
    6c1e99d4e <Wes McKinney> Add TypedBufferBuilder<bool> UnsafeAppend 
compile-time option to not track falses. Restore faster code from before this 
patch for appending C arrays and vector<bool>
    09d2bfe8f <Benjamin Kietzman> reduce unnecessary zeroing in BufferBuilder
    bd736c3db <Benjamin Kietzman> add ArrowLogIgnore and use for release mode 
DCHECK*
    7ba692cc9 <Benjamin Kietzman> moving to iterator append in NumericBuilder
    188b7b9cc <Benjamin Kietzman> fix format
    893457306 <Benjamin Kietzman> add explicit cast
    88e57fe58 <Benjamin Kietzman> remove PrimitiveBuilder
    9c050b4dc <Benjamin Kietzman> Use TypedBufferBuilder for PrimitiveBuilder
    078497a38 <Benjamin Kietzman> fix BooleanBuilder::AppendNull
    88eb71c97 <Benjamin Kietzman> Use TypedBufferBuilder<bool> in BooleanBuilder
---
 c_glib/arrow-glib/array-builder.cpp      |   4 +-
 cpp/src/arrow/array-test.cc              |  43 ++++--
 cpp/src/arrow/array/builder_adaptive.h   |   7 +-
 cpp/src/arrow/array/builder_base.cc      |   6 +
 cpp/src/arrow/array/builder_base.h       |   2 +
 cpp/src/arrow/array/builder_primitive.cc | 169 ++----------------------
 cpp/src/arrow/array/builder_primitive.h  | 217 +++++++++++++++----------------
 cpp/src/arrow/buffer-builder.h           |  40 +++++-
 cpp/src/arrow/buffer-test.cc             |   4 +
 9 files changed, 201 insertions(+), 291 deletions(-)

diff --git a/c_glib/arrow-glib/array-builder.cpp 
b/c_glib/arrow-glib/array-builder.cpp
index 095c68d..afdae8c 100644
--- a/c_glib/arrow-glib/array-builder.cpp
+++ b/c_glib/arrow-glib/array-builder.cpp
@@ -136,9 +136,7 @@ garrow_array_builder_append_nulls(GArrowArrayBuilder 
*builder,
 
   auto arrow_builder =
     static_cast<BUILDER>(garrow_array_builder_get_raw(builder));
-  uint8_t valid_bytes[n];
-  memset(valid_bytes, 0, sizeof(uint8_t) * n);
-  auto status = arrow_builder->AppendNulls(valid_bytes, n);
+  auto status = arrow_builder->AppendNulls(n);
   return garrow_error_check(error, status, context);
 }
 
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 3b348de..dead8de 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -478,7 +478,11 @@ void TestPrimitiveBuilder<PBoolean>::Check(const 
std::unique_ptr<BooleanBuilder>
       ASSERT_EQ(draws_[i] != 0, actual) << i;
     }
   }
-  ASSERT_TRUE(result->Equals(*expected));
+  AssertArraysEqual(*result, *expected);
+
+  // buffers are correctly sized
+  ASSERT_EQ(result->data()->buffers[0]->size(), BitUtil::BytesForBits(size));
+  ASSERT_EQ(result->data()->buffers[1]->size(), BitUtil::BytesForBits(size));
 
   // Builder is now reset
   ASSERT_EQ(0, builder->length());
@@ -518,15 +522,13 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {
 
 TYPED_TEST(TestPrimitiveBuilder, TestAppendNulls) {
   const int64_t size = 10;
-  const uint8_t valid_bytes[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
-
-  ASSERT_OK(this->builder_->AppendNulls(valid_bytes, size));
+  ASSERT_OK(this->builder_->AppendNulls(size));
 
   std::shared_ptr<Array> result;
   FinishAndCheckPadding(this->builder_.get(), &result);
 
   for (int64_t i = 0; i < size; ++i) {
-    ASSERT_EQ(result->IsValid(i), static_cast<bool>(valid_bytes[i]));
+    ASSERT_FALSE(result->IsValid(i));
   }
 }
 
@@ -922,6 +924,27 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
   ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), 
this->builder_->capacity());
 }
 
+TEST(TestBooleanBuilder, AppendNullsAdvanceBuilder) {
+  BooleanBuilder builder;
+
+  std::vector<uint8_t> values = {1, 0, 0, 1};
+  std::vector<uint8_t> is_valid = {1, 1, 0, 1};
+
+  std::shared_ptr<Array> arr;
+  ASSERT_OK(builder.AppendValues(values.data(), 2));
+  ASSERT_OK(builder.AppendNulls(1));
+  ASSERT_OK(builder.AppendValues(values.data() + 3, 1));
+  ASSERT_OK(builder.Finish(&arr));
+
+  ASSERT_EQ(1, arr->null_count());
+
+  const auto& barr = static_cast<const BooleanArray&>(*arr);
+  ASSERT_TRUE(barr.Value(0));
+  ASSERT_FALSE(barr.Value(1));
+  ASSERT_TRUE(barr.IsNull(2));
+  ASSERT_TRUE(barr.Value(3));
+}
+
 TEST(TestBooleanBuilder, TestStdBoolVectorAppend) {
   BooleanBuilder builder;
   BooleanBuilder builder_nn;
@@ -1391,13 +1414,12 @@ TEST_F(TestAdaptiveIntBuilder, TestAppendNull) {
 
 TEST_F(TestAdaptiveIntBuilder, TestAppendNulls) {
   constexpr int64_t size = 10;
-  const uint8_t valid_bytes[size] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
-  ASSERT_OK(builder_->AppendNulls(valid_bytes, size));
+  ASSERT_OK(builder_->AppendNulls(size));
 
   Done();
 
   for (unsigned index = 0; index < size; ++index) {
-    ASSERT_EQ(result_->IsValid(index), static_cast<bool>(valid_bytes[index]));
+    ASSERT_FALSE(result_->IsValid(index));
   }
 }
 
@@ -1526,13 +1548,12 @@ TEST_F(TestAdaptiveUIntBuilder, TestAppendNull) {
 
 TEST_F(TestAdaptiveUIntBuilder, TestAppendNulls) {
   constexpr int64_t size = 10;
-  const uint8_t valid_bytes[size] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
-  ASSERT_OK(builder_->AppendNulls(valid_bytes, size));
+  ASSERT_OK(builder_->AppendNulls(size));
 
   Done();
 
   for (unsigned index = 0; index < size; ++index) {
-    ASSERT_EQ(result_->IsValid(index), static_cast<bool>(valid_bytes[index]));
+    ASSERT_FALSE(result_->IsValid(index));
   }
 }
 
diff --git a/cpp/src/arrow/array/builder_adaptive.h 
b/cpp/src/arrow/array/builder_adaptive.h
index 6523de4..afbfca2 100644
--- a/cpp/src/arrow/array/builder_adaptive.h
+++ b/cpp/src/arrow/array/builder_adaptive.h
@@ -29,12 +29,13 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public 
ArrayBuilder {
  public:
   explicit AdaptiveIntBuilderBase(MemoryPool* pool);
 
-  /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated 
memory
-  Status AppendNulls(const uint8_t* valid_bytes, int64_t length) {
+  /// \brief Append multiple nulls
+  /// \param[in] length the number of nulls to append
+  Status AppendNulls(int64_t length) {
     ARROW_RETURN_NOT_OK(CommitPendingData());
     ARROW_RETURN_NOT_OK(Reserve(length));
     memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
-    UnsafeAppendToBitmap(valid_bytes, length);
+    UnsafeSetNull(length);
     return Status::OK();
   }
 
diff --git a/cpp/src/arrow/array/builder_base.cc 
b/cpp/src/arrow/array/builder_base.cc
index e805900..75baedd 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -106,4 +106,10 @@ void ArrayBuilder::UnsafeSetNotNull(int64_t length) {
   null_bitmap_builder_.UnsafeAppend(length, true);
 }
 
+void ArrayBuilder::UnsafeSetNull(int64_t length) {
+  length_ += length;
+  null_count_ += length;
+  null_bitmap_builder_.UnsafeAppend(length, false);
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/array/builder_base.h 
b/cpp/src/arrow/array/builder_base.h
index f4655fa..ebe1ece 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -162,6 +162,8 @@ class ARROW_EXPORT ArrayBuilder {
   // Set the next length bits to not null (i.e. valid).
   void UnsafeSetNotNull(int64_t length);
 
+  void UnsafeSetNull(int64_t length);
+
   static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* 
buffer);
 
   static Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) {
diff --git a/cpp/src/arrow/array/builder_primitive.cc 
b/cpp/src/arrow/array/builder_primitive.cc
index a593f36..13e8f2e 100644
--- a/cpp/src/arrow/array/builder_primitive.cc
+++ b/cpp/src/arrow/array/builder_primitive.cc
@@ -45,104 +45,8 @@ Status 
NullBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
   return Status::OK();
 }
 
-// ----------------------------------------------------------------------
-
-template <typename T>
-void PrimitiveBuilder<T>::Reset() {
-  data_.reset();
-  raw_data_ = nullptr;
-}
-
-template <typename T>
-Status PrimitiveBuilder<T>::Resize(int64_t capacity) {
-  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
-  capacity = std::max(capacity, kMinBuilderCapacity);
-
-  int64_t nbytes = TypeTraits<T>::bytes_required(capacity);
-  if (capacity_ == 0) {
-    RETURN_NOT_OK(AllocateResizableBuffer(pool_, nbytes, &data_));
-  } else {
-    RETURN_NOT_OK(data_->Resize(nbytes));
-  }
-
-  raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
-  return ArrayBuilder::Resize(capacity);
-}
-
-template <typename T>
-Status PrimitiveBuilder<T>::AppendValues(const value_type* values, int64_t 
length,
-                                         const uint8_t* valid_bytes) {
-  RETURN_NOT_OK(Reserve(length));
-
-  if (length > 0) {
-    std::memcpy(raw_data_ + length_, values,
-                
static_cast<std::size_t>(TypeTraits<T>::bytes_required(length)));
-  }
-
-  // length_ is update by these
-  ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
-  return Status::OK();
-}
-
-template <typename T>
-Status PrimitiveBuilder<T>::AppendValues(const value_type* values, int64_t 
length,
-                                         const std::vector<bool>& is_valid) {
-  RETURN_NOT_OK(Reserve(length));
-  DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
-
-  if (length > 0) {
-    std::memcpy(raw_data_ + length_, values,
-                
static_cast<std::size_t>(TypeTraits<T>::bytes_required(length)));
-  }
-
-  // length_ is update by these
-  ArrayBuilder::UnsafeAppendToBitmap(is_valid);
-  return Status::OK();
-}
-
-template <typename T>
-Status PrimitiveBuilder<T>::AppendValues(const std::vector<value_type>& values,
-                                         const std::vector<bool>& is_valid) {
-  return AppendValues(values.data(), static_cast<int64_t>(values.size()), 
is_valid);
-}
-
-template <typename T>
-Status PrimitiveBuilder<T>::AppendValues(const std::vector<value_type>& 
values) {
-  return AppendValues(values.data(), static_cast<int64_t>(values.size()));
-}
-
-template <typename T>
-Status PrimitiveBuilder<T>::FinishInternal(std::shared_ptr<ArrayData>* out) {
-  RETURN_NOT_OK(TrimBuffer(TypeTraits<T>::bytes_required(length_), 
data_.get()));
-  std::shared_ptr<Buffer> null_bitmap;
-  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-  *out = ArrayData::Make(type_, length_, {null_bitmap, data_}, null_count_);
-
-  data_ = nullptr;
-  capacity_ = length_ = null_count_ = 0;
-
-  return Status::OK();
-}
-
-template class PrimitiveBuilder<UInt8Type>;
-template class PrimitiveBuilder<UInt16Type>;
-template class PrimitiveBuilder<UInt32Type>;
-template class PrimitiveBuilder<UInt64Type>;
-template class PrimitiveBuilder<Int8Type>;
-template class PrimitiveBuilder<Int16Type>;
-template class PrimitiveBuilder<Int32Type>;
-template class PrimitiveBuilder<Int64Type>;
-template class PrimitiveBuilder<Date32Type>;
-template class PrimitiveBuilder<Date64Type>;
-template class PrimitiveBuilder<Time32Type>;
-template class PrimitiveBuilder<Time64Type>;
-template class PrimitiveBuilder<TimestampType>;
-template class PrimitiveBuilder<HalfFloatType>;
-template class PrimitiveBuilder<FloatType>;
-template class PrimitiveBuilder<DoubleType>;
-
 BooleanBuilder::BooleanBuilder(MemoryPool* pool)
-    : ArrayBuilder(boolean(), pool), data_(nullptr), raw_data_(nullptr) {}
+    : ArrayBuilder(boolean(), pool), data_builder_(pool) {}
 
 BooleanBuilder::BooleanBuilder(const std::shared_ptr<DataType>& type, 
MemoryPool* pool)
     : BooleanBuilder(pool) {
@@ -151,57 +55,23 @@ BooleanBuilder::BooleanBuilder(const 
std::shared_ptr<DataType>& type, MemoryPool
 
 void BooleanBuilder::Reset() {
   ArrayBuilder::Reset();
-  data_.reset();
-  raw_data_ = nullptr;
+  data_builder_.Reset();
 }
 
 Status BooleanBuilder::Resize(int64_t capacity) {
   RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
   capacity = std::max(capacity, kMinBuilderCapacity);
-
-  const int64_t new_bitmap_size = BitUtil::BytesForBits(capacity);
-  if (capacity_ == 0) {
-    RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_bitmap_size, &data_));
-    raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
-
-    // We zero the memory for booleans to keep things simple; for some reason 
if
-    // we do not, even though we may write every bit (through in-place | or &),
-    // valgrind will still show a warning. If we do not zero the bytes here, we
-    // will have to be careful to zero them in AppendNull and AppendNulls. 
Also,
-    // zeroing the bits results in deterministic bits when each byte may have a
-    // mix of nulls and not nulls.
-    //
-    // We only zero up to new_bitmap_size because the padding was zeroed by
-    // AllocateResizableBuffer
-    memset(raw_data_, 0, static_cast<size_t>(new_bitmap_size));
-  } else {
-    const int64_t old_bitmap_capacity = data_->capacity();
-    RETURN_NOT_OK(data_->Resize(new_bitmap_size));
-    const int64_t new_bitmap_capacity = data_->capacity();
-    raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());
-
-    // See comment above about why we zero memory for booleans
-    memset(raw_data_ + old_bitmap_capacity, 0,
-           static_cast<size_t>(new_bitmap_capacity - old_bitmap_capacity));
-  }
-
+  RETURN_NOT_OK(data_builder_.Resize(capacity));
   return ArrayBuilder::Resize(capacity);
 }
 
 Status BooleanBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
-  int64_t bit_offset = length_ % 8;
-  if (bit_offset > 0) {
-    // Adjust last byte
-    data_->mutable_data()[length_ / 8] &= 
BitUtil::kPrecedingBitmask[bit_offset];
-  }
-
-  std::shared_ptr<Buffer> null_bitmap;
+  std::shared_ptr<Buffer> data, null_bitmap;
+  RETURN_NOT_OK(data_builder_.Finish(&data));
   RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-  RETURN_NOT_OK(TrimBuffer(BitUtil::BytesForBits(length_), data_.get()));
 
-  *out = ArrayData::Make(boolean(), length_, {null_bitmap, data_}, 
null_count_);
+  *out = ArrayData::Make(boolean(), length_, {null_bitmap, data}, null_count_);
 
-  data_ = nullptr;
   capacity_ = length_ = null_count_ = 0;
   return Status::OK();
 }
@@ -211,10 +81,8 @@ Status BooleanBuilder::AppendValues(const uint8_t* values, 
int64_t length,
   RETURN_NOT_OK(Reserve(length));
 
   int64_t i = 0;
-  internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                 [values, &i]() -> bool { return values[i++] 
!= 0; });
-
-  // this updates length_
+  data_builder_.UnsafeAppend<false>(length,
+                                    [values, &i]() -> bool { return 
values[i++] != 0; });
   ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
   return Status::OK();
 }
@@ -223,12 +91,9 @@ Status BooleanBuilder::AppendValues(const uint8_t* values, 
int64_t length,
                                     const std::vector<bool>& is_valid) {
   RETURN_NOT_OK(Reserve(length));
   DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
-
   int64_t i = 0;
-  internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                 [values, &i]() -> bool { return values[i++]; 
});
-
-  // this updates length_
+  data_builder_.UnsafeAppend<false>(length,
+                                    [values, &i]() -> bool { return 
values[i++]; });
   ArrayBuilder::UnsafeAppendToBitmap(is_valid);
   return Status::OK();
 }
@@ -247,12 +112,9 @@ Status BooleanBuilder::AppendValues(const 
std::vector<bool>& values,
   const int64_t length = static_cast<int64_t>(values.size());
   RETURN_NOT_OK(Reserve(length));
   DCHECK_EQ(length, static_cast<int64_t>(is_valid.size()));
-
   int64_t i = 0;
-  internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                 [&values, &i]() -> bool { return values[i++]; 
});
-
-  // this updates length_
+  data_builder_.UnsafeAppend<false>(length,
+                                    [&values, &i]() -> bool { return 
values[i++]; });
   ArrayBuilder::UnsafeAppendToBitmap(is_valid);
   return Status::OK();
 }
@@ -260,12 +122,9 @@ Status BooleanBuilder::AppendValues(const 
std::vector<bool>& values,
 Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {
   const int64_t length = static_cast<int64_t>(values.size());
   RETURN_NOT_OK(Reserve(length));
-
   int64_t i = 0;
-  internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                 [&values, &i]() -> bool { return values[i++]; 
});
-
-  // this updates length_
+  data_builder_.UnsafeAppend<false>(length,
+                                    [&values, &i]() -> bool { return 
values[i++]; });
   ArrayBuilder::UnsafeSetNotNull(length);
   return Status::OK();
 }
diff --git a/cpp/src/arrow/array/builder_primitive.h 
b/cpp/src/arrow/array/builder_primitive.h
index bf3ec91..5a9b694 100644
--- a/cpp/src/arrow/array/builder_primitive.h
+++ b/cpp/src/arrow/array/builder_primitive.h
@@ -21,6 +21,7 @@
 #include <memory>
 #include <vector>
 
+#include "arrow/array.h"
 #include "arrow/array/builder_base.h"
 #include "arrow/type.h"
 
@@ -42,37 +43,53 @@ class ARROW_EXPORT NullBuilder : public ArrayBuilder {
   Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
 };
 
-template <typename Type>
-class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder {
+/// Base class for all Builders that emit an Array of a scalar numerical type.
+template <typename T>
+class NumericBuilder : public ArrayBuilder {
  public:
-  using value_type = typename Type::c_type;
+  using value_type = typename T::c_type;
+  using ArrayBuilder::ArrayBuilder;
 
-  explicit PrimitiveBuilder(const std::shared_ptr<DataType>& type, MemoryPool* 
pool)
-      : ArrayBuilder(type, pool), data_(NULLPTR), raw_data_(NULLPTR) {}
+  template <typename T1 = T>
+  explicit NumericBuilder(
+      typename std::enable_if<TypeTraits<T1>::is_parameter_free, 
MemoryPool*>::type pool
+          ARROW_MEMORY_POOL_DEFAULT)
+      : ArrayBuilder(TypeTraits<T1>::type_singleton(), pool) {}
 
-  using ArrayBuilder::Advance;
+  /// Append a single scalar and increase the size if necessary.
+  Status Append(const value_type val) {
+    ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1));
+    UnsafeAppend(val);
+    return Status::OK();
+  }
 
   /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated 
memory
   /// The memory at the corresponding data slot is set to 0 to prevent
   /// uninitialized memory access
-  Status AppendNulls(const uint8_t* valid_bytes, int64_t length) {
+  Status AppendNulls(int64_t length) {
     ARROW_RETURN_NOT_OK(Reserve(length));
-    memset(raw_data_ + length_, 0,
-           static_cast<size_t>(TypeTraits<Type>::bytes_required(length)));
-    UnsafeAppendToBitmap(valid_bytes, length);
+    data_builder_.UnsafeAppend(length, static_cast<value_type>(0));
+    UnsafeSetNull(length);
     return Status::OK();
   }
 
   /// \brief Append a single null element
   Status AppendNull() {
     ARROW_RETURN_NOT_OK(Reserve(1));
-    memset(raw_data_ + length_, 0, sizeof(value_type));
+    data_builder_.UnsafeAppend(static_cast<value_type>(0));
     UnsafeAppendToBitmap(false);
     return Status::OK();
   }
 
-  value_type GetValue(int64_t index) const {
-    return reinterpret_cast<const value_type*>(data_->data())[index];
+  value_type GetValue(int64_t index) const { return 
data_builder_.data()[index]; }
+
+  void Reset() override { data_builder_.Reset(); }
+
+  Status Resize(int64_t capacity) override {
+    ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
+    capacity = std::max(capacity, kMinBuilderCapacity);
+    ARROW_RETURN_NOT_OK(data_builder_.Resize(capacity));
+    return ArrayBuilder::Resize(capacity);
   }
 
   /// \brief Append a sequence of elements in one shot
@@ -82,7 +99,13 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder {
   /// indicates a valid (non-null) value
   /// \return Status
   Status AppendValues(const value_type* values, int64_t length,
-                      const uint8_t* valid_bytes = NULLPTR);
+                      const uint8_t* valid_bytes = NULLPTR) {
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    data_builder_.UnsafeAppend(values, length);
+    // length_ is update by these
+    ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
+    return Status::OK();
+  }
 
   /// \brief Append a sequence of elements in one shot
   /// \param[in] values a contiguous C array of values
@@ -91,7 +114,13 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder {
   /// (0). Equal in length to values
   /// \return Status
   Status AppendValues(const value_type* values, int64_t length,
-                      const std::vector<bool>& is_valid);
+                      const std::vector<bool>& is_valid) {
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    data_builder_.UnsafeAppend(values, length);
+    // length_ is update by these
+    ArrayBuilder::UnsafeAppendToBitmap(is_valid);
+    return Status::OK();
+  }
 
   /// \brief Append a sequence of elements in one shot
   /// \param[in] values a std::vector of values
@@ -99,25 +128,35 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder {
   /// (0). Equal in length to values
   /// \return Status
   Status AppendValues(const std::vector<value_type>& values,
-                      const std::vector<bool>& is_valid);
+                      const std::vector<bool>& is_valid) {
+    return AppendValues(values.data(), static_cast<int64_t>(values.size()), 
is_valid);
+  }
 
   /// \brief Append a sequence of elements in one shot
   /// \param[in] values a std::vector of values
   /// \return Status
-  Status AppendValues(const std::vector<value_type>& values);
+  Status AppendValues(const std::vector<value_type>& values) {
+    return AppendValues(values.data(), static_cast<int64_t>(values.size()));
+  }
+
+  Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
+    std::shared_ptr<Buffer> data, null_bitmap;
+    ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+    ARROW_RETURN_NOT_OK(data_builder_.Finish(&data));
+    *out = ArrayData::Make(type_, length_, {null_bitmap, data}, null_count_);
+    capacity_ = length_ = null_count_ = 0;
+    return Status::OK();
+  }
 
   /// \brief Append a sequence of elements in one shot
   /// \param[in] values_begin InputIterator to the beginning of the values
   /// \param[in] values_end InputIterator pointing to the end of the values
   /// \return Status
-
   template <typename ValuesIter>
   Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
     int64_t length = static_cast<int64_t>(std::distance(values_begin, 
values_end));
     ARROW_RETURN_NOT_OK(Reserve(length));
-
-    std::copy(values_begin, values_end, raw_data_ + length_);
-
+    data_builder_.UnsafeAppend(values_begin, values_end);
     // this updates the length_
     UnsafeSetNotNull(length);
     return Status::OK();
@@ -137,14 +176,11 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder 
{
                   "version instead");
     int64_t length = static_cast<int64_t>(std::distance(values_begin, 
values_end));
     ARROW_RETURN_NOT_OK(Reserve(length));
-
-    std::copy(values_begin, values_end, raw_data_ + length_);
-
-    // this updates the length_
-    for (int64_t i = 0; i != length; ++i) {
-      UnsafeAppendToBitmap(*valid_begin);
-      ++valid_begin;
-    }
+    data_builder_.UnsafeAppend(values_begin, values_end);
+    null_bitmap_builder_.UnsafeAppend<true>(
+        length, [&valid_begin]() -> bool { return *valid_begin++; });
+    length_ = null_bitmap_builder_.length();
+    null_count_ = null_bitmap_builder_.false_count();
     return Status::OK();
   }
 
@@ -154,71 +190,37 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder 
{
       ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
     int64_t length = static_cast<int64_t>(std::distance(values_begin, 
values_end));
     ARROW_RETURN_NOT_OK(Reserve(length));
-
-    std::copy(values_begin, values_end, raw_data_ + length_);
-
+    data_builder_.UnsafeAppend(values_begin, values_end);
     // this updates the length_
     if (valid_begin == NULLPTR) {
       UnsafeSetNotNull(length);
     } else {
-      for (int64_t i = 0; i != length; ++i) {
-        UnsafeAppendToBitmap(*valid_begin);
-        ++valid_begin;
-      }
+      null_bitmap_builder_.UnsafeAppend<true>(
+          length, [&valid_begin]() -> bool { return *valid_begin++; });
+      length_ = null_bitmap_builder_.length();
+      null_count_ = null_bitmap_builder_.false_count();
     }
 
     return Status::OK();
   }
 
-  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
-  void Reset() override;
-
-  Status Resize(int64_t capacity) override;
-
- protected:
-  std::shared_ptr<ResizableBuffer> data_;
-  value_type* raw_data_;
-};
-
-/// Base class for all Builders that emit an Array of a scalar numerical type.
-template <typename T>
-class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> {
- public:
-  using typename PrimitiveBuilder<T>::value_type;
-  using PrimitiveBuilder<T>::PrimitiveBuilder;
-
-  template <typename T1 = T>
-  explicit NumericBuilder(
-      typename std::enable_if<TypeTraits<T1>::is_parameter_free, 
MemoryPool*>::type pool
-          ARROW_MEMORY_POOL_DEFAULT)
-      : PrimitiveBuilder<T1>(TypeTraits<T1>::type_singleton(), pool) {}
-
-  using ArrayBuilder::UnsafeAppendNull;
-  using ArrayBuilder::UnsafeAppendToBitmap;
-  using PrimitiveBuilder<T>::AppendValues;
-  using PrimitiveBuilder<T>::Resize;
-  using PrimitiveBuilder<T>::Reserve;
-
-  /// Append a single scalar and increase the size if necessary.
-  Status Append(const value_type val) {
-    ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1));
-    UnsafeAppend(val);
-    return Status::OK();
-  }
-
   /// Append a single scalar under the assumption that the underlying Buffer is
   /// large enough.
   ///
   /// This method does not capacity-check; make sure to call Reserve
   /// beforehand.
   void UnsafeAppend(const value_type val) {
-    raw_data_[length_] = val;
-    UnsafeAppendToBitmap(true);
+    ArrayBuilder::UnsafeAppendToBitmap(true);
+    data_builder_.UnsafeAppend(val);
+  }
+
+  void UnsafeAppendNull() {
+    ArrayBuilder::UnsafeAppendToBitmap(false);
+    data_builder_.UnsafeAppend(0);
   }
 
  protected:
-  using PrimitiveBuilder<T>::length_;
-  using PrimitiveBuilder<T>::raw_data_;
+  TypedBufferBuilder<value_type> data_builder_;
 };
 
 // Builders
@@ -249,21 +251,17 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
 
   explicit BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* 
pool);
 
-  using ArrayBuilder::Advance;
-  using ArrayBuilder::UnsafeAppendNull;
-
   /// Write nulls as uint8_t* (0 value indicates null) into pre-allocated 
memory
-  Status AppendNulls(const uint8_t* valid_bytes, int64_t length) {
+  Status AppendNulls(int64_t length) {
     ARROW_RETURN_NOT_OK(Reserve(length));
-    UnsafeAppendToBitmap(valid_bytes, length);
-
+    data_builder_.UnsafeAppend(length, false);
+    UnsafeSetNull(length);
     return Status::OK();
   }
 
   Status AppendNull() {
     ARROW_RETURN_NOT_OK(Reserve(1));
-    UnsafeAppendToBitmap(false);
-
+    UnsafeAppendNull();
     return Status::OK();
   }
 
@@ -278,14 +276,15 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
 
   /// Scalar append, without checking for capacity
   void UnsafeAppend(const bool val) {
-    if (val) {
-      BitUtil::SetBit(raw_data_, length_);
-    } else {
-      BitUtil::ClearBit(raw_data_, length_);
-    }
+    data_builder_.UnsafeAppend(val);
     UnsafeAppendToBitmap(true);
   }
 
+  void UnsafeAppendNull() {
+    data_builder_.UnsafeAppend(false);
+    UnsafeAppendToBitmap(false);
+  }
+
   void UnsafeAppend(const uint8_t val) { UnsafeAppend(val != 0); }
 
   /// \brief Append a sequence of elements in one shot
@@ -340,10 +339,8 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
   Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
     int64_t length = static_cast<int64_t>(std::distance(values_begin, 
values_end));
     ARROW_RETURN_NOT_OK(Reserve(length));
-    auto iter = values_begin;
-    internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                   [&iter]() -> bool { return *(iter++); });
-
+    data_builder_.UnsafeAppend<false>(
+        length, [&values_begin]() -> bool { return *values_begin++; });
     // this updates length_
     UnsafeSetNotNull(length);
     return Status::OK();
@@ -364,15 +361,12 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
     int64_t length = static_cast<int64_t>(std::distance(values_begin, 
values_end));
     ARROW_RETURN_NOT_OK(Reserve(length));
 
-    auto iter = values_begin;
-    internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                   [&iter]() -> bool { return *(iter++); });
-
-    // this updates length_
-    for (int64_t i = 0; i != length; ++i) {
-      ArrayBuilder::UnsafeAppendToBitmap(*valid_begin);
-      ++valid_begin;
-    }
+    data_builder_.UnsafeAppend<false>(
+        length, [&values_begin]() -> bool { return *values_begin++; });
+    null_bitmap_builder_.UnsafeAppend<true>(
+        length, [&valid_begin]() -> bool { return *valid_begin++; });
+    length_ = null_bitmap_builder_.length();
+    null_count_ = null_bitmap_builder_.false_count();
     return Status::OK();
   }
 
@@ -382,21 +376,17 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
       ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
     int64_t length = static_cast<int64_t>(std::distance(values_begin, 
values_end));
     ARROW_RETURN_NOT_OK(Reserve(length));
+    data_builder_.UnsafeAppend<false>(
+        length, [&values_begin]() -> bool { return *values_begin++; });
 
-    auto iter = values_begin;
-    internal::GenerateBitsUnrolled(raw_data_, length_, length,
-                                   [&iter]() -> bool { return *(iter++); });
-
-    // this updates the length_
     if (valid_begin == NULLPTR) {
       UnsafeSetNotNull(length);
     } else {
-      for (int64_t i = 0; i != length; ++i) {
-        ArrayBuilder::UnsafeAppendToBitmap(*valid_begin);
-        ++valid_begin;
-      }
+      null_bitmap_builder_.UnsafeAppend<true>(
+          length, [&valid_begin]() -> bool { return *valid_begin++; });
     }
-
+    length_ = null_bitmap_builder_.length();
+    null_count_ = null_bitmap_builder_.false_count();
     return Status::OK();
   }
 
@@ -405,8 +395,7 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
   Status Resize(int64_t capacity) override;
 
  protected:
-  std::shared_ptr<ResizableBuffer> data_;
-  uint8_t* raw_data_;
+  TypedBufferBuilder<bool> data_builder_;
 };
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/buffer-builder.h b/cpp/src/arrow/buffer-builder.h
index b27fbd8..a843021 100644
--- a/cpp/src/arrow/buffer-builder.h
+++ b/cpp/src/arrow/buffer-builder.h
@@ -24,6 +24,7 @@
 #include <cstring>
 #include <memory>
 #include <string>
+#include <utility>
 
 #include "arrow/buffer.h"
 #include "arrow/status.h"
@@ -57,7 +58,6 @@ class ARROW_EXPORT BufferBuilder {
       return Status::OK();
     }
     int64_t old_capacity = capacity_;
-
     if (buffer_ == NULLPTR) {
       ARROW_RETURN_NOT_OK(AllocateResizableBuffer(pool_, new_capacity, 
&buffer_));
     } else {
@@ -131,6 +131,9 @@ class ARROW_EXPORT BufferBuilder {
   // Advance pointer and zero out memory
   Status Advance(const int64_t length) { return Append(length, 0); }
 
+  // Advance pointer, but don't allocate or zero memory
+  void UnsafeAdvance(const int64_t length) { size_ += length; }
+
   // Unsafe methods don't check existing size
   void UnsafeAppend(const void* data, const int64_t length) {
     memcpy(data_ + size_, data, static_cast<size_t>(length));
@@ -153,6 +156,7 @@ class ARROW_EXPORT BufferBuilder {
   /// \return Status
   Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
     ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit));
+    if (size_ != 0) buffer_->ZeroPadding();
     *out = buffer_;
     Reset();
     return Status::OK();
@@ -211,6 +215,14 @@ class TypedBufferBuilder<T, typename 
std::enable_if<std::is_arithmetic<T>::value
                                 num_elements * sizeof(T));
   }
 
+  template <typename Iter>
+  void UnsafeAppend(Iter values_begin, Iter values_end) {
+    int64_t num_elements = static_cast<int64_t>(std::distance(values_begin, 
values_end));
+    auto data = mutable_data() + length();
+    bytes_builder_.UnsafeAdvance(num_elements * sizeof(T));
+    std::copy(values_begin, values_end, data);
+  }
+
   void UnsafeAppend(const int64_t num_copies, T value) {
     auto data = mutable_data() + length();
     bytes_builder_.UnsafeAppend(num_copies * sizeof(T), 0);
@@ -284,7 +296,7 @@ class TypedBufferBuilder<bool> {
     int64_t i = 0;
     internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, 
[&] {
       bool value = bytes[i++];
-      if (!value) ++false_count_;
+      false_count_ += !value;
       return value;
     });
     bit_length_ += num_elements;
@@ -292,12 +304,27 @@ class TypedBufferBuilder<bool> {
 
   void UnsafeAppend(const int64_t num_copies, bool value) {
     BitUtil::SetBitsTo(mutable_data(), bit_length_, num_copies, value);
-    if (!value) {
-      false_count_ += num_copies;
-    }
+    false_count_ += num_copies * !value;
     bit_length_ += num_copies;
   }
 
+  template <bool count_falses, typename Generator>
+  void UnsafeAppend(const int64_t num_elements, Generator&& gen) {
+    if (num_elements == 0) return;
+
+    if (count_falses) {
+      internal::GenerateBitsUnrolled(mutable_data(), bit_length_, 
num_elements, [&] {
+        bool value = gen();
+        false_count_ += !value;
+        return value;
+      });
+    } else {
+      internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements,
+                                     std::forward<Generator>(gen));
+    }
+    bit_length_ += num_elements;
+  }
+
   Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
     const int64_t old_byte_capacity = bytes_builder_.capacity();
     const int64_t new_byte_capacity = BitUtil::BytesForBits(new_capacity);
@@ -320,6 +347,9 @@ class TypedBufferBuilder<bool> {
   }
 
   Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
+    // set bytes_builder_.size_ == byte size of data
+    bytes_builder_.UnsafeAdvance(BitUtil::BytesForBits(bit_length_) -
+                                 bytes_builder_.length());
     bit_length_ = false_count_ = 0;
     return bytes_builder_.Finish(out, shrink_to_fit);
   }
diff --git a/cpp/src/arrow/buffer-test.cc b/cpp/src/arrow/buffer-test.cc
index 9ae96e7..e503b28 100644
--- a/cpp/src/arrow/buffer-test.cc
+++ b/cpp/src/arrow/buffer-test.cc
@@ -350,6 +350,8 @@ TEST(TestBufferBuilder, BasicBoolBufferBuilderUsage) {
   for (int i = 0; i != nvalues; ++i) {
     ASSERT_EQ(BitUtil::GetBit(built->data(), i + 1), 
static_cast<bool>(values[i]));
   }
+
+  ASSERT_EQ(built->size(), BitUtil::BytesForBits(nvalues + 1));
 }
 
 TEST(TestBufferBuilder, BoolBufferBuilderAppendCopies) {
@@ -367,6 +369,8 @@ TEST(TestBufferBuilder, BoolBufferBuilderAppendCopies) {
   for (int i = 0; i != 13 + 17; ++i) {
     EXPECT_EQ(BitUtil::GetBit(built->data(), i), i < 13) << "index = " << i;
   }
+
+  ASSERT_EQ(built->size(), BitUtil::BytesForBits(13 + 17));
 }
 
 template <typename T>

Reply via email to