This is an automated email from the ASF dual-hosted git repository.

westonpace pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a6ace68533 GH-33971: [C++] Fix AdaptiveIntBuilder to always populate 
data buffer (#33994)
a6ace68533 is described below

commit a6ace685339a8f7fb786af66ef9328b7b3f7eb68
Author: Weston Pace <[email protected]>
AuthorDate: Tue Feb 7 08:52:28 2023 -0800

    GH-33971: [C++] Fix AdaptiveIntBuilder to always populate data buffer 
(#33994)
    
    If the AdaptiveIntBuilder was empty it would yield a null for the values 
buffer.  The byte_size.h utilities were not expecting this which led to the 
error reported in the issue.
    
    Example:
    
    ```
    >>> pa.array([], type=pa.dictionary(pa.int32(), pa.string())).buffers()
    [None, None]
    ```
    * Closes: #33971
    
    Authored-by: Weston Pace <[email protected]>
    Signed-off-by: Weston Pace <[email protected]>
---
 cpp/src/arrow/array/array_dict_test.cc  | 12 ++++++++++++
 cpp/src/arrow/array/array_test.cc       | 14 +++++++++++++-
 cpp/src/arrow/array/builder_adaptive.cc |  8 +++++++-
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/array/array_dict_test.cc 
b/cpp/src/arrow/array/array_dict_test.cc
index bfa732f165..54cf3a73d6 100644
--- a/cpp/src/arrow/array/array_dict_test.cc
+++ b/cpp/src/arrow/array/array_dict_test.cc
@@ -157,6 +157,18 @@ TYPED_TEST(TestDictionaryBuilder, MakeBuilder) {
   AssertArraysEqual(expected, *result);
 }
 
+TYPED_TEST(TestDictionaryBuilder, Empty) {
+  DictionaryBuilder<TypeParam> dictionary_builder;
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> empty_arr, 
dictionary_builder.Finish());
+  std::shared_ptr<DictionaryArray> empty_dict_arr =
+      checked_pointer_cast<DictionaryArray>(empty_arr);
+  // Ensure that the indices value buffer is initialized
+  ASSERT_NE(nullptr, empty_dict_arr->data()->buffers[1]);
+  // Ensure that the dictionary's value buffer is initialized
+  ASSERT_NE(nullptr, empty_dict_arr->dictionary());
+  ASSERT_NE(nullptr, empty_dict_arr->dictionary()->data()->buffers[1]);
+}
+
 TYPED_TEST(TestDictionaryBuilder, ArrayConversion) {
   auto type = std::make_shared<TypeParam>();
 
diff --git a/cpp/src/arrow/array/array_test.cc 
b/cpp/src/arrow/array/array_test.cc
index 3c4b65c551..71e125bd86 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -2320,7 +2320,17 @@ class TestAdaptiveIntBuilder : public TestBuilder {
     builder_ = std::make_shared<AdaptiveIntBuilder>(pool_);
   }
 
-  void Done() { FinishAndCheckPadding(builder_.get(), &result_); }
+  void EnsureValuesBufferNotNull(const ArrayData& data) {
+    // The values buffer should be initialized
+    ASSERT_EQ(2, data.buffers.size());
+    ASSERT_NE(nullptr, data.buffers[1]);
+    ASSERT_NE(nullptr, data.buffers[1]->data());
+  }
+
+  void Done() {
+    FinishAndCheckPadding(builder_.get(), &result_);
+    EnsureValuesBufferNotNull(*result_->data());
+  }
 
   template <typename ExpectedType>
   void TestAppendValues() {
@@ -2571,6 +2581,8 @@ TEST_F(TestAdaptiveIntBuilder, TestAppendEmptyValue) {
   AssertArraysEqual(*result_, *ArrayFromJSON(int8(), "[null, null, 0, 42, 0, 
0]"));
 }
 
+TEST_F(TestAdaptiveIntBuilder, Empty) { Done(); }
+
 TEST(TestAdaptiveIntBuilderWithStartIntSize, TestReset) {
   auto builder = std::make_shared<AdaptiveIntBuilder>(
       static_cast<uint8_t>(sizeof(int16_t)), default_memory_pool());
diff --git a/cpp/src/arrow/array/builder_adaptive.cc 
b/cpp/src/arrow/array/builder_adaptive.cc
index f6255a564f..3012891cf7 100644
--- a/cpp/src/arrow/array/builder_adaptive.cc
+++ b/cpp/src/arrow/array/builder_adaptive.cc
@@ -139,7 +139,13 @@ Status 
AdaptiveIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
   RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
   RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
 
-  *out = ArrayData::Make(type(), length_, {null_bitmap, data_}, null_count_);
+  std::shared_ptr<Buffer> values_buffer = data_;
+  if (!values_buffer) {
+    ARROW_ASSIGN_OR_RAISE(values_buffer, AllocateBuffer(0, pool_));
+  }
+
+  *out = ArrayData::Make(type(), length_, {null_bitmap, 
std::move(values_buffer)},
+                         null_count_);
 
   data_ = nullptr;
   capacity_ = length_ = null_count_ = 0;

Reply via email to