This is an automated email from the ASF dual-hosted git repository.
westonpace pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a6ace68533 GH-33971: [C++] Fix AdaptiveIntBuilder to always populate
data buffer (#33994)
a6ace68533 is described below
commit a6ace685339a8f7fb786af66ef9328b7b3f7eb68
Author: Weston Pace <[email protected]>
AuthorDate: Tue Feb 7 08:52:28 2023 -0800
GH-33971: [C++] Fix AdaptiveIntBuilder to always populate data buffer
(#33994)
If the AdaptiveIntBuilder was empty it would yield a null for the values
buffer. The byte_size.h utilities were not expecting this which led to the
error reported in the issue.
Example:
```
>>> pa.array([], type=pa.dictionary(pa.int32(), pa.string())).buffers()
[None, None]
```
* Closes: #33971
Authored-by: Weston Pace <[email protected]>
Signed-off-by: Weston Pace <[email protected]>
---
cpp/src/arrow/array/array_dict_test.cc | 12 ++++++++++++
cpp/src/arrow/array/array_test.cc | 14 +++++++++++++-
cpp/src/arrow/array/builder_adaptive.cc | 8 +++++++-
3 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/cpp/src/arrow/array/array_dict_test.cc
b/cpp/src/arrow/array/array_dict_test.cc
index bfa732f165..54cf3a73d6 100644
--- a/cpp/src/arrow/array/array_dict_test.cc
+++ b/cpp/src/arrow/array/array_dict_test.cc
@@ -157,6 +157,18 @@ TYPED_TEST(TestDictionaryBuilder, MakeBuilder) {
AssertArraysEqual(expected, *result);
}
+TYPED_TEST(TestDictionaryBuilder, Empty) {
+ DictionaryBuilder<TypeParam> dictionary_builder;
+ ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> empty_arr,
dictionary_builder.Finish());
+ std::shared_ptr<DictionaryArray> empty_dict_arr =
+ checked_pointer_cast<DictionaryArray>(empty_arr);
+ // Ensure that the indices value buffer is initialized
+ ASSERT_NE(nullptr, empty_dict_arr->data()->buffers[1]);
+ // Ensure that the dictionary's value buffer is initialized
+ ASSERT_NE(nullptr, empty_dict_arr->dictionary());
+ ASSERT_NE(nullptr, empty_dict_arr->dictionary()->data()->buffers[1]);
+}
+
TYPED_TEST(TestDictionaryBuilder, ArrayConversion) {
auto type = std::make_shared<TypeParam>();
diff --git a/cpp/src/arrow/array/array_test.cc
b/cpp/src/arrow/array/array_test.cc
index 3c4b65c551..71e125bd86 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -2320,7 +2320,17 @@ class TestAdaptiveIntBuilder : public TestBuilder {
builder_ = std::make_shared<AdaptiveIntBuilder>(pool_);
}
- void Done() { FinishAndCheckPadding(builder_.get(), &result_); }
+ void EnsureValuesBufferNotNull(const ArrayData& data) {
+ // The values buffer should be initialized
+ ASSERT_EQ(2, data.buffers.size());
+ ASSERT_NE(nullptr, data.buffers[1]);
+ ASSERT_NE(nullptr, data.buffers[1]->data());
+ }
+
+ void Done() {
+ FinishAndCheckPadding(builder_.get(), &result_);
+ EnsureValuesBufferNotNull(*result_->data());
+ }
template <typename ExpectedType>
void TestAppendValues() {
@@ -2571,6 +2581,8 @@ TEST_F(TestAdaptiveIntBuilder, TestAppendEmptyValue) {
AssertArraysEqual(*result_, *ArrayFromJSON(int8(), "[null, null, 0, 42, 0,
0]"));
}
+TEST_F(TestAdaptiveIntBuilder, Empty) { Done(); }
+
TEST(TestAdaptiveIntBuilderWithStartIntSize, TestReset) {
auto builder = std::make_shared<AdaptiveIntBuilder>(
static_cast<uint8_t>(sizeof(int16_t)), default_memory_pool());
diff --git a/cpp/src/arrow/array/builder_adaptive.cc
b/cpp/src/arrow/array/builder_adaptive.cc
index f6255a564f..3012891cf7 100644
--- a/cpp/src/arrow/array/builder_adaptive.cc
+++ b/cpp/src/arrow/array/builder_adaptive.cc
@@ -139,7 +139,13 @@ Status
AdaptiveIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
RETURN_NOT_OK(TrimBuffer(length_ * int_size_, data_.get()));
- *out = ArrayData::Make(type(), length_, {null_bitmap, data_}, null_count_);
+ std::shared_ptr<Buffer> values_buffer = data_;
+ if (!values_buffer) {
+ ARROW_ASSIGN_OR_RAISE(values_buffer, AllocateBuffer(0, pool_));
+ }
+
+ *out = ArrayData::Make(type(), length_, {null_bitmap,
std::move(values_buffer)},
+ null_count_);
data_ = nullptr;
capacity_ = length_ = null_count_ = 0;