This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 1b262a29e5 GH-37437: [C++] Fix MakeArrayOfNull for list array with
large string values type (#37467)
1b262a29e5 is described below
commit 1b262a29e5cddb2534457fadfe0b77bd7767f297
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Thu Oct 5 17:27:20 2023 +0200
GH-37437: [C++] Fix MakeArrayOfNull for list array with large string values
type (#37467)
### Rationale for this change
`MakeArrayOfNull` for list types assumed that the values child field did not
need to be considered. However, the child values can also require a minimum
buffer size (e.g. for their own offsets), which can be larger than the list
offsets buffer when the list uses int32 offsets.
### Are these changes tested?
Yes
* Closes: #37437
Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/array/array_test.cc | 5 +++++
cpp/src/arrow/array/util.cc | 6 ++++--
cpp/src/arrow/array/validate.cc | 4 +++-
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 0b82a82fbd..2bef9d725d 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -388,7 +388,12 @@ static std::vector<std::shared_ptr<DataType>>
TestArrayUtilitiesAgainstTheseType
large_utf8(),
list(utf8()),
list(int64()), // NOTE: Regression case for ARROW-9071/MakeArrayOfNull
+ list(large_utf8()),
+ list(list(int64())),
+ list(list(large_utf8())),
+ large_list(utf8()),
large_list(large_utf8()),
+ large_list(list(large_utf8())),
fixed_size_list(utf8(), 3),
fixed_size_list(int64(), 4),
dictionary(int32(), utf8()),
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index e84ab404ad..98e9d51b5f 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -366,9 +366,11 @@ class NullArrayFactory {
}
template <typename T>
- enable_if_var_size_list<T, Status> Visit(const T&) {
+ enable_if_var_size_list<T, Status> Visit(const T& type) {
// values array may be empty, but there must be at least one offset of 0
- return MaxOf(sizeof(typename T::offset_type) * (length_ + 1));
+ RETURN_NOT_OK(MaxOf(sizeof(typename T::offset_type) * (length_ + 1)));
+ RETURN_NOT_OK(MaxOf(GetBufferLength(type.value_type(), length_)));
+ return Status::OK();
}
template <typename T>
diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc
index 0f2bd45835..19ff8e28b5 100644
--- a/cpp/src/arrow/array/validate.cc
+++ b/cpp/src/arrow/array/validate.cc
@@ -713,8 +713,10 @@ struct ValidateArrayImpl {
}
// An empty list array can have 0 offsets
- const auto required_offsets = (data.length > 0) ? data.length + data.offset + 1 : 0;
const auto offsets_byte_size = data.buffers[1]->size();
+ const auto required_offsets = ((data.length > 0) || (offsets_byte_size > 0))
+ ? data.length + data.offset + 1
+ : 0;
if (offsets_byte_size / static_cast<int32_t>(sizeof(offset_type)) <
required_offsets) {
return Status::Invalid("Offsets buffer size (bytes): ",
offsets_byte_size,