bkietz commented on code in PR #38252:
URL: https://github.com/apache/arrow/pull/38252#discussion_r1424347149
##########
cpp/src/arrow/array/util.cc:
##########
@@ -367,231 +369,254 @@ static Result<std::shared_ptr<Scalar>> MakeScalarForRunEndValue(
return std::make_shared<Int64Scalar>(run_end);
}
-// get the maximum buffer length required, then allocate a single zeroed buffer
-// to use anywhere a buffer is required
class NullArrayFactory {
public:
- struct GetBufferLength {
- GetBufferLength(const std::shared_ptr<DataType>& type, int64_t length)
- : type_(*type), length_(length), buffer_length_(bit_util::BytesForBits(length)) {}
-
- Result<int64_t> Finish() && {
- RETURN_NOT_OK(VisitTypeInline(type_, this));
- return buffer_length_;
- }
-
- template <typename T, typename = decltype(TypeTraits<T>::bytes_required(0))>
- Status Visit(const T&) {
- return MaxOf(TypeTraits<T>::bytes_required(length_));
- }
-
- template <typename T>
- enable_if_var_size_list<T, Status> Visit(const T& type) {
- // values array may be empty, but there must be at least one offset of 0
- RETURN_NOT_OK(MaxOf(sizeof(typename T::offset_type) * (length_ + 1)));
- RETURN_NOT_OK(MaxOf(GetBufferLength(type.value_type(), /*length=*/0)));
- return Status::OK();
- }
-
- template <typename T>
- enable_if_list_view<T, Status> Visit(const T& type) {
- RETURN_NOT_OK(MaxOf(sizeof(typename T::offset_type) * length_));
- RETURN_NOT_OK(MaxOf(GetBufferLength(type.value_type(), /*length=*/0)));
- return Status::OK();
- }
-
- template <typename T>
- enable_if_base_binary<T, Status> Visit(const T&) {
- // values buffer may be empty, but there must be at least one offset of 0
- return MaxOf(sizeof(typename T::offset_type) * (length_ + 1));
- }
-
- Status Visit(const BinaryViewType& type) {
- return MaxOf(sizeof(BinaryViewType::c_type) * length_);
- }
-
- Status Visit(const FixedSizeListType& type) {
- return MaxOf(GetBufferLength(type.value_type(), type.list_size() * length_));
- }
-
- Status Visit(const FixedSizeBinaryType& type) {
- return MaxOf(type.byte_width() * length_);
- }
-
- Status Visit(const StructType& type) {
- for (const auto& child : type.fields()) {
- RETURN_NOT_OK(MaxOf(GetBufferLength(child->type(), length_)));
- }
- return Status::OK();
- }
-
- Status Visit(const SparseUnionType& type) {
- // type codes
- RETURN_NOT_OK(MaxOf(length_));
- // will create children of the same length as the union
- for (const auto& child : type.fields()) {
- RETURN_NOT_OK(MaxOf(GetBufferLength(child->type(), length_)));
- }
- return Status::OK();
- }
+ // For most types, every buffer in an entirely null array will contain nothing but
+ // zeroes. For arrays of such types, we can allocate a single buffer and use that in
+ // every position of the array data. The first stage of visitation handles assessment
+ // of this buffer's size, the second uses the resulting buffer to build the null array.
+ //
+ // The first stage may not allocate from the MemoryPool or raise a failing status.
+ //
+ // In the second stage, `zero_buffer_` has been allocated and `out_` has:
+ // - type = type_
+ // - length = length_
+ // - null_count = length_ unless current output may have direct nulls,
+ // 0 otherwise
+ // - offset = 0
+ // - buffers = []
+ // - child_data = [nullptr] * type.num_fields()
+ // - dictionary = nullptr
+ bool presizing_zero_buffer_;
Review Comment:
Alright
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]