mapleFU commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1375656101
##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -120,10 +135,14 @@ class ARROW_EXPORT ListArray : public
BaseListArray<ListType> {
/// This function does the bare minimum of validation of the offsets and
/// input types, and will allocate a new offsets array if necessary (i.e. if
/// the offsets contain any nulls). If the offsets do not have nulls, they
- /// are assumed to be well-formed
+ /// are assumed to be well-formed.
///
- /// Offsets of an Array's null bitmap can be present or an explicit
- /// null_bitmap, but not both.
+ /// If a null_bitmap is not provided, the nulls will be inferred from the
offsets' or
+ /// sizes' null bitmap. Only one of these two is allowed to have a null
bitmap. But if a
+ /// null_bitmap is provided, the offsets array and the sizes array can't
have nulls.
Review Comment:
Ooops, it doesn't have `sizes` here, right?
##########
cpp/src/arrow/array/concatenate.cc:
##########
@@ -113,33 +122,30 @@ Status ConcatenateOffsets(const BufferVector& buffers,
MemoryPool* pool,
values_ranges->resize(buffers.size());
// allocate output buffer
- int64_t out_length = 0;
- for (const auto& buffer : buffers) {
- out_length += buffer->size() / sizeof(Offset);
- }
- ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer((out_length + 1) *
sizeof(Offset), pool));
- auto dst = reinterpret_cast<Offset*>((*out)->mutable_data());
+ const int64_t out_size_in_bytes = SumBufferSizesInBytes(buffers);
+ ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer(sizeof(Offset) +
out_size_in_bytes, pool));
Review Comment:
Just to make sure I understand this, actually this allocate one more element
for ListView?
##########
cpp/src/arrow/array/concatenate.cc:
##########
@@ -160,16 +166,69 @@ Status PutOffsets(const std::shared_ptr<Buffer>& src,
Offset first_offset, Offse
// Write offsets into dst, ensuring that the first offset written is
// first_offset
- auto adjustment = first_offset - src_begin[0];
+ auto displacement = first_offset - src_begin[0];
// NOTE: Concatenate can be called during IPC reads to append delta
dictionaries.
// Avoid UB on non-validated input by doing the addition in the unsigned
domain.
// (the result can later be validated using Array::ValidateFull)
- std::transform(src_begin, src_end, dst, [adjustment](Offset offset) {
- return SafeSignedAdd(offset, adjustment);
+ std::transform(src_begin, src_end, dst, [displacement](Offset offset) {
+ return SafeSignedAdd(offset, displacement);
});
return Status::OK();
}
+template <typename offset_type>
+void PutListViewOffsets(const Buffer& src, offset_type displacement,
offset_type* dst);
+
+// Concatenate buffers holding list-view offsets into a single buffer of
offsets
+//
+// value_ranges contains the relevant ranges of values in the child array
actually
+// referenced to by the views. Most commonly, these ranges will start from 0,
+// but when that is not the case, we need to adjust the displacement of
offsets.
+// The concatenated child array does not contain values from the beginning
+// if they are not referenced to by any view.
+template <typename offset_type>
+Status ConcatenateListViewOffsets(const BufferVector& buffers,
Review Comment:
A quick stupid question: would the size here 1 greater than `sizes`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]