danepitkin commented on code in PR #40482:
URL: https://github.com/apache/arrow/pull/40482#discussion_r1520729931
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -351,122 +481,21 @@ Result<std::shared_ptr<Array>>
FlattenListViewArray(const ListViewArrayT& list_v
return Concatenate(slices, memory_pool);
}
-std::shared_ptr<Array> BoxOffsets(const std::shared_ptr<DataType>& boxed_type,
- const ArrayData& data) {
- const int64_t num_offsets =
- is_list_view(data.type->id()) ? data.length : data.length + 1;
- std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, data.buffers[1]};
- auto offsets_data =
- std::make_shared<ArrayData>(boxed_type, /*length=*/num_offsets,
std::move(buffers),
- /*null_count=*/0, data.offset);
- return MakeArray(offsets_data);
-}
-
-std::shared_ptr<Array> BoxSizes(const std::shared_ptr<DataType>& boxed_type,
- const ArrayData& data) {
- DCHECK(is_list_view(data.type->id()));
- std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, data.buffers[2]};
- auto sizes_data =
- std::make_shared<ArrayData>(boxed_type, data.length, std::move(buffers),
- /*null_count=*/0, data.offset);
- return MakeArray(sizes_data);
-}
-
-template <typename DestListViewType, typename SrcListType>
-Result<std::shared_ptr<ArrayData>> ListViewFromListImpl(
- const std::shared_ptr<ArrayData>& list_data, MemoryPool* pool) {
- static_assert(
- std::is_same<typename SrcListType::offset_type,
- typename DestListViewType::offset_type>::value,
- "Offset types between list type and list-view type are expected to
match");
- using offset_type = typename SrcListType::offset_type;
- const auto& list_type = checked_cast<const SrcListType&>(*list_data->type);
-
- // To re-use the validity and offsets buffers, a sizes buffer with enough
- // padding on the beginning is allocated and filled with the sizes after
- // list_data->offset.
- const int64_t buffer_length = list_data->offset + list_data->length;
- ARROW_ASSIGN_OR_RAISE(auto sizes_buffer,
- AllocateBuffer(buffer_length * sizeof(offset_type),
pool));
- const auto* offsets = list_data->template GetValues<offset_type>(1, 0);
- auto* sizes = sizes_buffer->mutable_data_as<offset_type>();
- // Zero the initial padding area to avoid leaking any data when buffers are
- // sent over IPC or through the C Data interface.
- memset(sizes, 0, list_data->offset * sizeof(offset_type));
- for (int64_t i = list_data->offset; i < buffer_length; i++) {
- sizes[i] = offsets[i + 1] - offsets[i];
- }
- BufferVector buffers = {list_data->buffers[0], list_data->buffers[1],
- std::move(sizes_buffer)};
-
- return
ArrayData::Make(std::make_shared<DestListViewType>(list_type.value_type()),
- list_data->length, std::move(buffers),
- {list_data->child_data[0]}, list_data->null_count,
- list_data->offset);
-}
-
-template <typename DestListType, typename SrcListViewType>
-Result<std::shared_ptr<ArrayData>> ListFromListViewImpl(
- const std::shared_ptr<ArrayData>& list_view_data, MemoryPool* pool) {
- static_assert(
- std::is_same<typename SrcListViewType::offset_type,
- typename DestListType::offset_type>::value,
- "Offset types between list type and list-view type are expected to
match");
- using offset_type = typename DestListType::offset_type;
- using ListBuilderType = typename TypeTraits<DestListType>::BuilderType;
-
- const auto& list_view_type =
- checked_cast<const SrcListViewType&>(*list_view_data->type);
- const auto& value_type = list_view_type.value_type();
- const auto list_type = std::make_shared<DestListType>(value_type);
-
- ARROW_ASSIGN_OR_RAISE(auto sum_of_list_view_sizes,
-
list_util::internal::SumOfLogicalListSizes(*list_view_data));
- ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayBuilder> value_builder,
- MakeBuilder(value_type, pool));
- RETURN_NOT_OK(value_builder->Reserve(sum_of_list_view_sizes));
- auto list_builder = std::make_shared<ListBuilderType>(pool, value_builder,
list_type);
- RETURN_NOT_OK(list_builder->Reserve(list_view_data->length));
-
- ArraySpan values{*list_view_data->child_data[0]};
- const auto* in_validity_bitmap = list_view_data->GetValues<uint8_t>(0);
- const auto* in_offsets = list_view_data->GetValues<offset_type>(1);
- const auto* in_sizes = list_view_data->GetValues<offset_type>(2);
- for (int64_t i = 0; i < list_view_data->length; ++i) {
- const bool is_valid =
- !in_validity_bitmap ||
- bit_util::GetBit(in_validity_bitmap, list_view_data->offset + i);
- const int64_t size = is_valid ? in_sizes[i] : 0;
- RETURN_NOT_OK(list_builder->Append(is_valid, size));
- RETURN_NOT_OK(value_builder->AppendArraySlice(values, in_offsets[i],
size));
- }
- std::shared_ptr<ArrayData> list_array_data;
- RETURN_NOT_OK(list_builder->FinishInternal(&list_array_data));
- return list_array_data;
-}
-
-} // namespace
-
-namespace internal {
-
-template <typename TYPE>
-inline void SetListData(VarLengthListLikeArray<TYPE>* self,
- const std::shared_ptr<ArrayData>& data,
- Type::type expected_type_id) {
- ARROW_CHECK_EQ(data->buffers.size(), is_list_view(TYPE::type_id) ? 3 : 2);
- ARROW_CHECK_EQ(data->type->id(), expected_type_id);
- ARROW_CHECK_EQ(data->child_data.size(), 1);
-
- self->Array::SetData(data);
-
- self->list_type_ = checked_cast<const TYPE*>(data->type.get());
- self->raw_value_offsets_ =
- data->GetValuesSafe<typename TYPE::offset_type>(1, /*offset=*/0);
- // BaseListViewArray::SetData takes care of setting raw_value_sizes_.
-
- ARROW_CHECK_EQ(self->list_type_->value_type()->id(),
data->child_data[0]->type->id());
- DCHECK(self->list_type_->value_type()->Equals(data->child_data[0]->type));
- self->values_ = MakeArray(self->data_->child_data[0]);
+void InstantiateFlattenListArray() {
+ (void)FlattenListArray<ListArray, false>;
+ (void)FlattenListArray<ListArray, true>;
+ (void)FlattenListArray<LargeListArray, false>;
+ (void)FlattenListArray<LargeListArray, true>;
+ (void)FlattenListArray<FixedSizeListArray, false>;
+ (void)FlattenListArray<FixedSizeListArray, true>;
+ (void)FlattenListViewArray<ListViewArray, false, false>;
+ (void)FlattenListViewArray<ListViewArray, false, true>;
+ (void)FlattenListViewArray<ListViewArray, true, false>;
+ (void)FlattenListViewArray<ListViewArray, true, true>;
+ (void)FlattenListViewArray<LargeListViewArray, false, false>;
+ (void)FlattenListViewArray<LargeListViewArray, false, true>;
+ (void)FlattenListViewArray<LargeListViewArray, true, false>;
+ (void)FlattenListViewArray<LargeListViewArray, true, true>;
Review Comment:
Some of these instantiations are required since only PyArrow would
templatize the function with the last parameter (include_nulls) = true. Any
templatized versions of the APIs that aren't used in Arrow C++ won't be
compiled and exposed to PyArrow.
This isn't ideal and should probably be avoided entirely if possible.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]