felipecrv commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1394892317
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -189,23 +260,126 @@ Result<std::shared_ptr<Array>> FlattenListArray(const
ListArrayT& list_array,
return Concatenate(non_null_fragments, memory_pool);
}
+template <typename ListViewArrayT, bool HasNulls>
+Result<std::shared_ptr<Array>> FlattenListViewArray(const ListViewArrayT&
list_view_array,
+ MemoryPool* memory_pool) {
+ using offset_type = typename ListViewArrayT::offset_type;
+ const int64_t list_view_array_offset = list_view_array.offset();
+ const int64_t list_view_array_length = list_view_array.length();
+ std::shared_ptr<arrow::Array> value_array = list_view_array.values();
+
+ if (list_view_array_length == 0) {
+ return SliceArrayWithOffsets(*value_array, 0, 0);
+ }
+
+ // If the list array is *all* nulls, then just return an empty array.
+ if constexpr (HasNulls) {
+ if (list_view_array.null_count() == list_view_array.length()) {
+ return MakeEmptyArray(value_array->type(), memory_pool);
+ }
+ }
+
+ const auto* validity = list_view_array.data()->template
GetValues<uint8_t>(0, 0);
+ const auto* offsets = list_view_array.data()->template
GetValues<offset_type>(1);
+ const auto* sizes = list_view_array.data()->template
GetValues<offset_type>(2);
+
+ auto is_null_or_empty = [&](int64_t i) {
+ if constexpr (HasNulls) {
+ if (!bit_util::GetBit(validity, list_view_array_offset + i)) {
+ return true;
+ }
+ }
+ return sizes[i] == 0;
+ };
+
+ // Index of the first valid, non-empty list-view.
+ int64_t first_i = 0;
+ for (; first_i < list_view_array_length; first_i++) {
+ if (!is_null_or_empty(first_i)) {
+ break;
+ }
+ }
+ // If all list-views are empty, return an empty array.
+ if (first_i == list_view_array_length) {
+ return MakeEmptyArray(value_array->type(), memory_pool);
+ }
+
+ std::vector<std::shared_ptr<Array>> slices;
+ {
Review Comment:
> Would you mind adding a comment about the perf expectations here?
Sure. Adding more text to the docstring.
> (sidenote: perhaps we need a Concatenate API variant that takes a
> `vector<ArraySpan>`...)
I once designed an API that took vectors of non-owning objects, and it was a
world of pain. We might come up with something better for Concatenate someday
-- something that doesn't even involve `std::vector`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]