felipecrv commented on code in PR #35345:
URL: https://github.com/apache/arrow/pull/35345#discussion_r1319871274


##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -189,11 +261,109 @@ Result<std::shared_ptr<Array>> FlattenListArray(const 
ListArrayT& list_array,
   return Concatenate(non_null_fragments, memory_pool);
 }
 
+template <typename ListViewArrayT>
+Result<std::shared_ptr<Array>> FlattenListViewArray(const ListViewArrayT& 
list_view_array,
+                                                    MemoryPool* memory_pool) {
+  using offset_type = typename ListViewArrayT::offset_type;
+  const int64_t list_view_array_length = list_view_array.length();
+  std::shared_ptr<arrow::Array> value_array = list_view_array.values();
+
+  if (list_view_array_length == 0) {
+    return SliceArrayWithOffsets(*value_array, 0, 0);
+  }
+
+  // If the list array is *all* nulls, then just return an empty array.
+  if (list_view_array.null_count() == list_view_array.length()) {
+    return MakeEmptyArray(value_array->type(), memory_pool);
+  }
+
+  const auto* validity = list_view_array.data()->template 
GetValues<uint8_t>(0);
+  const auto* offsets = list_view_array.data()->template 
GetValues<offset_type>(1);
+  const auto* sizes = list_view_array.data()->template 
GetValues<offset_type>(2);
+
+  // If a ListViewArray:
+  //
+  //   1) does not contain nulls
+  //   2) has sorted offsets
+  //   3) every view is disjoint
+  //
+  // then simply slice its value array with the first offset and end of the 
last list
+  // view.
+  if (list_view_array.null_count() == 0) {
+    bool sorted_and_disjoint = true;
+    for (int64_t i = 1; sorted_and_disjoint && i < list_view_array_length; 
++i) {
+      sorted_and_disjoint &=
+          sizes[i - 1] == 0 || offsets[i] - offsets[i - 1] == sizes[i - 1];
+    }
+
+    if (sorted_and_disjoint) {
+      const auto begin_offset = list_view_array.value_offset(0);
+      const auto end_offset = 
list_view_array.value_offset(list_view_array_length - 1) +
+                              
list_view_array.value_length(list_view_array_length - 1);
+      return SliceArrayWithOffsets(*value_array, begin_offset, end_offset);
+    }
+  }
+
+  std::vector<std::shared_ptr<Array>> non_null_fragments;
+  // Index of first valid list-view and last offset
+  // of the current contiguous fragment in values.
+  int64_t first_i = -1;
+  offset_type end_offset = -1;
+  int64_t i = 0;
+  for (; i < list_view_array_length; i++) {
+    if ((validity && !bit_util::GetBit(validity, i)) || sizes[i] == 0) {
+      continue;
+    }
+    first_i = i;
+    end_offset = offsets[i] + sizes[i];
+    break;
+  }
+  i += 1;
+  for (; i < list_view_array_length; i++) {

Review Comment:
   The first version looks like what I initially written, but it contains one 
extra boolean test
   
   `if (first_i == kUninitialized) {`
   
   This is exactly what I wanted to get rid of with the two loops. When I 
looked past the aversion to having multiple loops, I could read the code and 
informally prove its correctness by induction: first loop establishes the base 
case and the second loop builds on that. Final check at the end for cases with 
only nulls or empty list-views.
   
   Can incorporate some of your style and do this?
   
   ```cpp
     constexpr int64_t kUninitialized = -1;
     int64_t first_i = kUninitialized;
     offset_type end_offset;
     int64_t i = 0;
     for (; i < list_view_array_length; i++) {
       if (is_null_or_empty(i)) continue;
   
       first_i = i;
       end_offset = offsets[i] + sizes[i];
       break;
     }
     i += 1;
     for (; i < list_view_array_length; i++) {
       if (is_null_or_empty(i)) continue;
   
       if (offsets[i] == end_offset) {
         end_offset += sizes[i];
         continue;
       }
       non_null_fragments.push_back(
           SliceArrayWithOffsets(*value_array, offsets[first_i], end_offset));
       first_i = i;
       end_offset = offsets[i] + sizes[i];
     }
     if (first_i != kUninitialized) {
       non_null_fragments.push_back(
           SliceArrayWithOffsets(*value_array, offsets[first_i], end_offset));
     }
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to