felipecrv commented on code in PR #41092:
URL: https://github.com/apache/arrow/pull/41092#discussion_r1560333423


##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -937,6 +1000,11 @@ Result<std::shared_ptr<Array>> 
FixedSizeListArray::Flatten(
   return FlattenListArray(*this, memory_pool);
 }
 
+Result<std::shared_ptr<Array>> FixedSizeListArray::FlattenRecursively(
+    MemoryPool* memory_pool) const {
+  return internal::FlattenLogicalListRecursively(*this, memory_pool);
+}

Review Comment:
   This can be defined in the header. Allowing the compiler to simply call 
`internal::FlattenLogicalListRecursively` because `FlattenRecursively` gets 
inlined.



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
   self->values_ = MakeArray(self->data_->child_data[0]);
 }
 
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array& 
array,
+                                                             MemoryPool* 
memory_pool) {
+  Type::type kind = array.type_id();
+  std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+  while (is_list_like(kind) || is_list_view(kind)) {
+    const bool has_nulls = array.null_count() > 0;
+    std::shared_ptr<Array> out;
+    switch (kind) {
+      case Type::LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out,
+            FlattenListArray(checked_cast<const ListArray&>(*in_array), 
memory_pool));
+        break;
+      }
+      case Type::LARGE_LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out, FlattenListArray(checked_cast<const 
LargeListArray&>(*in_array),
+                                  memory_pool));
+        break;
+      }
+      case Type::FIXED_SIZE_LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out, FlattenListArray(checked_cast<const 
FixedSizeListArray&>(*in_array),
+                                  memory_pool));
+        break;
+      }
+      case Type::LIST_VIEW: {
+        if (has_nulls) {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<ListViewArray, true>(
+                       checked_cast<const ListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        } else {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<ListViewArray, false>(
+                       checked_cast<const ListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        }
+      }
+      case Type::LARGE_LIST_VIEW: {
+        if (has_nulls) {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<LargeListViewArray, true>(
+                       checked_cast<const LargeListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        } else {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<LargeListViewArray, false>(
+                       checked_cast<const LargeListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        }
+      }
+      default:
+        return Status::Invalid("Unknown or unsupported arrow nested type: ",
+                               in_array->type()->ToString());
+    }
+
+    in_array = out;
+    kind = in_array->type_id();

Review Comment:
   unnecessary given the changes suggested above



##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -103,6 +107,13 @@ class VarLengthListLikeArray : public Array {
     return values_->Slice(value_offset(i), value_length(i));
   }
 
+  /// \brief Flatten all level recursively until reach a non-list type, and 
return a
+  /// non-list type Array.

Review Comment:
   More complete docstring for this and the in `FixedSizeList`:
   
   ```cpp
   /// \brief A version of Flatten that keeps recursively flattening until an 
array of non-list values is reached.
   ///
   /// Array types considered to be lists by this function:
   ///  - list
   ///  - large_list
   ///  - list_view
   ///  - large_list_view
   ///  - fixed_size_list
   ///
   /// \see ListArray::Flatten
   ```



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
   self->values_ = MakeArray(self->data_->child_data[0]);
 }
 
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array& 
array,
+                                                             MemoryPool* 
memory_pool) {
+  Type::type kind = array.type_id();
+  std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+  while (is_list_like(kind) || is_list_view(kind)) {

Review Comment:
   You can use a `for` to make the transitions of `kind` more apparent:
   
   ```for (auto kind = in_array->type_id(); is_list_like(kind) || 
is_list_view(kind); kind = in_array->type_id())```



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
   self->values_ = MakeArray(self->data_->child_data[0]);
 }
 
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array& 
array,
+                                                             MemoryPool* 
memory_pool) {
+  Type::type kind = array.type_id();
+  std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+  while (is_list_like(kind) || is_list_view(kind)) {
+    const bool has_nulls = array.null_count() > 0;
+    std::shared_ptr<Array> out;
+    switch (kind) {
+      case Type::LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out,
+            FlattenListArray(checked_cast<const ListArray&>(*in_array), 
memory_pool));

Review Comment:
   Prefer
   
   `checked_cast<const ListArray*>(in_array)->Flatten(memory_pool);`
   
   as we don't need to expand/inline the `FlattenListArray<>` template here.



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
   self->values_ = MakeArray(self->data_->child_data[0]);
 }
 
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array& 
array,
+                                                             MemoryPool* 
memory_pool) {
+  Type::type kind = array.type_id();
+  std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+  while (is_list_like(kind) || is_list_view(kind)) {
+    const bool has_nulls = array.null_count() > 0;
+    std::shared_ptr<Array> out;
+    switch (kind) {
+      case Type::LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out,
+            FlattenListArray(checked_cast<const ListArray&>(*in_array), 
memory_pool));
+        break;
+      }
+      case Type::LARGE_LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out, FlattenListArray(checked_cast<const 
LargeListArray&>(*in_array),
+                                  memory_pool));
+        break;
+      }
+      case Type::FIXED_SIZE_LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out, FlattenListArray(checked_cast<const 
FixedSizeListArray&>(*in_array),
+                                  memory_pool));
+        break;
+      }
+      case Type::LIST_VIEW: {
+        if (has_nulls) {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<ListViewArray, true>(
+                       checked_cast<const ListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        } else {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<ListViewArray, false>(
+                       checked_cast<const ListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        }
+      }

Review Comment:
   You don't need `has_nulls` if instead of dispatching the call via the 
template, you call the class member function instead:
   
   ```cpp
             ARROW_ASSIGN_OR_RAISE(
                 out, (checked_cast<const 
ListViewArray*>(in_array)->Flatten(memory_pool)));
   ```



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
   self->values_ = MakeArray(self->data_->child_data[0]);
 }
 
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array& 
array,
+                                                             MemoryPool* 
memory_pool) {
+  Type::type kind = array.type_id();
+  std::shared_ptr<Array> in_array = array.Slice(0, array.length());

Review Comment:
   If you rename the function parameter to `in_array` you can name this one 
simply `array` which is a more fitting name for a loop accumulator variable.



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
   self->values_ = MakeArray(self->data_->child_data[0]);
 }
 
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array& 
array,
+                                                             MemoryPool* 
memory_pool) {
+  Type::type kind = array.type_id();
+  std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+  while (is_list_like(kind) || is_list_view(kind)) {

Review Comment:
   You should exclude `Type::MAP` from the loop condition explicitly as we are 
not going to flatten maps, but they pass this predicate.



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
   self->values_ = MakeArray(self->data_->child_data[0]);
 }
 
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array& 
array,
+                                                             MemoryPool* 
memory_pool) {
+  Type::type kind = array.type_id();
+  std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+  while (is_list_like(kind) || is_list_view(kind)) {
+    const bool has_nulls = array.null_count() > 0;
+    std::shared_ptr<Array> out;
+    switch (kind) {
+      case Type::LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out,
+            FlattenListArray(checked_cast<const ListArray&>(*in_array), 
memory_pool));
+        break;
+      }
+      case Type::LARGE_LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out, FlattenListArray(checked_cast<const 
LargeListArray&>(*in_array),
+                                  memory_pool));
+        break;
+      }
+      case Type::FIXED_SIZE_LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out, FlattenListArray(checked_cast<const 
FixedSizeListArray&>(*in_array),
+                                  memory_pool));
+        break;
+      }
+      case Type::LIST_VIEW: {
+        if (has_nulls) {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<ListViewArray, true>(
+                       checked_cast<const ListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        } else {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<ListViewArray, false>(
+                       checked_cast<const ListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        }
+      }
+      case Type::LARGE_LIST_VIEW: {
+        if (has_nulls) {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<LargeListViewArray, true>(
+                       checked_cast<const LargeListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        } else {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<LargeListViewArray, false>(
+                       checked_cast<const LargeListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        }
+      }
+      default:
+        return Status::Invalid("Unknown or unsupported arrow nested type: ",
+                               in_array->type()->ToString());

Review Comment:
   This can be:
   
   ```cpp
   default:
     Unreachable("unexpected non-list type");
     break;
   ```
   
   Because reaching this case would be a bug in your loop condition: there is 
one currently — you let `Type::MAP` pass and fails flattening `list<map<...>>` 
arrays which should simply flatten to `map<...>` arrays ;)



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
   self->values_ = MakeArray(self->data_->child_data[0]);
 }
 
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array& 
array,
+                                                             MemoryPool* 
memory_pool) {
+  Type::type kind = array.type_id();
+  std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+  while (is_list_like(kind) || is_list_view(kind)) {
+    const bool has_nulls = array.null_count() > 0;

Review Comment:
   Unnecessary. See comments below.



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
   self->values_ = MakeArray(self->data_->child_data[0]);
 }
 
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array& 
array,
+                                                             MemoryPool* 
memory_pool) {
+  Type::type kind = array.type_id();
+  std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+  while (is_list_like(kind) || is_list_view(kind)) {
+    const bool has_nulls = array.null_count() > 0;
+    std::shared_ptr<Array> out;

Review Comment:
   You don't need this temporary variable. You can use and assign on `array` on 
the same statement. Less local variables == less destructors to call at the end.



##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
   self->values_ = MakeArray(self->data_->child_data[0]);
 }
 
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array& 
array,
+                                                             MemoryPool* 
memory_pool) {
+  Type::type kind = array.type_id();
+  std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+  while (is_list_like(kind) || is_list_view(kind)) {
+    const bool has_nulls = array.null_count() > 0;
+    std::shared_ptr<Array> out;
+    switch (kind) {
+      case Type::LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out,
+            FlattenListArray(checked_cast<const ListArray&>(*in_array), 
memory_pool));
+        break;
+      }
+      case Type::LARGE_LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out, FlattenListArray(checked_cast<const 
LargeListArray&>(*in_array),
+                                  memory_pool));
+        break;
+      }
+      case Type::FIXED_SIZE_LIST: {
+        ARROW_ASSIGN_OR_RAISE(
+            out, FlattenListArray(checked_cast<const 
FixedSizeListArray&>(*in_array),
+                                  memory_pool));
+        break;
+      }
+      case Type::LIST_VIEW: {
+        if (has_nulls) {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<ListViewArray, true>(
+                       checked_cast<const ListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        } else {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<ListViewArray, false>(
+                       checked_cast<const ListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        }
+      }
+      case Type::LARGE_LIST_VIEW: {
+        if (has_nulls) {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<LargeListViewArray, true>(
+                       checked_cast<const LargeListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        } else {
+          ARROW_ASSIGN_OR_RAISE(
+              out, (FlattenListViewArray<LargeListViewArray, false>(
+                       checked_cast<const LargeListViewArray&>(*in_array), 
memory_pool)));
+          break;
+        }
+      }
+      default:
+        return Status::Invalid("Unknown or unsupported arrow nested type: ",
+                               in_array->type()->ToString());
+    }
+
+    in_array = out;
+    kind = in_array->type_id();
+  }
+  return std::move(in_array);

Review Comment:
   Don't move returned values. At this position, the compiler automatically 
turns the returned value into a `&&` and passes that to the `Result` 
constructor. It took me a while to learn this!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to