felipecrv commented on code in PR #41092:
URL: https://github.com/apache/arrow/pull/41092#discussion_r1560333423
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -937,6 +1000,11 @@ Result<std::shared_ptr<Array>>
FixedSizeListArray::Flatten(
return FlattenListArray(*this, memory_pool);
}
+Result<std::shared_ptr<Array>> FixedSizeListArray::FlattenRecursively(
+ MemoryPool* memory_pool) const {
+ return internal::FlattenLogicalListRecursively(*this, memory_pool);
+}
Review Comment:
This can be defined in the header. Allowing the compiler to simply call
`internal::FlattenLogicalListRecursively` because `FlattenRecursively` gets
inlined.
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->values_ = MakeArray(self->data_->child_data[0]);
}
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array&
array,
+ MemoryPool*
memory_pool) {
+ Type::type kind = array.type_id();
+ std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+ while (is_list_like(kind) || is_list_view(kind)) {
+ const bool has_nulls = array.null_count() > 0;
+ std::shared_ptr<Array> out;
+ switch (kind) {
+ case Type::LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out,
+ FlattenListArray(checked_cast<const ListArray&>(*in_array),
memory_pool));
+ break;
+ }
+ case Type::LARGE_LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out, FlattenListArray(checked_cast<const
LargeListArray&>(*in_array),
+ memory_pool));
+ break;
+ }
+ case Type::FIXED_SIZE_LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out, FlattenListArray(checked_cast<const
FixedSizeListArray&>(*in_array),
+ memory_pool));
+ break;
+ }
+ case Type::LIST_VIEW: {
+ if (has_nulls) {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<ListViewArray, true>(
+ checked_cast<const ListViewArray&>(*in_array),
memory_pool)));
+ break;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<ListViewArray, false>(
+ checked_cast<const ListViewArray&>(*in_array),
memory_pool)));
+ break;
+ }
+ }
+ case Type::LARGE_LIST_VIEW: {
+ if (has_nulls) {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<LargeListViewArray, true>(
+ checked_cast<const LargeListViewArray&>(*in_array),
memory_pool)));
+ break;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<LargeListViewArray, false>(
+ checked_cast<const LargeListViewArray&>(*in_array),
memory_pool)));
+ break;
+ }
+ }
+ default:
+ return Status::Invalid("Unknown or unsupported arrow nested type: ",
+ in_array->type()->ToString());
+ }
+
+ in_array = out;
+ kind = in_array->type_id();
Review Comment:
unnecessary given the changes suggested above
##########
cpp/src/arrow/array/array_nested.h:
##########
@@ -103,6 +107,13 @@ class VarLengthListLikeArray : public Array {
return values_->Slice(value_offset(i), value_length(i));
}
+ /// \brief Flatten all level recursively until reach a non-list type, and
return a
+ /// non-list type Array.
Review Comment:
More complete docstring for this and the in `FixedSizeList`:
```cpp
/// \brief A version of Flatten that keeps recursively flattening until an
array of non-list values is reached.
///
/// Array types considered to be lists by this function:
/// - list
/// - large_list
/// - list_view
/// - large_list_view
/// - fixed_size_list
///
/// \see ListArray::Flatten
```
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->values_ = MakeArray(self->data_->child_data[0]);
}
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array&
array,
+ MemoryPool*
memory_pool) {
+ Type::type kind = array.type_id();
+ std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+ while (is_list_like(kind) || is_list_view(kind)) {
Review Comment:
You can use a `for` to make the transitions of `kind` more apparent:
```for (auto kind = in_array->type_id(); is_list_like(kind) ||
is_list_view(kind); kind = in_array->type_id())```
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->values_ = MakeArray(self->data_->child_data[0]);
}
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array&
array,
+ MemoryPool*
memory_pool) {
+ Type::type kind = array.type_id();
+ std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+ while (is_list_like(kind) || is_list_view(kind)) {
+ const bool has_nulls = array.null_count() > 0;
+ std::shared_ptr<Array> out;
+ switch (kind) {
+ case Type::LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out,
+ FlattenListArray(checked_cast<const ListArray&>(*in_array),
memory_pool));
Review Comment:
Prefer
`checked_cast<const ListArray*>(in_array)->Flatten(memory_pool);`
as we don't need to expand/inline the `FlattenListArray<>` template here.
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->values_ = MakeArray(self->data_->child_data[0]);
}
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array&
array,
+ MemoryPool*
memory_pool) {
+ Type::type kind = array.type_id();
+ std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+ while (is_list_like(kind) || is_list_view(kind)) {
+ const bool has_nulls = array.null_count() > 0;
+ std::shared_ptr<Array> out;
+ switch (kind) {
+ case Type::LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out,
+ FlattenListArray(checked_cast<const ListArray&>(*in_array),
memory_pool));
+ break;
+ }
+ case Type::LARGE_LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out, FlattenListArray(checked_cast<const
LargeListArray&>(*in_array),
+ memory_pool));
+ break;
+ }
+ case Type::FIXED_SIZE_LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out, FlattenListArray(checked_cast<const
FixedSizeListArray&>(*in_array),
+ memory_pool));
+ break;
+ }
+ case Type::LIST_VIEW: {
+ if (has_nulls) {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<ListViewArray, true>(
+ checked_cast<const ListViewArray&>(*in_array),
memory_pool)));
+ break;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<ListViewArray, false>(
+ checked_cast<const ListViewArray&>(*in_array),
memory_pool)));
+ break;
+ }
+ }
Review Comment:
You don't need `has_nulls` if instead of dispatching the call via the
template, you call the class member function instead:
```cpp
ARROW_ASSIGN_OR_RAISE(
out, (checked_cast<const
ListViewArray*>(in_array)->Flatten(memory_pool)));
```
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->values_ = MakeArray(self->data_->child_data[0]);
}
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array&
array,
+ MemoryPool*
memory_pool) {
+ Type::type kind = array.type_id();
+ std::shared_ptr<Array> in_array = array.Slice(0, array.length());
Review Comment:
If you rename the function parameter to `in_array` you can name this one
simply `array` which is a more fitting name for a loop accumulator variable.
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->values_ = MakeArray(self->data_->child_data[0]);
}
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array&
array,
+ MemoryPool*
memory_pool) {
+ Type::type kind = array.type_id();
+ std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+ while (is_list_like(kind) || is_list_view(kind)) {
Review Comment:
You should exclude `Type::MAP` from the loop condition explicitly as we are
not going to flatten maps, but they pass this predicate.
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->values_ = MakeArray(self->data_->child_data[0]);
}
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array&
array,
+ MemoryPool*
memory_pool) {
+ Type::type kind = array.type_id();
+ std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+ while (is_list_like(kind) || is_list_view(kind)) {
+ const bool has_nulls = array.null_count() > 0;
+ std::shared_ptr<Array> out;
+ switch (kind) {
+ case Type::LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out,
+ FlattenListArray(checked_cast<const ListArray&>(*in_array),
memory_pool));
+ break;
+ }
+ case Type::LARGE_LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out, FlattenListArray(checked_cast<const
LargeListArray&>(*in_array),
+ memory_pool));
+ break;
+ }
+ case Type::FIXED_SIZE_LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out, FlattenListArray(checked_cast<const
FixedSizeListArray&>(*in_array),
+ memory_pool));
+ break;
+ }
+ case Type::LIST_VIEW: {
+ if (has_nulls) {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<ListViewArray, true>(
+ checked_cast<const ListViewArray&>(*in_array),
memory_pool)));
+ break;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<ListViewArray, false>(
+ checked_cast<const ListViewArray&>(*in_array),
memory_pool)));
+ break;
+ }
+ }
+ case Type::LARGE_LIST_VIEW: {
+ if (has_nulls) {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<LargeListViewArray, true>(
+ checked_cast<const LargeListViewArray&>(*in_array),
memory_pool)));
+ break;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<LargeListViewArray, false>(
+ checked_cast<const LargeListViewArray&>(*in_array),
memory_pool)));
+ break;
+ }
+ }
+ default:
+ return Status::Invalid("Unknown or unsupported arrow nested type: ",
+ in_array->type()->ToString());
Review Comment:
This can be:
```cpp
default:
Unreachable("unexpected non-list type");
break;
```
Because reaching this case would be a bug in your loop condition: there is
one currently — you let `Type::MAP` pass and fails flattening `list<map<...>>`
arrays which should simply flatten to `map<...>` arrays ;)
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->values_ = MakeArray(self->data_->child_data[0]);
}
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array&
array,
+ MemoryPool*
memory_pool) {
+ Type::type kind = array.type_id();
+ std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+ while (is_list_like(kind) || is_list_view(kind)) {
+ const bool has_nulls = array.null_count() > 0;
Review Comment:
Unnecessary. See comments below.
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->values_ = MakeArray(self->data_->child_data[0]);
}
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array&
array,
+ MemoryPool*
memory_pool) {
+ Type::type kind = array.type_id();
+ std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+ while (is_list_like(kind) || is_list_view(kind)) {
+ const bool has_nulls = array.null_count() > 0;
+ std::shared_ptr<Array> out;
Review Comment:
You don't need this temporary variable. You can use and assign on `array` on
the same statement. Less local variables == less destructors to call at the end.
##########
cpp/src/arrow/array/array_nested.cc:
##########
@@ -469,6 +469,69 @@ inline void SetListData(VarLengthListLikeArray<TYPE>* self,
self->values_ = MakeArray(self->data_->child_data[0]);
}
+Result<std::shared_ptr<Array>> FlattenLogicalListRecursively(const Array&
array,
+ MemoryPool*
memory_pool) {
+ Type::type kind = array.type_id();
+ std::shared_ptr<Array> in_array = array.Slice(0, array.length());
+ while (is_list_like(kind) || is_list_view(kind)) {
+ const bool has_nulls = array.null_count() > 0;
+ std::shared_ptr<Array> out;
+ switch (kind) {
+ case Type::LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out,
+ FlattenListArray(checked_cast<const ListArray&>(*in_array),
memory_pool));
+ break;
+ }
+ case Type::LARGE_LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out, FlattenListArray(checked_cast<const
LargeListArray&>(*in_array),
+ memory_pool));
+ break;
+ }
+ case Type::FIXED_SIZE_LIST: {
+ ARROW_ASSIGN_OR_RAISE(
+ out, FlattenListArray(checked_cast<const
FixedSizeListArray&>(*in_array),
+ memory_pool));
+ break;
+ }
+ case Type::LIST_VIEW: {
+ if (has_nulls) {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<ListViewArray, true>(
+ checked_cast<const ListViewArray&>(*in_array),
memory_pool)));
+ break;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<ListViewArray, false>(
+ checked_cast<const ListViewArray&>(*in_array),
memory_pool)));
+ break;
+ }
+ }
+ case Type::LARGE_LIST_VIEW: {
+ if (has_nulls) {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<LargeListViewArray, true>(
+ checked_cast<const LargeListViewArray&>(*in_array),
memory_pool)));
+ break;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(
+ out, (FlattenListViewArray<LargeListViewArray, false>(
+ checked_cast<const LargeListViewArray&>(*in_array),
memory_pool)));
+ break;
+ }
+ }
+ default:
+ return Status::Invalid("Unknown or unsupported arrow nested type: ",
+ in_array->type()->ToString());
+ }
+
+ in_array = out;
+ kind = in_array->type_id();
+ }
+ return std::move(in_array);
Review Comment:
Don't move returned values. At this position, the compiler automatically
turns the returned value into a `&&` and passes that to the `Result`
constructor. It took me a while to learn this!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]