felipecrv commented on code in PR #41295:
URL: https://github.com/apache/arrow/pull/41295#discussion_r1570965731


##########
cpp/src/arrow/compute/api_vector.cc:
##########
@@ -153,6 +153,8 @@ static auto kRankOptionsType = 
GetFunctionOptionsType<RankOptions>(
     DataMember("tiebreaker", &RankOptions::tiebreaker));
 static auto kPairwiseOptionsType = GetFunctionOptionsType<PairwiseOptions>(
     DataMember("periods", &PairwiseOptions::periods));
+static auto kListFlattenOptionsType = 
GetFunctionOptionsType<ListFlattenOptions>(
+    DataMember("recursively", &ListFlattenOptions::recursively));

Review Comment:
   For the parameter name, we should go with `recursive`.



##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -39,12 +39,26 @@ namespace {
 template <typename Type, typename offset_type = typename Type::offset_type>
 Status ListValueLength(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
   const ArraySpan& arr = batch[0].array;
+  const auto kind = arr.type->id();
   ArraySpan* out_arr = out->array_span_mutable();
   auto out_values = out_arr->GetValues<offset_type>(1);
-  const offset_type* offsets = arr.GetValues<offset_type>(1);
-  // Offsets are always well-defined and monotonic, even for null values
-  for (int64_t i = 0; i < arr.length; ++i) {
-    *out_values++ = offsets[i + 1] - offsets[i];
+  if (is_list_view(kind)) {

Review Comment:
   you can `is_list_view(*arr.type)` and avoid needing another local var -- 
`kind`



##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -59,6 +73,24 @@ Status FixedSizeListValueLength(KernelContext* ctx, const 
ExecSpan& batch,
   return Status::OK();
 }
 
+template <typename InListType>
+void AddListValueLengthKernel(ScalarFunction* func,
+                              const std::shared_ptr<DataType>& out_type) {
+  auto in_type = {InputType(InListType::type_id)};
+  ScalarKernel kernel(in_type, out_type, ListValueLength<InListType>);
+  DCHECK_OK(func->AddKernel(std::move(kernel)));
+}
+

Review Comment:
   If you template-specialize, the calls in `AddListValueLengthKernels` can be 
uniform.
   
   ```suggestion
   template <>
   void AddListValueLengthKernel<FixedSizeListType>(
       ScalarFunction* func, const std::shared_ptr<DataType>& out_type) {
     auto in_type = {InputType(Type::FIXED_SIZE_LIST)};
     ScalarKernel kernel(in_type, out_type, FixedSizeListValueLength);
     DCHECK_OK(func->AddKernel(std::move(kernel)));
   }
   
   ```



##########
cpp/src/arrow/compute/api_vector.h:
##########
@@ -245,6 +245,18 @@ class ARROW_EXPORT PairwiseOptions : public 
FunctionOptions {
   int64_t periods = 1;
 };
 
+/// \brief Options for list_flatten function
+class ARROW_EXPORT ListFlattenOptions : public FunctionOptions {
+ public:
+  explicit ListFlattenOptions(bool recursively = false);
+  static constexpr char const kTypeName[] = "ListFlattenOptions";
+  static ListFlattenOptions Defaults() { return ListFlattenOptions(); }
+
+  /// Control the version of 'Flatten' that keeps recursively flattening
+  /// until an array of non-list values is reached.

Review Comment:
   "version" is an implementation detail
   ```suggestion
     /// \brief If true, the list is flattened recursively until a non-list
     /// array is formed.
   ```



##########
cpp/src/arrow/compute/kernels/codegen_internal.cc:
##########
@@ -57,8 +57,14 @@ Result<TypeHolder> LastType(KernelContext*, const 
std::vector<TypeHolder>& types
 }
 
 Result<TypeHolder> ListValuesType(KernelContext*, const 
std::vector<TypeHolder>& args) {

Review Comment:
   You should add a `bool recursive` parameter (without default value) and skip 
the for loop if it's `false` to match existing usage.



##########
cpp/src/arrow/compute/api_vector.cc:
##########
@@ -224,6 +226,10 @@ PairwiseOptions::PairwiseOptions(int64_t periods)
     : FunctionOptions(internal::kPairwiseOptionsType), periods(periods) {}
 constexpr char PairwiseOptions::kTypeName[];
 
+ListFlattenOptions::ListFlattenOptions(bool recursively)
+    : FunctionOptions(internal::kListFlattenOptionsType), 
recursively(recursively) {}

Review Comment:
   `recursive`



##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -39,12 +39,26 @@ namespace {
 template <typename Type, typename offset_type = typename Type::offset_type>
 Status ListValueLength(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
   const ArraySpan& arr = batch[0].array;
+  const auto kind = arr.type->id();
   ArraySpan* out_arr = out->array_span_mutable();
   auto out_values = out_arr->GetValues<offset_type>(1);
-  const offset_type* offsets = arr.GetValues<offset_type>(1);
-  // Offsets are always well-defined and monotonic, even for null values
-  for (int64_t i = 0; i < arr.length; ++i) {
-    *out_values++ = offsets[i + 1] - offsets[i];
+  if (is_list_view(kind)) {
+    // [Large]ListView's buffer layout:
+    //  buffer1 : valid bitmap
+    //  buffer2 : elements' start offset in current array
+    //  buffer3 : elements' size
+    //
+    // It's unnecessary to calculate according offsets.
+    const auto* sizes = arr.GetValues<offset_type>(2);
+    for (int64_t i = 0; i < arr.length; i++) {
+      *out_values++ = sizes[i];
+    }

Review Comment:
   I think this can become a `memcpy`
   
   ```suggestion
       const auto* sizes = arr.GetValues<offset_type>(2);
       if (arr.length() > 0) {
         memcpy(out_values, sizes, arr.length() * sizeof(offset_type));
       }
   ```



##########
cpp/src/arrow/compute/kernels/vector_nested.cc:
##########
@@ -29,8 +30,16 @@ namespace {
 
 template <typename Type>
 Status ListFlatten(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) 
{
+  auto recursively = OptionsWrapper<ListFlattenOptions>::Get(ctx).recursively;
   typename TypeTraits<Type>::ArrayType 
list_array(batch[0].array.ToArrayData());
-  ARROW_ASSIGN_OR_RAISE(auto result, list_array.Flatten(ctx->memory_pool()));
+
+  std::shared_ptr<Array> result;
+  if (!recursively) {
+    ARROW_ASSIGN_OR_RAISE(result, list_array.Flatten(ctx->memory_pool()));
+  } else {
+    ARROW_ASSIGN_OR_RAISE(result, 
list_array.FlattenRecursively(ctx->memory_pool()));
+  }

Review Comment:
   This can be made more compact
   
   ```suggestion
     auto pool = ctx->memory_pool();
     ARROW_ASSIGN_OR_RAISE(
       auto result,
       (recursive ? list_array.FlattenRecursively(pool) : 
list_array.Flatten(pool)));
   ```



##########
cpp/src/arrow/compute/kernels/vector_nested.cc:
##########
@@ -110,7 +123,7 @@ const FunctionDoc list_flatten_doc(
     ("`lists` must have a list-like type.\n"
      "Return an array with the top list level flattened.\n"
      "Top-level null values in `lists` do not emit anything in the input."),

Review Comment:
   Need to re-phrase this since options can affect the behavior now.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to