jorisvandenbossche commented on code in PR #14395:
URL: https://github.com/apache/arrow/pull/14395#discussion_r1016266074


##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -87,6 +89,203 @@ Status GetListElementIndex(const ExecValue& value, T* out) {
   return Status::OK();
 }
 
+template <typename T>
+std::string ToString(const std::optional<T>& o) {
+  return o.has_value() ? std::to_string(*o) : "(nullopt)";
+}
+
+template <typename Type, typename IndexType>
+struct ListSlice {
+  using offset_type = typename Type::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
+    const auto opts = OptionsWrapper<ListSliceOptions>::Get(ctx);
+
+    // Invariants
+    if (!opts.stop.has_value()) {
+      // TODO(ARROW-18280): Support slicing to arbitrary end
+      // For variable size list, this would be the largest difference in 
offsets
+      // For fixed size list, this would be the fixed size.
+      return Status::NotImplemented(
+          "Slicing to end not yet implemented, please set `stop` parameter.");
+    }
+    if (opts.start < 0 || opts.start >= opts.stop.value()) {
+      // TODO(ARROW-18281): support start == stop which should give empty lists
+      return Status::Invalid("`start`(", opts.start,
+                             ") should be greater than 0 and smaller than 
`stop`(",
+                             ToString(opts.stop), ")");
+    }
+    if (opts.step != 1) {
+      // TODO(ARROW-18282): support step in slicing
+      return Status::NotImplemented(
+          "Setting `step` to anything other than 1 is not supported; got 
step=",
+          opts.step);
+    }
+
+    const ArraySpan& list_array = batch[0].array;
+    const Type* list_type = checked_cast<const Type*>(list_array.type);
+    const auto field_name = list_type->field(0)->name();
+    const auto value_type = list_type->field(0)->WithName(field_name);

Review Comment:
   I am not sure the `WithName` call is actually needed here. Since `field_name`
is taken from the same `list_type->field(0)`, the field should already have
that name.
   
   So this could be simplified to:
   
   ```suggestion
       const auto value_type = list_type->field(0);
   ```
   
   ?



##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -87,6 +89,203 @@ Status GetListElementIndex(const ExecValue& value, T* out) {
   return Status::OK();
 }
 
+template <typename T>
+std::string ToString(const std::optional<T>& o) {
+  return o.has_value() ? std::to_string(*o) : "(nullopt)";
+}
+
+template <typename Type, typename IndexType>
+struct ListSlice {
+  using offset_type = typename Type::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
+    const auto opts = OptionsWrapper<ListSliceOptions>::Get(ctx);
+
+    // Invariants
+    if (!opts.stop.has_value()) {
+      // TODO(ARROW-18280): Support slicing to arbitrary end
+      // For variable size list, this would be the largest difference in 
offsets
+      // For fixed size list, this would be the fixed size.
+      return Status::NotImplemented(
+          "Slicing to end not yet implemented, please set `stop` parameter.");
+    }
+    if (opts.start < 0 || opts.start >= opts.stop.value()) {
+      // TODO(ARROW-18281): support start == stop which should give empty lists
+      return Status::Invalid("`start`(", opts.start,
+                             ") should be greater than 0 and smaller than 
`stop`(",
+                             ToString(opts.stop), ")");
+    }
+    if (opts.step != 1) {
+      // TODO(ARROW-18282): support step in slicing
+      return Status::NotImplemented(
+          "Setting `step` to anything other than 1 is not supported; got 
step=",
+          opts.step);
+    }
+
+    const ArraySpan& list_array = batch[0].array;
+    const Type* list_type = checked_cast<const Type*>(list_array.type);
+    const auto field_name = list_type->field(0)->name();
+    const auto value_type = list_type->field(0)->WithName(field_name);
+    const auto return_fixed_size_list = opts.return_fixed_size_list.value_or(
+        list_type->id() == arrow::Type::FIXED_SIZE_LIST);
+    std::unique_ptr<ArrayBuilder> builder;
+
+    // construct array values
+    if (return_fixed_size_list) {
+      RETURN_NOT_OK(MakeBuilder(
+          ctx->memory_pool(),
+          fixed_size_list(value_type,
+                          static_cast<int32_t>(opts.stop.value() - 
opts.start)),
+          &builder));
+      RETURN_NOT_OK(BuildArray<FixedSizeListBuilder>(batch, opts, *builder));
+    } else {
+      if constexpr (std::is_same_v<Type, LargeListType>) {
+        RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), large_list(value_type), 
&builder));
+        RETURN_NOT_OK(BuildArray<LargeListBuilder>(batch, opts, *builder));
+      } else {
+        RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list(value_type), 
&builder));
+        RETURN_NOT_OK(BuildArray<ListBuilder>(batch, opts, *builder));
+      }
+    }
+
+    // build output arrays and set result
+    ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+    out->value = std::move(result->data());
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArray(const ExecSpan& batch, const ListSliceOptions& opts,
+                           ArrayBuilder& builder) {
+    if constexpr (std::is_same_v<Type, FixedSizeListType>) {
+      RETURN_NOT_OK(BuildArrayFromFixedSizeListType<BuilderType>(batch, opts, 
builder));
+    } else {
+      RETURN_NOT_OK(BuildArrayFromListType<BuilderType>(batch, opts, builder));
+    }
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArrayFromFixedSizeListType(const ExecSpan& batch,
+                                                const ListSliceOptions& opts,
+                                                ArrayBuilder& builder) {
+    const auto list_size =
+        checked_cast<const FixedSizeListType&>(*batch[0].type()).list_size();
+    const ArraySpan& list_array = batch[0].array;
+    const ArraySpan& list_values = list_array.child_data[0];
+
+    auto list_builder = checked_cast<BuilderType*>(&builder);
+    for (auto i = 0; i < list_array.length; ++i) {
+      auto offset = (i + list_array.offset) * list_size;
+      auto next_offset = offset + list_size;
+      if (list_array.IsNull(i)) {
+        RETURN_NOT_OK(list_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(SetValues<BuilderType>(list_builder, offset, 
next_offset, &opts,
+                                             &list_values));
+      }
+    }
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArrayFromListType(const ExecSpan& batch,
+                                       const ListSliceOptions& opts,
+                                       ArrayBuilder& builder) {
+    const ArraySpan& list_array = batch[0].array;
+    const offset_type* offsets = list_array.GetValues<offset_type>(1);
+
+    const ArraySpan& list_values = list_array.child_data[0];
+
+    auto list_builder = checked_cast<BuilderType*>(&builder);
+    for (auto i = 0; i < list_array.length; ++i) {
+      const offset_type offset = offsets[i];
+      const offset_type next_offset = offsets[i + 1];
+      if (list_array.IsNull(i)) {
+        RETURN_NOT_OK(list_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(SetValues<BuilderType>(list_builder, offset, 
next_offset, &opts,
+                                             &list_values));
+      }
+    }
+    return Status::OK();
+  }
+  template <typename BuilderType>
+  static Status SetValues(BuilderType* list_builder, const offset_type offset,
+                          const offset_type next_offset, const 
ListSliceOptions* opts,
+                          const ArraySpan* list_values) {
+    auto value_builder = list_builder->value_builder();
+    auto cursor = offset;
+
+    RETURN_NOT_OK(list_builder->Append());
+    while (cursor < offset + (opts->stop.value() - opts->start)) {
+      if (cursor + opts->start >= next_offset) {
+        if constexpr (!std::is_same_v<BuilderType, FixedSizeListBuilder>) {
+          break;  // don't pad nulls for variable sized list output
+        }
+        RETURN_NOT_OK(value_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(
+            value_builder->AppendArraySlice(*list_values, cursor + 
opts->start, 1));
+      }
+      ++cursor;
+    }
+    return Status::OK();
+  }
+};
+
+Result<TypeHolder> MakeListSliceResolve(KernelContext* ctx,
+                                        const std::vector<TypeHolder>& types) {
+  const auto& opts = OptionsWrapper<ListSliceOptions>::Get(ctx);
+  const auto list_type = checked_cast<const BaseListType*>(types[0].type);
+  const auto field_name = list_type->field(0)->name();
+  const auto value_type = list_type->field(0)->WithName(field_name);

Review Comment:
   ```suggestion
     const auto value_type = list_type->field(0);
   ```



##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -87,6 +89,203 @@ Status GetListElementIndex(const ExecValue& value, T* out) {
   return Status::OK();
 }
 
+template <typename T>
+std::string ToString(const std::optional<T>& o) {
+  return o.has_value() ? std::to_string(*o) : "(nullopt)";
+}
+
+template <typename Type, typename IndexType>
+struct ListSlice {
+  using offset_type = typename Type::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
+    const auto opts = OptionsWrapper<ListSliceOptions>::Get(ctx);
+
+    // Invariants
+    if (!opts.stop.has_value()) {
+      // TODO(ARROW-18280): Support slicing to arbitrary end
+      // For variable size list, this would be the largest difference in 
offsets
+      // For fixed size list, this would be the fixed size.
+      return Status::NotImplemented(
+          "Slicing to end not yet implemented, please set `stop` parameter.");
+    }
+    if (opts.start < 0 || opts.start >= opts.stop.value()) {
+      // TODO(ARROW-18281): support start == stop which should give empty lists
+      return Status::Invalid("`start`(", opts.start,
+                             ") should be greater than 0 and smaller than 
`stop`(",
+                             ToString(opts.stop), ")");
+    }
+    if (opts.step != 1) {
+      // TODO(ARROW-18282): support step in slicing
+      return Status::NotImplemented(
+          "Setting `step` to anything other than 1 is not supported; got 
step=",
+          opts.step);
+    }
+
+    const ArraySpan& list_array = batch[0].array;
+    const Type* list_type = checked_cast<const Type*>(list_array.type);
+    const auto field_name = list_type->field(0)->name();
+    const auto value_type = list_type->field(0)->WithName(field_name);
+    const auto return_fixed_size_list = opts.return_fixed_size_list.value_or(
+        list_type->id() == arrow::Type::FIXED_SIZE_LIST);
+    std::unique_ptr<ArrayBuilder> builder;
+
+    // construct array values
+    if (return_fixed_size_list) {
+      RETURN_NOT_OK(MakeBuilder(
+          ctx->memory_pool(),
+          fixed_size_list(value_type,
+                          static_cast<int32_t>(opts.stop.value() - 
opts.start)),
+          &builder));
+      RETURN_NOT_OK(BuildArray<FixedSizeListBuilder>(batch, opts, *builder));
+    } else {
+      if constexpr (std::is_same_v<Type, LargeListType>) {
+        RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), large_list(value_type), 
&builder));
+        RETURN_NOT_OK(BuildArray<LargeListBuilder>(batch, opts, *builder));
+      } else {
+        RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list(value_type), 
&builder));
+        RETURN_NOT_OK(BuildArray<ListBuilder>(batch, opts, *builder));
+      }
+    }
+
+    // build output arrays and set result
+    ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+    out->value = std::move(result->data());
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArray(const ExecSpan& batch, const ListSliceOptions& opts,
+                           ArrayBuilder& builder) {
+    if constexpr (std::is_same_v<Type, FixedSizeListType>) {
+      RETURN_NOT_OK(BuildArrayFromFixedSizeListType<BuilderType>(batch, opts, 
builder));
+    } else {
+      RETURN_NOT_OK(BuildArrayFromListType<BuilderType>(batch, opts, builder));
+    }
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArrayFromFixedSizeListType(const ExecSpan& batch,
+                                                const ListSliceOptions& opts,
+                                                ArrayBuilder& builder) {
+    const auto list_size =
+        checked_cast<const FixedSizeListType&>(*batch[0].type()).list_size();
+    const ArraySpan& list_array = batch[0].array;
+    const ArraySpan& list_values = list_array.child_data[0];
+
+    auto list_builder = checked_cast<BuilderType*>(&builder);
+    for (auto i = 0; i < list_array.length; ++i) {
+      auto offset = (i + list_array.offset) * list_size;
+      auto next_offset = offset + list_size;
+      if (list_array.IsNull(i)) {
+        RETURN_NOT_OK(list_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(SetValues<BuilderType>(list_builder, offset, 
next_offset, &opts,
+                                             &list_values));
+      }
+    }
+    return Status::OK();
+  }
+
+  template <typename BuilderType>
+  static Status BuildArrayFromListType(const ExecSpan& batch,
+                                       const ListSliceOptions& opts,
+                                       ArrayBuilder& builder) {
+    const ArraySpan& list_array = batch[0].array;
+    const offset_type* offsets = list_array.GetValues<offset_type>(1);
+
+    const ArraySpan& list_values = list_array.child_data[0];
+
+    auto list_builder = checked_cast<BuilderType*>(&builder);
+    for (auto i = 0; i < list_array.length; ++i) {
+      const offset_type offset = offsets[i];
+      const offset_type next_offset = offsets[i + 1];
+      if (list_array.IsNull(i)) {
+        RETURN_NOT_OK(list_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(SetValues<BuilderType>(list_builder, offset, 
next_offset, &opts,
+                                             &list_values));
+      }
+    }
+    return Status::OK();
+  }
+  template <typename BuilderType>
+  static Status SetValues(BuilderType* list_builder, const offset_type offset,
+                          const offset_type next_offset, const 
ListSliceOptions* opts,
+                          const ArraySpan* list_values) {
+    auto value_builder = list_builder->value_builder();
+    auto cursor = offset;
+
+    RETURN_NOT_OK(list_builder->Append());
+    while (cursor < offset + (opts->stop.value() - opts->start)) {
+      if (cursor + opts->start >= next_offset) {
+        if constexpr (!std::is_same_v<BuilderType, FixedSizeListBuilder>) {
+          break;  // don't pad nulls for variable sized list output
+        }
+        RETURN_NOT_OK(value_builder->AppendNull());
+      } else {
+        RETURN_NOT_OK(
+            value_builder->AppendArraySlice(*list_values, cursor + 
opts->start, 1));
+      }
+      ++cursor;
+    }
+    return Status::OK();
+  }
+};
+
+Result<TypeHolder> MakeListSliceResolve(KernelContext* ctx,
+                                        const std::vector<TypeHolder>& types) {
+  const auto& opts = OptionsWrapper<ListSliceOptions>::Get(ctx);
+  const auto list_type = checked_cast<const BaseListType*>(types[0].type);
+  const auto field_name = list_type->field(0)->name();
+  const auto value_type = list_type->field(0)->WithName(field_name);
+  const auto return_fixed_size_list =
+      opts.return_fixed_size_list.value_or(list_type->id() == 
Type::FIXED_SIZE_LIST);
+  if (return_fixed_size_list) {
+    if (!opts.stop.has_value()) {
+      return Status::NotImplemented(
+          "Unable to produce FixedSizeListArray without `stop` being set.");
+    }
+    return fixed_size_list(value_type,
+                           static_cast<int32_t>(opts.stop.value() - 
opts.start));
+  } else {
+    // Returning large list if that's what we got in and didn't ask for fixed 
size
+    if (list_type->id() == Type::LARGE_LIST) {
+      return large_list(value_type);
+    }
+    return list(value_type);
+  }
+}
+
+template <typename InListType, template <typename...> class Functor>
+void AddListSliceKernels(ScalarFunction* func) {
+  for (const auto& index_type : IntTypes()) {
+    auto inputs = {InputType(InListType::type_id)};
+    auto output = OutputType{MakeListSliceResolve};
+    auto scalar_exec = GenerateInteger<Functor, 
InListType>({index_type->id()});

Review Comment:
   I think this "GenerateInteger" is not needed here. It's used for the
"list_element" kernel, because there the "index" argument is an actual typed
kernel input that can be any of the 8 integer types.
   But here, start/stop/step are options (and have a fixed type), and thus
we don't have to generate all variants of the kernel for all integer types.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to