milesgranger commented on code in PR #14395: URL: https://github.com/apache/arrow/pull/14395#discussion_r1007742545
########## cpp/src/arrow/compute/kernels/scalar_nested.cc: ########## @@ -87,6 +89,206 @@ Status GetListElementIndex(const ExecValue& value, T* out) { return Status::OK(); } +template <typename Type, typename IndexType> +struct ListSlice { + using offset_type = typename Type::offset_type; + + static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + const auto opts = OptionsWrapper<ListSliceOptions>::Get(ctx); + + // Invariants + if (opts.start < 0 || (opts.start >= opts.stop && opts.stop != -1)) { + // TODO: support start == stop which should give empty lists + return Status::Invalid("`start`(", opts.start, + ") should be greater than 0 and smaller than `stop`(", + opts.stop, ")"); + } + if (opts.step != 1) { + // TODO: support step in slicing + return Status::NotImplemented( + "Setting `step` to anything other than 1 is not supported; got step=", + opts.step); + } + if (opts.stop == -1) { + // TODO: Support slicing to arbitrary end + // For variable size list, this would be the largest difference in offsets + // For fixed size list, this would be the fixed size. + return Status::NotImplemented( + "Setting `stop==-1` to signify slicing to end, not yet implemented."); + } + + const ArraySpan& list_ = batch[0].array; + const Type* list_type = checked_cast<const Type*>(list_.type); + const auto value_type = list_type->value_type(); + + // construct builder + std::unique_ptr<ArrayBuilder> builder; + if (opts.return_fixed_size_list) { + RETURN_NOT_OK(MakeBuilder( + ctx->memory_pool(), + fixed_size_list(value_type, static_cast<int32_t>(opts.stop - opts.start)), + &builder)); + } else { + if constexpr (std::is_same_v<Type, LargeListType>) { + RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), large_list(value_type), &builder)); + } else { + RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list(value_type), &builder)); + } + } + RETURN_NOT_OK(builder->Reserve(list_.length - 1)); + + // construct array values + if constexpr (std::is_same_v<Type, FixedSizeListType>) { + if (opts.return_fixed_size_list) { + RETURN_NOT_OK( + BuildArrayFromFixedSizeListType<FixedSizeListBuilder>(batch, opts, *builder)); + } else { + return Status::Invalid( + "Requesting ListArray when slicing a FixedSizeList array would always " + "result in a FixedSizeList. Please set `return_fixed_size_list=true` when " + "slicing a FixedSizeList."); + } + } else { + if (opts.return_fixed_size_list) { + RETURN_NOT_OK( + BuildArrayFromListType<FixedSizeListBuilder>(batch, opts, *builder)); + } else { + if (std::is_same_v<Type, LargeListType>) { + RETURN_NOT_OK(BuildArrayFromListType<LargeListBuilder>(batch, opts, *builder)); + } else { + RETURN_NOT_OK(BuildArrayFromListType<ListBuilder>(batch, opts, *builder)); + } + } + } + + // build output arrays and set result + ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish()); + out->value = result->data(); + return Status::OK(); + } + template <typename BuilderType> + static Status BuildArrayFromFixedSizeListType(const ExecSpan& batch, + const ListSliceOptions opts, + ArrayBuilder& builder) { + const auto list_size = + checked_cast<const FixedSizeListType&>(*batch[0].type()).list_size(); + const ArraySpan& list_ = batch[0].array; + const ArraySpan& list_values = list_.child_data[0]; + const offset_type n_offsets = list_values.length / list_size; + + auto list_builder = checked_cast<BuilderType*>(&builder); + for (offset_type offset = 0; offset < n_offsets * list_size; + offset = offset + list_size) { + auto next_offset = offset + list_size; + if (list_values.IsNull(offset)) { Review Comment: I think that works for ListArray, but not FixedSizeListArray, and checking it locally all fixed size input/output fail; think part of that is FixedSizeList doesn't have offsets. The offset here is the calculated offset into the FixedSizeListArray's values. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org