felipecrv commented on code in PR #42067:
URL: https://github.com/apache/arrow/pull/42067#discussion_r1635626416
##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -148,128 +191,197 @@ struct ListSlice {
return Status::Invalid("`step` must be >= 1, got: ", opts.step);
}
- const ArraySpan& list_array = batch[0].array;
- const Type* list_type = checked_cast<const Type*>(list_array.type);
- const auto value_type = list_type->field(0);
- const auto return_fixed_size_list = opts.return_fixed_size_list.value_or(
- list_type->id() == arrow::Type::FIXED_SIZE_LIST);
- std::unique_ptr<ArrayBuilder> builder;
-
- // should have been checked in resolver
- // if stop not set, then cannot return fixed size list without input being
fixed size
- // list b/c we cannot determine the max list element in type resolving.
- DCHECK(opts.stop.has_value() ||
- (!opts.stop.has_value() && (!return_fixed_size_list ||
- list_type->id() ==
arrow::Type::FIXED_SIZE_LIST)));
-
- // construct array values
- if (return_fixed_size_list) {
- int32_t stop;
- if (opts.stop.has_value()) {
- stop = static_cast<int32_t>(opts.stop.value());
- } else {
- DCHECK_EQ(list_type->id(), arrow::Type::FIXED_SIZE_LIST);
- stop = reinterpret_cast<const
FixedSizeListType*>(list_type)->list_size();
- }
- const auto size = std::max(stop - static_cast<int32_t>(opts.start), 0);
- const auto length = bit_util::CeilDiv(size, opts.step);
- RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(),
- fixed_size_list(value_type,
static_cast<int32_t>(length)),
- &builder));
- RETURN_NOT_OK(BuildArray<FixedSizeListBuilder>(batch, opts, *builder));
- } else {
- if constexpr (std::is_same_v<Type, LargeListType>) {
- RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), large_list(value_type),
&builder));
- RETURN_NOT_OK(BuildArray<LargeListBuilder>(batch, opts, *builder));
- } else {
- RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), list(value_type),
&builder));
- RETURN_NOT_OK(BuildArray<ListBuilder>(batch, opts, *builder));
- }
+ auto* pool = ctx->memory_pool();
+ ARROW_ASSIGN_OR_RAISE(auto output_type_holder, ListSliceOutputType(opts,
*list_type));
+ constexpr auto kInputTypeId = InListType::type_id;
+ auto output_type = output_type_holder.GetSharedPtr();
+ switch (output_type->id()) {
+ case Type::LIST:
+ DCHECK(kInputTypeId == Type::LIST || kInputTypeId ==
Type::FIXED_SIZE_LIST);
+ if constexpr (kInputTypeId == Type::LIST ||
+ kInputTypeId == Type::FIXED_SIZE_LIST) {
Review Comment:
The condition is always true at runtime (or the branch is never reached), but
the `if constexpr` here and in the other cases prevents the instantiation of
`ListSlice<InListType>::BuildArray<BuilderType>` for combinations of
`InListType` and `BuilderType` that will never have to run.
The `DCHECK` helps ensure that the expressions in the `if constexpr` are not
too restrictive.
`LIST` can only be the output type when input is `LIST` or `FIXED_SIZE_LIST`
so only these are needed:
- `ListSlice<ListType>::BuildArray<ListBuilder>`
- `ListSlice<FixedSizeListType>::BuildArray<ListBuilder>`
`BuildArray<ListBuilder>` won't be instantiated for
`ListSlice<LargeListType>`, `ListSlice<ListViewType>`, and
`ListSlice<LargeListViewType>`.
`BuildArray<FixedSizeListBuilder>` is instantiated for all `ListSlice<T>`
because any input can lead to an FSL as output.
Then only `ListSlice<LargeListType>::BuildArray<LargeListBuilder>`,
`ListSlice<ListViewType>::BuildArray<ListViewBuilder>`, and
`ListSlice<LargeListViewType>::BuildArray<LargeListViewBuilder>` are needed.
So instead of 5 X 5 = 25 `BuildArray<>` instances, we end up with only
2+1+5+1+1 = 10 instances in the binary.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]