lidavidm commented on a change in pull request #11080:
URL: https://github.com/apache/arrow/pull/11080#discussion_r710470727
##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else.cc
##########
@@ -2063,51 +2122,173 @@ struct CoalesceFunctor<Type,
enable_if_base_binary<Type>> {
}
static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
- // Special case: grab any leading non-null scalar or array arguments
+ return ExecVarWidthCoalesceImpl(
+ ctx, batch, out,
+ [&](ArrayBuilder* builder) {
+ int64_t reservation = 0;
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ const ArrayType array(datum.array());
+ reservation = std::max<int64_t>(reservation,
array.total_values_length());
+ } else {
+ const auto& scalar = *datum.scalar();
+ if (scalar.is_valid) {
+ const int64_t size = UnboxScalar<Type>::Unbox(scalar).size();
+ reservation = std::max<int64_t>(reservation, batch.length *
size);
+ }
+ }
+ }
+ return checked_cast<BuilderType*>(builder)->ReserveData(reservation);
+ },
+ [&](ArrayBuilder* builder, const Scalar& scalar) {
+ return checked_cast<BuilderType*>(builder)->Append(
+ UnboxScalar<Type>::Unbox(scalar));
+ });
+ }
+};
+
+template <>
+struct CoalesceFunctor<FixedSizeListType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
for (const auto& datum : batch.values) {
- if (datum.is_scalar()) {
- if (!datum.scalar()->is_valid) continue;
- ARROW_ASSIGN_OR_RAISE(
- *out, MakeArrayFromScalar(*datum.scalar(), batch.length,
ctx->memory_pool()));
- return Status::OK();
- } else if (datum.is_array() && !datum.array()->MayHaveNulls()) {
- *out = datum;
- return Status::OK();
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <typename Type>
+struct CoalesceFunctor<Type, enable_if_var_size_list<Type>> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
}
- break;
}
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <>
+struct CoalesceFunctor<MapType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <>
+struct CoalesceFunctor<StructType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <typename Type>
+struct CoalesceFunctor<Type, enable_if_union<Type>> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // Unions don't have top-level nulls, so a specialized implementation is
needed
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalar(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
ArrayData* output = out->mutable_array();
- BuilderType builder(batch[0].type(), ctx->memory_pool());
- RETURN_NOT_OK(builder.Reserve(batch.length));
+ std::unique_ptr<ArrayBuilder> raw_builder;
+ RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), out->type(), &raw_builder));
+ RETURN_NOT_OK(raw_builder->Reserve(batch.length));
+
+ // TODO: make sure differing union types are rejected
Review comment:
Actually, one more thing I want to do now is have `CheckDispatchBest` also
verify that the promoted `ValueDescr`s returned by `DispatchBest` match the
ones given to the test.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org