lidavidm commented on a change in pull request #11080:
URL: https://github.com/apache/arrow/pull/11080#discussion_r711073279
##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else.cc
##########
@@ -2063,51 +2122,173 @@ struct CoalesceFunctor<Type,
enable_if_base_binary<Type>> {
}
static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
- // Special case: grab any leading non-null scalar or array arguments
+ return ExecVarWidthCoalesceImpl(
+ ctx, batch, out,
+ [&](ArrayBuilder* builder) {
+ int64_t reservation = 0;
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ const ArrayType array(datum.array());
+ reservation = std::max<int64_t>(reservation,
array.total_values_length());
+ } else {
+ const auto& scalar = *datum.scalar();
+ if (scalar.is_valid) {
+ const int64_t size = UnboxScalar<Type>::Unbox(scalar).size();
+ reservation = std::max<int64_t>(reservation, batch.length *
size);
+ }
+ }
+ }
+ return checked_cast<BuilderType*>(builder)->ReserveData(reservation);
+ },
+ [&](ArrayBuilder* builder, const Scalar& scalar) {
+ return checked_cast<BuilderType*>(builder)->Append(
+ UnboxScalar<Type>::Unbox(scalar));
+ });
+ }
+};
+
+template <>
+struct CoalesceFunctor<FixedSizeListType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
for (const auto& datum : batch.values) {
- if (datum.is_scalar()) {
- if (!datum.scalar()->is_valid) continue;
- ARROW_ASSIGN_OR_RAISE(
- *out, MakeArrayFromScalar(*datum.scalar(), batch.length,
ctx->memory_pool()));
- return Status::OK();
- } else if (datum.is_array() && !datum.array()->MayHaveNulls()) {
- *out = datum;
- return Status::OK();
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <typename Type>
+struct CoalesceFunctor<Type, enable_if_var_size_list<Type>> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
}
- break;
}
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <>
+struct CoalesceFunctor<MapType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <>
+struct CoalesceFunctor<StructType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <typename Type>
+struct CoalesceFunctor<Type, enable_if_union<Type>> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // Unions don't have top-level nulls, so a specialized implementation is needed
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalar(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
ArrayData* output = out->mutable_array();
- BuilderType builder(batch[0].type(), ctx->memory_pool());
- RETURN_NOT_OK(builder.Reserve(batch.length));
+ std::unique_ptr<ArrayBuilder> raw_builder;
+ RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), out->type(), &raw_builder));
+ RETURN_NOT_OK(raw_builder->Reserve(batch.length));
+
+ // TODO: make sure differing union types are rejected
Review comment:
Done - I also adjusted some tests and added a set of tests specifically
for the type promotion helpers we use.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org