pitrou commented on a change in pull request #11080:
URL: https://github.com/apache/arrow/pull/11080#discussion_r710192494
##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else.cc
##########
@@ -2063,51 +2122,173 @@ struct CoalesceFunctor<Type,
enable_if_base_binary<Type>> {
}
static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
- // Special case: grab any leading non-null scalar or array arguments
+ return ExecVarWidthCoalesceImpl(
+ ctx, batch, out,
+ [&](ArrayBuilder* builder) {
+ int64_t reservation = 0;
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ const ArrayType array(datum.array());
+ reservation = std::max<int64_t>(reservation,
array.total_values_length());
+ } else {
+ const auto& scalar = *datum.scalar();
+ if (scalar.is_valid) {
+ const int64_t size = UnboxScalar<Type>::Unbox(scalar).size();
+ reservation = std::max<int64_t>(reservation, batch.length *
size);
+ }
+ }
+ }
+ return checked_cast<BuilderType*>(builder)->ReserveData(reservation);
+ },
+ [&](ArrayBuilder* builder, const Scalar& scalar) {
+ return checked_cast<BuilderType*>(builder)->Append(
+ UnboxScalar<Type>::Unbox(scalar));
+ });
+ }
+};
+
+template <>
+struct CoalesceFunctor<FixedSizeListType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
for (const auto& datum : batch.values) {
- if (datum.is_scalar()) {
- if (!datum.scalar()->is_valid) continue;
- ARROW_ASSIGN_OR_RAISE(
- *out, MakeArrayFromScalar(*datum.scalar(), batch.length,
ctx->memory_pool()));
- return Status::OK();
- } else if (datum.is_array() && !datum.array()->MayHaveNulls()) {
- *out = datum;
- return Status::OK();
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <typename Type>
+struct CoalesceFunctor<Type, enable_if_var_size_list<Type>> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
}
- break;
}
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <>
+struct CoalesceFunctor<MapType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <>
+struct CoalesceFunctor<StructType> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalarCoalesce(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
+ std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+ return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+ }
+};
+
+template <typename Type>
+struct CoalesceFunctor<Type, enable_if_union<Type>> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ // Unions don't have top-level nulls, so a specialized implementation is
needed
+ for (const auto& datum : batch.values) {
+ if (datum.is_array()) {
+ return ExecArray(ctx, batch, out);
+ }
+ }
+ return ExecScalar(ctx, batch, out);
+ }
+
+ static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum*
out) {
ArrayData* output = out->mutable_array();
- BuilderType builder(batch[0].type(), ctx->memory_pool());
- RETURN_NOT_OK(builder.Reserve(batch.length));
+ std::unique_ptr<ArrayBuilder> raw_builder;
+ RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), out->type(), &raw_builder));
+ RETURN_NOT_OK(raw_builder->Reserve(batch.length));
+
+ // TODO: make sure differing union types are rejected
Review comment:
Do you mean to add a test for this?
##########
File path: c_glib/test/test-sparse-union-scalar.rb
##########
@@ -49,7 +49,7 @@ def test_equal
end
def test_to_s
- assert_equal("...", @scalar.to_s)
+ assert_equal("(2: number: int8 = -29)", @scalar.to_s)
Review comment:
This is a bit cryptic if you don't know a union is expected. What about
e.g. `union{number: int8 = -29}`?
##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else.cc
##########
@@ -1988,6 +1988,65 @@ Status ExecBinaryCoalesce(KernelContext* ctx, Datum
left, Datum right, int64_t l
return Status::OK();
}
+template <typename AppendScalar>
+static Status ExecVarWidthCoalesceImpl(KernelContext* ctx, const ExecBatch&
batch,
+ Datum* out,
+ std::function<Status(ArrayBuilder*)>
reserve_data,
+ AppendScalar append_scalar) {
+ // Special case: grab any leading non-null scalar or array arguments
+ for (const auto& datum : batch.values) {
+ if (datum.is_scalar()) {
+ if (!datum.scalar()->is_valid) continue;
+ ARROW_ASSIGN_OR_RAISE(
+ *out, MakeArrayFromScalar(*datum.scalar(), batch.length,
ctx->memory_pool()));
+ return Status::OK();
+ } else if (datum.is_array() && !datum.array()->MayHaveNulls()) {
+ *out = datum;
+ return Status::OK();
+ }
+ break;
+ }
+ ArrayData* output = out->mutable_array();
+ std::unique_ptr<ArrayBuilder> raw_builder;
+ RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), out->type(), &raw_builder));
+ RETURN_NOT_OK(raw_builder->Reserve(batch.length));
+ RETURN_NOT_OK(reserve_data(raw_builder.get()));
+
+ for (int64_t i = 0; i < batch.length; i++) {
+ bool set = false;
+ for (const auto& datum : batch.values) {
+ if (datum.is_scalar()) {
+ if (datum.scalar()->is_valid) {
+ RETURN_NOT_OK(append_scalar(raw_builder.get(), *datum.scalar()));
+ set = true;
+ break;
+ }
+ } else {
+ const ArrayData& source = *datum.array();
+ if (!source.MayHaveNulls() ||
+ BitUtil::GetBit(source.buffers[0]->data(), source.offset + i)) {
+ RETURN_NOT_OK(raw_builder->AppendArraySlice(source, i,
/*length=*/1));
Review comment:
I assume this isn't going to be very performant compared to e.g.
`raw_builder->Append(source_array.GetView(i))`. Not necessarily worth
addressing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]