pitrou commented on a change in pull request #11080:
URL: https://github.com/apache/arrow/pull/11080#discussion_r710192494



##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else.cc
##########
@@ -2063,51 +2122,173 @@ struct CoalesceFunctor<Type, 
enable_if_base_binary<Type>> {
   }
 
   static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum* 
out) {
-    // Special case: grab any leading non-null scalar or array arguments
+    return ExecVarWidthCoalesceImpl(
+        ctx, batch, out,
+        [&](ArrayBuilder* builder) {
+          int64_t reservation = 0;
+          for (const auto& datum : batch.values) {
+            if (datum.is_array()) {
+              const ArrayType array(datum.array());
+              reservation = std::max<int64_t>(reservation, 
array.total_values_length());
+            } else {
+              const auto& scalar = *datum.scalar();
+              if (scalar.is_valid) {
+                const int64_t size = UnboxScalar<Type>::Unbox(scalar).size();
+                reservation = std::max<int64_t>(reservation, batch.length * 
size);
+              }
+            }
+          }
+          return checked_cast<BuilderType*>(builder)->ReserveData(reservation);
+        },
+        [&](ArrayBuilder* builder, const Scalar& scalar) {
+          return checked_cast<BuilderType*>(builder)->Append(
+              UnboxScalar<Type>::Unbox(scalar));
+        });
+  }
+};
+
+template <>
+struct CoalesceFunctor<FixedSizeListType> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
     for (const auto& datum : batch.values) {
-      if (datum.is_scalar()) {
-        if (!datum.scalar()->is_valid) continue;
-        ARROW_ASSIGN_OR_RAISE(
-            *out, MakeArrayFromScalar(*datum.scalar(), batch.length, 
ctx->memory_pool()));
-        return Status::OK();
-      } else if (datum.is_array() && !datum.array()->MayHaveNulls()) {
-        *out = datum;
-        return Status::OK();
+      if (datum.is_array()) {
+        return ExecArray(ctx, batch, out);
+      }
+    }
+    return ExecScalarCoalesce(ctx, batch, out);
+  }
+
+  static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum* 
out) {
+    std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+    return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+  }
+};
+
+template <typename Type>
+struct CoalesceFunctor<Type, enable_if_var_size_list<Type>> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    for (const auto& datum : batch.values) {
+      if (datum.is_array()) {
+        return ExecArray(ctx, batch, out);
       }
-      break;
     }
+    return ExecScalarCoalesce(ctx, batch, out);
+  }
+
+  static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum* 
out) {
+    std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+    return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+  }
+};
+
+template <>
+struct CoalesceFunctor<MapType> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    for (const auto& datum : batch.values) {
+      if (datum.is_array()) {
+        return ExecArray(ctx, batch, out);
+      }
+    }
+    return ExecScalarCoalesce(ctx, batch, out);
+  }
+
+  static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum* 
out) {
+    std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+    return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+  }
+};
+
+template <>
+struct CoalesceFunctor<StructType> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    for (const auto& datum : batch.values) {
+      if (datum.is_array()) {
+        return ExecArray(ctx, batch, out);
+      }
+    }
+    return ExecScalarCoalesce(ctx, batch, out);
+  }
+
+  static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum* 
out) {
+    std::function<Status(ArrayBuilder*)> reserve_data = ReserveNoData;
+    return ExecVarWidthCoalesce(ctx, batch, out, reserve_data);
+  }
+};
+
+template <typename Type>
+struct CoalesceFunctor<Type, enable_if_union<Type>> {
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // Unions don't have top-level nulls, so a specialized implementation is 
needed
+    for (const auto& datum : batch.values) {
+      if (datum.is_array()) {
+        return ExecArray(ctx, batch, out);
+      }
+    }
+    return ExecScalar(ctx, batch, out);
+  }
+
+  static Status ExecArray(KernelContext* ctx, const ExecBatch& batch, Datum* 
out) {
     ArrayData* output = out->mutable_array();
-    BuilderType builder(batch[0].type(), ctx->memory_pool());
-    RETURN_NOT_OK(builder.Reserve(batch.length));
+    std::unique_ptr<ArrayBuilder> raw_builder;
+    RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), out->type(), &raw_builder));
+    RETURN_NOT_OK(raw_builder->Reserve(batch.length));
+
+    // TODO: make sure differing union types are rejected

Review comment:
       Do you mean to add a test for this?

##########
File path: c_glib/test/test-sparse-union-scalar.rb
##########
@@ -49,7 +49,7 @@ def test_equal
   end
 
   def test_to_s
-    assert_equal("...", @scalar.to_s)
+    assert_equal("(2: number: int8 = -29)", @scalar.to_s)

Review comment:
       This is a bit cryptic if you don't know a union is expected. What about 
e.g. `union{number: int8 = -29}`?

##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else.cc
##########
@@ -1988,6 +1988,65 @@ Status ExecBinaryCoalesce(KernelContext* ctx, Datum 
left, Datum right, int64_t l
   return Status::OK();
 }
 
+template <typename AppendScalar>
+static Status ExecVarWidthCoalesceImpl(KernelContext* ctx, const ExecBatch& 
batch,
+                                       Datum* out,
+                                       std::function<Status(ArrayBuilder*)> 
reserve_data,
+                                       AppendScalar append_scalar) {
+  // Special case: grab any leading non-null scalar or array arguments
+  for (const auto& datum : batch.values) {
+    if (datum.is_scalar()) {
+      if (!datum.scalar()->is_valid) continue;
+      ARROW_ASSIGN_OR_RAISE(
+          *out, MakeArrayFromScalar(*datum.scalar(), batch.length, 
ctx->memory_pool()));
+      return Status::OK();
+    } else if (datum.is_array() && !datum.array()->MayHaveNulls()) {
+      *out = datum;
+      return Status::OK();
+    }
+    break;
+  }
+  ArrayData* output = out->mutable_array();
+  std::unique_ptr<ArrayBuilder> raw_builder;
+  RETURN_NOT_OK(MakeBuilder(ctx->memory_pool(), out->type(), &raw_builder));
+  RETURN_NOT_OK(raw_builder->Reserve(batch.length));
+  RETURN_NOT_OK(reserve_data(raw_builder.get()));
+
+  for (int64_t i = 0; i < batch.length; i++) {
+    bool set = false;
+    for (const auto& datum : batch.values) {
+      if (datum.is_scalar()) {
+        if (datum.scalar()->is_valid) {
+          RETURN_NOT_OK(append_scalar(raw_builder.get(), *datum.scalar()));
+          set = true;
+          break;
+        }
+      } else {
+        const ArrayData& source = *datum.array();
+        if (!source.MayHaveNulls() ||
+            BitUtil::GetBit(source.buffers[0]->data(), source.offset + i)) {
+          RETURN_NOT_OK(raw_builder->AppendArraySlice(source, i, 
/*length=*/1));

Review comment:
       I assume this isn't going to be very performant compared to e.g. 
`raw_builder->Append(source_array.GetView(i))`. Not necessarily worth 
addressing.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to