bkietz commented on a change in pull request #10390:
URL: https://github.com/apache/arrow/pull/10390#discussion_r639136875



##########
File path: cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
##########
@@ -516,6 +562,202 @@ std::shared_ptr<ScalarFunction> 
MakeUnarySignedArithmeticFunctionNotNull(
   return func;
 }
 
+using MinMaxState = OptionsWrapper<ElementWiseAggregateOptions>;
+
+// Implement a variadic scalar min/max kernel.
+template <typename OutType, typename Op>
+struct ScalarMinMax {
+  using OutValue = typename GetOutputType<OutType>::T;
+
+  static void ExecScalar(const ExecBatch& batch,
+                         const ElementWiseAggregateOptions& options, Scalar* 
out) {
+    // All arguments are scalar
+    OutValue value{};
+    bool valid = false;
+    for (const auto& arg : batch.values) {
+      // Ignore non-scalar arguments so we can use it in the 
mixed-scalar-and-array case
+      if (!arg.is_scalar()) continue;
+      const auto& scalar = *arg.scalar();
+      if (!scalar.is_valid) {
+        if (options.skip_nulls) continue;
+        out->is_valid = false;
+        return;
+      }
+      if (!valid) {
+        value = UnboxScalar<OutType>::Unbox(scalar);
+        valid = true;
+      } else {
+        value = Op::Call(value, UnboxScalar<OutType>::Unbox(scalar));
+      }
+    }
+    out->is_valid = valid;
+    if (valid) {
+      BoxScalar<OutType>::Box(value, out);
+    }
+  }
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const ElementWiseAggregateOptions& options = MinMaxState::Get(ctx);
+    const auto descrs = batch.GetDescriptors();
+    bool all_scalar = true;
+    bool any_scalar = false;
+    size_t first_array_index = batch.values.size();
+    for (size_t i = 0; i < batch.values.size(); i++) {
+      const auto& datum = batch.values[i];
+      all_scalar &= datum.descr().shape == ValueDescr::SCALAR;
+      any_scalar |= datum.descr().shape == ValueDescr::SCALAR;
+      if (first_array_index >= batch.values.size() &&
+          datum.descr().shape == ValueDescr::ARRAY) {
+        first_array_index = i;
+      }
+    }
+    if (all_scalar) {
+      ExecScalar(batch, options, out->scalar().get());
+      return Status::OK();
+    }
+
+    ArrayData* output = out->mutable_array();
+
+    // Exactly one array (output = input)
+    if (batch.values.size() == 1) {
+      *output = *batch[0].array();
+      return Status::OK();
+    }
+
+    // At least one array, two or more arguments
+    DCHECK_GE(first_array_index, 0);
+    DCHECK_LT(first_array_index, batch.values.size());
+    DCHECK(batch.values[first_array_index].is_array());
+    if (any_scalar) {

Review comment:
       Handling scalars should happen before the output == input bail. If the 
folded scalar is null then we can either ignore it (skip_nulls=true) or emit a 
null array without examining any array inputs (skip_nulls=false). After that 
we're guaranteed to have a valid scalar (for which MakeArrayFromScalar will not 
materialize a validity bitmap, so you can drop the BitmapXor etc)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to