lidavidm commented on a change in pull request #10412:
URL: https://github.com/apache/arrow/pull/10412#discussion_r669002350



##########
File path: cpp/src/arrow/compute/kernels/vector_replace.cc
##########
@@ -0,0 +1,510 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/bitmap_ops.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+namespace {
+
+Status ReplacementArrayTooShort(int64_t expected, int64_t actual) {
+  return Status::Invalid("Replacement array must be of appropriate length (expected ",
+                         expected, " items but got ", actual, " items)");
+}
+
+// Helper to implement replace_with kernel with scalar mask for fixed-width types,
+// using callbacks to handle both bool and byte-sized types
+Status ReplaceWithScalarMask(KernelContext* ctx, const ArrayData& array,
+                             const BooleanScalar& mask, const Datum& replacements,
+                             ArrayData* output) {
+  if (!mask.is_valid) {
+    // Output = null
+    ARROW_ASSIGN_OR_RAISE(auto replacement_array,
+                          MakeArrayOfNull(array.type, array.length, ctx->memory_pool()));
+    *output = *replacement_array->data();
+    return Status::OK();
+  }
+  if (mask.value) {
+    // Output = replacement
+    if (replacements.is_scalar()) {
+      ARROW_ASSIGN_OR_RAISE(
+          auto replacement_array,
+          MakeArrayFromScalar(*replacements.scalar(), array.length, ctx->memory_pool()));
+      *output = *replacement_array->data();
+    } else {
+      auto replacement_array = replacements.array();
+      if (replacement_array->length != array.length) {
+        return ReplacementArrayTooShort(array.length, replacement_array->length);
+      }
+      *output = *replacement_array;
+    }
+  } else {
+    // Output = input
+    *output = array;
+  }
+  return Status::OK();
+}
+
+struct CopyArrayBitmap {
+  const uint8_t* in_bitmap;
+  int64_t in_offset;
+
+  void CopyBitmap(uint8_t* out_bitmap, int64_t out_offset, int64_t offset,
+                  int64_t length) const {
+    arrow::internal::CopyBitmap(in_bitmap, in_offset + offset, length, out_bitmap,
+                                out_offset);
+  }
+
+  void SetBit(uint8_t* out_bitmap, int64_t out_offset, int64_t offset) const {
+    BitUtil::SetBitTo(out_bitmap, out_offset,
+                      BitUtil::GetBit(in_bitmap, in_offset + offset));
+  }
+};
+
+struct CopyScalarBitmap {
+  const bool is_valid;
+
+  void CopyBitmap(uint8_t* out_bitmap, int64_t out_offset, int64_t offset,
+                  int64_t length) const {
+    BitUtil::SetBitsTo(out_bitmap, out_offset, length, is_valid);
+  }
+
+  void SetBit(uint8_t* out_bitmap, int64_t out_offset, int64_t offset) const {
+    BitUtil::SetBitTo(out_bitmap, out_offset, is_valid);
+  }
+};
+
+// Helper to implement replace_with kernel with array mask for fixed-width types,
+// using callbacks to handle both bool and byte-sized types and to handle
+// scalar and array replacements
+template <typename Functor, typename Data, typename CopyBitmap>
+void ReplaceWithArrayMaskImpl(const ArrayData& array, const ArrayData& mask,
+                              const Data& replacements, bool replacements_bitmap,
+                              const CopyBitmap copy_bitmap, const uint8_t* mask_bitmap,

Review comment:
       In this case I intentionally made CopyBitmap small enough that it is cheaper to pass by value than by reference (it's <= 2 words), though I suppose the compiler will optimize both identically either way.
   
   Except that making this change does seem to be about 5% slower:
   
   ```
   Before:
   -------------------------------------------------------------------------------------------------------
   Benchmark                                             Time             CPU   Iterations UserCounters...
   -------------------------------------------------------------------------------------------------------
   ReplaceWithMaskLowSelectivityBench/16384/0        33631 ns        33631 ns       204971 bytes_per_second=3.62975G/s
   ReplaceWithMaskLowSelectivityBench/16384/99       35018 ns        35017 ns       202363 bytes_per_second=3.46498G/s
   ReplaceWithMaskHighSelectivityBench/16384/0       77268 ns        77267 ns        90912 bytes_per_second=1.57985G/s
   ReplaceWithMaskHighSelectivityBench/16384/99      75751 ns        75750 ns        92444 bytes_per_second=1.60176G/s
   
   After:
   -------------------------------------------------------------------------------------------------------
   Benchmark                                             Time             CPU   Iterations UserCounters...
   -------------------------------------------------------------------------------------------------------
   ReplaceWithMaskLowSelectivityBench/16384/0        35512 ns        35511 ns       192582 bytes_per_second=3.43751G/s
   ReplaceWithMaskLowSelectivityBench/16384/99       36702 ns        36701 ns       191996 bytes_per_second=3.30598G/s
   ReplaceWithMaskHighSelectivityBench/16384/0       82957 ns        82956 ns        85194 bytes_per_second=1.47151G/s
   ReplaceWithMaskHighSelectivityBench/16384/99      80415 ns        80413 ns        86354 bytes_per_second=1.50887G/s
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to