lidavidm commented on a change in pull request #10412:
URL: https://github.com/apache/arrow/pull/10412#discussion_r669002350
##########
File path: cpp/src/arrow/compute/kernels/vector_replace.cc
##########

```diff
@@ -0,0 +1,510 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/util/bitmap_ops.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+namespace {
+
+Status ReplacementArrayTooShort(int64_t expected, int64_t actual) {
+  return Status::Invalid("Replacement array must be of appropriate length (expected ",
+                         expected, " items but got ", actual, " items)");
+}
+
+// Helper to implement replace_with kernel with scalar mask for fixed-width types,
+// using callbacks to handle both bool and byte-sized types
+Status ReplaceWithScalarMask(KernelContext* ctx, const ArrayData& array,
+                             const BooleanScalar& mask, const Datum& replacements,
+                             ArrayData* output) {
+  if (!mask.is_valid) {
+    // Output = null
+    ARROW_ASSIGN_OR_RAISE(auto replacement_array,
+                          MakeArrayOfNull(array.type, array.length, ctx->memory_pool()));
+    *output = *replacement_array->data();
+    return Status::OK();
+  }
+  if (mask.value) {
+    // Output = replacement
+    if (replacements.is_scalar()) {
+      ARROW_ASSIGN_OR_RAISE(
+          auto replacement_array,
+          MakeArrayFromScalar(*replacements.scalar(), array.length, ctx->memory_pool()));
+      *output = *replacement_array->data();
+    } else {
+      auto replacement_array = replacements.array();
+      if (replacement_array->length != array.length) {
+        return ReplacementArrayTooShort(array.length, replacement_array->length);
+      }
+      *output = *replacement_array;
+    }
+  } else {
+    // Output = input
+    *output = array;
+  }
+  return Status::OK();
+}
+
+struct CopyArrayBitmap {
+  const uint8_t* in_bitmap;
+  int64_t in_offset;
+
+  void CopyBitmap(uint8_t* out_bitmap, int64_t out_offset, int64_t offset,
+                  int64_t length) const {
+    arrow::internal::CopyBitmap(in_bitmap, in_offset + offset, length, out_bitmap,
+                                out_offset);
+  }
+
+  void SetBit(uint8_t* out_bitmap, int64_t out_offset, int64_t offset) const {
+    BitUtil::SetBitTo(out_bitmap, out_offset,
+                      BitUtil::GetBit(in_bitmap, in_offset + offset));
+  }
+};
+
+struct CopyScalarBitmap {
+  const bool is_valid;
+
+  void CopyBitmap(uint8_t* out_bitmap, int64_t out_offset, int64_t offset,
+                  int64_t length) const {
+    BitUtil::SetBitsTo(out_bitmap, out_offset, length, is_valid);
+  }
+
+  void SetBit(uint8_t* out_bitmap, int64_t out_offset, int64_t offset) const {
+    BitUtil::SetBitTo(out_bitmap, out_offset, is_valid);
+  }
+};
+
+// Helper to implement replace_with kernel with array mask for fixed-width types,
+// using callbacks to handle both bool and byte-sized types and to handle
+// scalar and array replacements
+template <typename Functor, typename Data, typename CopyBitmap>
+void ReplaceWithArrayMaskImpl(const ArrayData& array, const ArrayData& mask,
+                              const Data& replacements, bool replacements_bitmap,
+                              const CopyBitmap copy_bitmap, const uint8_t* mask_bitmap,
```

Review comment:

In this case I intentionally made CopyBitmap itself cheaper to copy than to pass by reference: it's <= 2 words, though I suppose the compiler will optimize it identically either way. That said, doing this does seem about ~5% slower:

```
Before:
-------------------------------------------------------------------------------------------------------
Benchmark                                         Time             CPU   Iterations UserCounters...
-------------------------------------------------------------------------------------------------------
ReplaceWithMaskLowSelectivityBench/16384/0    33631 ns        33631 ns       204971 bytes_per_second=3.62975G/s
ReplaceWithMaskLowSelectivityBench/16384/99   35018 ns        35017 ns       202363 bytes_per_second=3.46498G/s
ReplaceWithMaskHighSelectivityBench/16384/0   77268 ns        77267 ns        90912 bytes_per_second=1.57985G/s
ReplaceWithMaskHighSelectivityBench/16384/99  75751 ns        75750 ns        92444 bytes_per_second=1.60176G/s

After:
-------------------------------------------------------------------------------------------------------
Benchmark                                         Time             CPU   Iterations UserCounters...
-------------------------------------------------------------------------------------------------------
ReplaceWithMaskLowSelectivityBench/16384/0    35512 ns        35511 ns       192582 bytes_per_second=3.43751G/s
ReplaceWithMaskLowSelectivityBench/16384/99   36702 ns        36701 ns       191996 bytes_per_second=3.30598G/s
ReplaceWithMaskHighSelectivityBench/16384/0   82957 ns        82956 ns        85194 bytes_per_second=1.47151G/s
ReplaceWithMaskHighSelectivityBench/16384/99  80415 ns        80413 ns        86354 bytes_per_second=1.50887G/s
```
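For readers skimming the thread, here is a minimal, self-contained sketch of the two parameter-passing styles being compared: a tiny, trivially copyable bitmap-writing functor taken once by value and once by const reference. It is illustrative only; `CopyScalarBitmapSketch`, `FillBitmapByValue`, and `FillBitmapByRef` are made-up stand-ins, not the actual Arrow types or the signatures in this patch.

```cpp
#include <cstdint>
#include <type_traits>
#include <vector>

// Stand-in for a tiny bitmap-writing functor like CopyScalarBitmap:
// a single bool, trivially copyable, well under two machine words.
struct CopyScalarBitmapSketch {
  bool is_valid;

  // Set or clear the bit at out_offset depending on is_valid.
  void SetBit(uint8_t* out_bitmap, int64_t out_offset) const {
    const uint8_t mask = static_cast<uint8_t>(1u << (out_offset % 8));
    if (is_valid) {
      out_bitmap[out_offset / 8] |= mask;
    } else {
      out_bitmap[out_offset / 8] &= static_cast<uint8_t>(~mask);
    }
  }
};

static_assert(std::is_trivially_copyable<CopyScalarBitmapSketch>::value,
              "cheap to copy");
static_assert(sizeof(CopyScalarBitmapSketch) <= 2 * sizeof(void*),
              "at most two machine words");

// Variant A: the functor is taken by value, so the callee works on its own copy.
template <typename CopyBitmap>
void FillBitmapByValue(uint8_t* out_bitmap, int64_t length,
                       const CopyBitmap copy_bitmap) {
  for (int64_t i = 0; i < length; ++i) copy_bitmap.SetBit(out_bitmap, i);
}

// Variant B: the functor is taken by const reference, so no copy is made.
template <typename CopyBitmap>
void FillBitmapByRef(uint8_t* out_bitmap, int64_t length,
                     const CopyBitmap& copy_bitmap) {
  for (int64_t i = 0; i < length; ++i) copy_bitmap.SetBit(out_bitmap, i);
}

int main() {
  std::vector<uint8_t> bitmap(16, 0);  // room for 128 bits
  CopyScalarBitmapSketch copier{true};
  FillBitmapByValue(bitmap.data(), 128, copier);
  FillBitmapByRef(bitmap.data(), 128, copier);
  return 0;
}
```

For a struct this small one would usually expect both signatures to compile to the same code once inlined, which matches the expectation stated above; the ~5% gap in the benchmarks suggests the two forms are not always interchangeable in practice, so measuring, as was done here, is the safer guide.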
