This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 42447e6af6 GH-36327: [C++][CI] Fix Valgrind failures (#36461)
42447e6af6 is described below

commit 42447e6af605a765ba9a43338394441e07c185f4
Author: Antoine Pitrou <[email protected]>
AuthorDate: Tue Jul 4 21:37:08 2023 +0200

    GH-36327: [C++][CI] Fix Valgrind failures (#36461)
    
    * Silence Valgrind error in OpenSSL
    * Make sure the entire data buffer of pairwise_diff is initialized
    * Make sure the bitmaps of REE-encoded data are initialized
    
    * Closes: #36327
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/compute/kernels/ree_util_internal.cc |  5 ++-
 cpp/src/arrow/compute/kernels/vector_pairwise.cc   | 51 +++++++++++++---------
 .../arrow/compute/kernels/vector_run_end_encode.cc |  3 ++
 cpp/valgrind.supp                                  |  6 +++
 4 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/ree_util_internal.cc 
b/cpp/src/arrow/compute/kernels/ree_util_internal.cc
index edfefd324f..00c885f6fa 100644
--- a/cpp/src/arrow/compute/kernels/ree_util_internal.cc
+++ b/cpp/src/arrow/compute/kernels/ree_util_internal.cc
@@ -36,7 +36,8 @@ Result<std::shared_ptr<Buffer>> AllocateValuesBuffer(int64_t 
length, const DataT
                                                      MemoryPool* pool,
                                                      int64_t data_buffer_size) 
{
   if (type.bit_width() == 1) {
-    return AllocateBitmap(length, pool);
+    // Make sure the bitmap is initialized (avoids Valgrind errors).
+    return AllocateEmptyBitmap(length, pool);
   } else if (is_fixed_width(type.id())) {
     return AllocateBuffer(length * type.byte_width(), pool);
   } else {
@@ -62,7 +63,7 @@ Result<std::shared_ptr<ArrayData>> PreallocateValuesArray(
   std::vector<std::shared_ptr<Buffer>> values_data_buffers;
   std::shared_ptr<Buffer> validity_buffer = NULLPTR;
   if (has_validity_buffer) {
-    ARROW_ASSIGN_OR_RAISE(validity_buffer, AllocateBitmap(length, pool));
+    ARROW_ASSIGN_OR_RAISE(validity_buffer, AllocateEmptyBitmap(length, pool));
   }
   ARROW_ASSIGN_OR_RAISE(auto values_buffer, AllocateValuesBuffer(length, 
*value_type,
                                                                  pool, 
data_buffer_size));
diff --git a/cpp/src/arrow/compute/kernels/vector_pairwise.cc 
b/cpp/src/arrow/compute/kernels/vector_pairwise.cc
index 440b1393a3..ccce0abc33 100644
--- a/cpp/src/arrow/compute/kernels/vector_pairwise.cc
+++ b/cpp/src/arrow/compute/kernels/vector_pairwise.cc
@@ -19,7 +19,8 @@
 
 #include <iostream>
 #include <memory>
-#include "arrow/builder.h"
+
+#include "arrow/array/builder_base.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/exec.h"
 #include "arrow/compute/function.h"
@@ -35,9 +36,9 @@
 #include "arrow/util/bit_util.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
-#include "arrow/visit_type_inline.h"
 
 namespace arrow::compute::internal {
+namespace {
 
 // We reuse the kernel exec function of a scalar binary function to compute 
pairwise
 // results. For example, for pairwise_diff, we reuse subtract's kernel exec.
@@ -55,21 +56,21 @@ Status PairwiseExecImpl(KernelContext* ctx, const 
ArraySpan& input,
                         ArrayData* result) {
   // We only compute values in the region where the input-with-offset overlaps
   // the original input. The margin where these do not overlap gets filled 
with null.
-  auto margin_length = std::min(abs(periods), input.length);
-  auto computed_length = input.length - margin_length;
-  auto margin_start = periods > 0 ? 0 : computed_length;
-  auto computed_start = periods > 0 ? margin_length : 0;
-  auto left_start = computed_start;
-  auto right_start = margin_length - computed_start;
-  // prepare bitmap
-  bit_util::ClearBitmap(result->buffers[0]->mutable_data(), margin_start, 
margin_length);
+  const auto margin_length = std::min(abs(periods), input.length);
+  const auto computed_length = input.length - margin_length;
+  const auto computed_start = periods > 0 ? margin_length : 0;
+  const auto left_start = computed_start;
+  const auto right_start = margin_length - computed_start;
+  // prepare null bitmap
+  int64_t null_count = margin_length;
   for (int64_t i = computed_start; i < computed_start + computed_length; i++) {
     if (input.IsValid(i) && input.IsValid(i - periods)) {
       bit_util::SetBit(result->buffers[0]->mutable_data(), i);
     } else {
-      bit_util::ClearBit(result->buffers[0]->mutable_data(), i);
+      ++null_count;
     }
   }
+  result->null_count = null_count;
   // prepare input span
   ArraySpan left(input);
   left.SetSlice(left_start, computed_length);
@@ -90,9 +91,21 @@ Status PairwiseExecImpl(KernelContext* ctx, const ArraySpan& 
input,
 Status PairwiseExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
   const auto& state = checked_cast<const PairwiseState&>(*ctx->state());
   auto input = batch[0].array;
-  RETURN_NOT_OK(PairwiseExecImpl(ctx, batch[0].array, state.scalar_exec, 
state.periods,
-                                 out->array_data_mutable()));
-  return Status::OK();
+
+  // The scalar diff kernel will only write into the non-null output area.
+  // We must therefore pre-initialize the output, otherwise the left or right
+  // margin would be left uninitialized.
+  ARROW_ASSIGN_OR_RAISE(auto builder,
+                        MakeBuilder(out->type()->GetSharedPtr(), 
ctx->memory_pool()));
+  // Append nulls rather than empty values, so as to allocate a null bitmap.
+  RETURN_NOT_OK(builder->AppendNulls(out->length()));
+  std::shared_ptr<ArrayData> out_data;
+  RETURN_NOT_OK(builder->FinishInternal(&out_data));
+  out_data->null_count = kUnknownNullCount;
+  out->value = std::move(out_data);
+
+  return PairwiseExecImpl(ctx, batch[0].array, state.scalar_exec, 
state.periods,
+                          out->array_data_mutable());
 }
 
 const FunctionDoc pairwise_diff_doc(
@@ -122,19 +135,13 @@ const PairwiseOptions* GetDefaultPairwiseOptions() {
   return &kDefaultPairwiseOptions;
 }
 
-struct PairwiseKernelData {
-  InputType input;
-  OutputType output;
-  ArrayKernelExec exec;
-};
-
 void RegisterPairwiseDiffKernels(std::string_view func_name,
                                  std::string_view base_func_name, const 
FunctionDoc& doc,
                                  FunctionRegistry* registry) {
   VectorKernel kernel;
   kernel.can_execute_chunkwise = false;
   kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
-  kernel.mem_allocation = MemAllocation::PREALLOCATE;
+  kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
   kernel.init = OptionsWrapper<PairwiseOptions>::Init;
   auto func = std::make_shared<VectorFunction>(std::string(func_name), 
Arity::Unary(),
                                                doc, 
GetDefaultPairwiseOptions());
@@ -174,6 +181,8 @@ void RegisterPairwiseDiffKernels(std::string_view func_name,
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
+}  // namespace
+
 void RegisterVectorPairwise(FunctionRegistry* registry) {
   RegisterPairwiseDiffKernels("pairwise_diff", "subtract", pairwise_diff_doc, 
registry);
   RegisterPairwiseDiffKernels("pairwise_diff_checked", "subtract_checked",
diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc 
b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
index c0f54fbf5d..eef816a149 100644
--- a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
+++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
@@ -28,6 +28,7 @@
 namespace arrow {
 namespace compute {
 namespace internal {
+namespace {
 
 struct RunEndEncondingState : public KernelState {
   explicit RunEndEncondingState(std::shared_ptr<DataType> run_end_type)
@@ -526,6 +527,8 @@ static const FunctionDoc run_end_decode_doc(
     "Decode run-end encoded array",
     ("Return a decoded version of a run-end encoded input array."), {"array"});
 
+}  // namespace
+
 void RegisterVectorRunEndEncode(FunctionRegistry* registry) {
   auto function = std::make_shared<VectorFunction>("run_end_encode", 
Arity::Unary(),
                                                    run_end_encode_doc);
diff --git a/cpp/valgrind.supp b/cpp/valgrind.supp
index 2b9959136a..83e21df83a 100644
--- a/cpp/valgrind.supp
+++ b/cpp/valgrind.supp
@@ -69,3 +69,9 @@
     ...
     fun:_dl_allocate_tls
 }
+{
+    <OpenSSL>:Conditional jump or move depends on uninitialised value(s)
+    Memcheck:Cond
+    ...
+    fun:*gcm_cipher*
+}

Reply via email to