This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 42447e6af6 GH-36327: [C++][CI] Fix Valgrind failures (#36461)
42447e6af6 is described below
commit 42447e6af605a765ba9a43338394441e07c185f4
Author: Antoine Pitrou <[email protected]>
AuthorDate: Tue Jul 4 21:37:08 2023 +0200
GH-36327: [C++][CI] Fix Valgrind failures (#36461)
* Silence Valgrind error in OpenSSL
* Make sure the entire data buffer of pairwise_diff is initialized
* Make sure the bitmaps of REE-encoded data are initialized
* Closes: #36327
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/compute/kernels/ree_util_internal.cc | 5 ++-
cpp/src/arrow/compute/kernels/vector_pairwise.cc | 51 +++++++++++++---------
.../arrow/compute/kernels/vector_run_end_encode.cc | 3 ++
cpp/valgrind.supp | 6 +++
4 files changed, 42 insertions(+), 23 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/ree_util_internal.cc
b/cpp/src/arrow/compute/kernels/ree_util_internal.cc
index edfefd324f..00c885f6fa 100644
--- a/cpp/src/arrow/compute/kernels/ree_util_internal.cc
+++ b/cpp/src/arrow/compute/kernels/ree_util_internal.cc
@@ -36,7 +36,8 @@ Result<std::shared_ptr<Buffer>> AllocateValuesBuffer(int64_t
length, const DataT
MemoryPool* pool,
int64_t data_buffer_size)
{
if (type.bit_width() == 1) {
- return AllocateBitmap(length, pool);
+ // Make sure the bitmap is initialized (avoids Valgrind errors).
+ return AllocateEmptyBitmap(length, pool);
} else if (is_fixed_width(type.id())) {
return AllocateBuffer(length * type.byte_width(), pool);
} else {
@@ -62,7 +63,7 @@ Result<std::shared_ptr<ArrayData>> PreallocateValuesArray(
std::vector<std::shared_ptr<Buffer>> values_data_buffers;
std::shared_ptr<Buffer> validity_buffer = NULLPTR;
if (has_validity_buffer) {
- ARROW_ASSIGN_OR_RAISE(validity_buffer, AllocateBitmap(length, pool));
+ ARROW_ASSIGN_OR_RAISE(validity_buffer, AllocateEmptyBitmap(length, pool));
}
ARROW_ASSIGN_OR_RAISE(auto values_buffer, AllocateValuesBuffer(length,
*value_type,
pool,
data_buffer_size));
diff --git a/cpp/src/arrow/compute/kernels/vector_pairwise.cc
b/cpp/src/arrow/compute/kernels/vector_pairwise.cc
index 440b1393a3..ccce0abc33 100644
--- a/cpp/src/arrow/compute/kernels/vector_pairwise.cc
+++ b/cpp/src/arrow/compute/kernels/vector_pairwise.cc
@@ -19,7 +19,8 @@
#include <iostream>
#include <memory>
-#include "arrow/builder.h"
+
+#include "arrow/array/builder_base.h"
#include "arrow/compute/api_vector.h"
#include "arrow/compute/exec.h"
#include "arrow/compute/function.h"
@@ -35,9 +36,9 @@
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"
-#include "arrow/visit_type_inline.h"
namespace arrow::compute::internal {
+namespace {
// We reuse the kernel exec function of a scalar binary function to compute
pairwise
// results. For example, for pairwise_diff, we reuse subtract's kernel exec.
@@ -55,21 +56,21 @@ Status PairwiseExecImpl(KernelContext* ctx, const
ArraySpan& input,
ArrayData* result) {
// We only compute values in the region where the input-with-offset overlaps
// the original input. The margin where these do not overlap gets filled
with null.
- auto margin_length = std::min(abs(periods), input.length);
- auto computed_length = input.length - margin_length;
- auto margin_start = periods > 0 ? 0 : computed_length;
- auto computed_start = periods > 0 ? margin_length : 0;
- auto left_start = computed_start;
- auto right_start = margin_length - computed_start;
- // prepare bitmap
- bit_util::ClearBitmap(result->buffers[0]->mutable_data(), margin_start,
margin_length);
+ const auto margin_length = std::min(abs(periods), input.length);
+ const auto computed_length = input.length - margin_length;
+ const auto computed_start = periods > 0 ? margin_length : 0;
+ const auto left_start = computed_start;
+ const auto right_start = margin_length - computed_start;
+ // prepare null bitmap
+ int64_t null_count = margin_length;
for (int64_t i = computed_start; i < computed_start + computed_length; i++) {
if (input.IsValid(i) && input.IsValid(i - periods)) {
bit_util::SetBit(result->buffers[0]->mutable_data(), i);
} else {
- bit_util::ClearBit(result->buffers[0]->mutable_data(), i);
+ ++null_count;
}
}
+ result->null_count = null_count;
// prepare input span
ArraySpan left(input);
left.SetSlice(left_start, computed_length);
@@ -90,9 +91,21 @@ Status PairwiseExecImpl(KernelContext* ctx, const ArraySpan&
input,
Status PairwiseExec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
const auto& state = checked_cast<const PairwiseState&>(*ctx->state());
auto input = batch[0].array;
- RETURN_NOT_OK(PairwiseExecImpl(ctx, batch[0].array, state.scalar_exec,
state.periods,
- out->array_data_mutable()));
- return Status::OK();
+
+ // The scalar diff kernel will only write into the non-null output area.
+ // We must therefore pre-initialize the output, otherwise the left or right
+ // margin would be left uninitialized.
+ ARROW_ASSIGN_OR_RAISE(auto builder,
+ MakeBuilder(out->type()->GetSharedPtr(),
ctx->memory_pool()));
+ // Append nulls rather than empty values, so as to allocate a null bitmap.
+ RETURN_NOT_OK(builder->AppendNulls(out->length()));
+ std::shared_ptr<ArrayData> out_data;
+ RETURN_NOT_OK(builder->FinishInternal(&out_data));
+ out_data->null_count = kUnknownNullCount;
+ out->value = std::move(out_data);
+
+ return PairwiseExecImpl(ctx, batch[0].array, state.scalar_exec,
state.periods,
+ out->array_data_mutable());
}
const FunctionDoc pairwise_diff_doc(
@@ -122,19 +135,13 @@ const PairwiseOptions* GetDefaultPairwiseOptions() {
return &kDefaultPairwiseOptions;
}
-struct PairwiseKernelData {
- InputType input;
- OutputType output;
- ArrayKernelExec exec;
-};
-
void RegisterPairwiseDiffKernels(std::string_view func_name,
std::string_view base_func_name, const
FunctionDoc& doc,
FunctionRegistry* registry) {
VectorKernel kernel;
kernel.can_execute_chunkwise = false;
kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE;
- kernel.mem_allocation = MemAllocation::PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::NO_PREALLOCATE;
kernel.init = OptionsWrapper<PairwiseOptions>::Init;
auto func = std::make_shared<VectorFunction>(std::string(func_name),
Arity::Unary(),
doc,
GetDefaultPairwiseOptions());
@@ -174,6 +181,8 @@ void RegisterPairwiseDiffKernels(std::string_view func_name,
DCHECK_OK(registry->AddFunction(std::move(func)));
}
+} // namespace
+
void RegisterVectorPairwise(FunctionRegistry* registry) {
RegisterPairwiseDiffKernels("pairwise_diff", "subtract", pairwise_diff_doc,
registry);
RegisterPairwiseDiffKernels("pairwise_diff_checked", "subtract_checked",
diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
index c0f54fbf5d..eef816a149 100644
--- a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
+++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc
@@ -28,6 +28,7 @@
namespace arrow {
namespace compute {
namespace internal {
+namespace {
struct RunEndEncondingState : public KernelState {
explicit RunEndEncondingState(std::shared_ptr<DataType> run_end_type)
@@ -526,6 +527,8 @@ static const FunctionDoc run_end_decode_doc(
"Decode run-end encoded array",
("Return a decoded version of a run-end encoded input array."), {"array"});
+} // namespace
+
void RegisterVectorRunEndEncode(FunctionRegistry* registry) {
auto function = std::make_shared<VectorFunction>("run_end_encode",
Arity::Unary(),
run_end_encode_doc);
diff --git a/cpp/valgrind.supp b/cpp/valgrind.supp
index 2b9959136a..83e21df83a 100644
--- a/cpp/valgrind.supp
+++ b/cpp/valgrind.supp
@@ -69,3 +69,9 @@
...
fun:_dl_allocate_tls
}
+{
+ <OpenSSL>:Conditional jump or move depends on uninitialised value(s)
+ Memcheck:Cond
+ ...
+ fun:*gcm_cipher*
+}