This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new fc7b800a0e GH-42140: [C++] Avoid invalid accesses in 
parquet-encoding-benchmark (#42141)
fc7b800a0e is described below

commit fc7b800a0ea38713391dd8772bb77583b6a56e6b
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Jun 13 21:11:42 2024 +0200

    GH-42140: [C++] Avoid invalid accesses in parquet-encoding-benchmark 
(#42141)
    
    ### Rationale for this change
    
    `parquet-encoding-benchmark` would make invalid memory accesses if more 
than one repetition per benchmark was requested by the user.
    
    This was initially noticed in https://github.com/apache/arrow/pull/41205, 
where switching to jemalloc crashed the affected benchmark(s) on a macOS ARM 
machine.
    
    ### What changes are included in this PR?
    
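    Make sure benchmark data initialization is idempotent: `values_` is now 
resized to `num_values_` and filled by index instead of being reserved and 
appended to.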
    
    ### Are these changes tested?
    
    Locally using Valgrind.
    
    ### Are there any user-facing changes?
    
    No.
    * GitHub Issue: #42140
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/parquet/encoding_benchmark.cc | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/cpp/src/parquet/encoding_benchmark.cc 
b/cpp/src/parquet/encoding_benchmark.cc
index e74a9f55b1..34d12a624f 100644
--- a/cpp/src/parquet/encoding_benchmark.cc
+++ b/cpp/src/parquet/encoding_benchmark.cc
@@ -1233,12 +1233,10 @@ class BenchmarkDecodeArrowByteArray : public 
BenchmarkDecodeArrowBase<ByteArrayT
     valid_bits_ = input_array_->null_bitmap_data();
     total_size_ = input_array_->data()->buffers[2]->size();
 
-    values_.reserve(num_values_);
+    values_.resize(num_values_);
     const auto& binary_array = static_cast<const 
::arrow::BinaryArray&>(*input_array_);
     for (int64_t i = 0; i < binary_array.length(); i++) {
-      auto view = binary_array.GetView(i);
-      values_.emplace_back(static_cast<uint32_t>(view.length()),
-                           reinterpret_cast<const uint8_t*>(view.data()));
+      values_[i] = binary_array.GetView(i);
     }
   }
 
@@ -1431,10 +1429,10 @@ class BenchmarkDecodeArrowBoolean : public 
BenchmarkDecodeArrowBase<BooleanType>
     // so, we uses this as "total_size" for the benchmark.
     total_size_ = ::arrow::bit_util::BytesForBits(num_values_);
 
-    values_.reserve(num_values_);
+    values_.resize(num_values_);
     const auto& boolean_array = static_cast<const 
::arrow::BooleanArray&>(*input_array_);
     for (int64_t i = 0; i < boolean_array.length(); i++) {
-      values_.push_back(boolean_array.Value(i));
+      values_[i] = boolean_array.Value(i);
     }
   }
 

Reply via email to