This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new fc7b800a0e GH-42140: [C++] Avoid invalid accesses in
parquet-encoding-benchmark (#42141)
fc7b800a0e is described below
commit fc7b800a0ea38713391dd8772bb77583b6a56e6b
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Jun 13 21:11:42 2024 +0200
GH-42140: [C++] Avoid invalid accesses in parquet-encoding-benchmark
(#42141)
### Rationale for this change
`parquet-encoding-benchmark` made invalid memory accesses when the user
requested more than one repetition per benchmark.
This was initially noticed in https://github.com/apache/arrow/pull/41205:
switching to jemalloc crashed the affected benchmark(s) on a macOS ARM
machine.
### What changes are included in this PR?
Make sure benchmark data initialization is idempotent.
### Are these changes tested?
Yes, locally using Valgrind.
### Are there any user-facing changes?
No.
* GitHub Issue: #42140
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/parquet/encoding_benchmark.cc | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/cpp/src/parquet/encoding_benchmark.cc
b/cpp/src/parquet/encoding_benchmark.cc
index e74a9f55b1..34d12a624f 100644
--- a/cpp/src/parquet/encoding_benchmark.cc
+++ b/cpp/src/parquet/encoding_benchmark.cc
@@ -1233,12 +1233,10 @@ class BenchmarkDecodeArrowByteArray : public
BenchmarkDecodeArrowBase<ByteArrayT
valid_bits_ = input_array_->null_bitmap_data();
total_size_ = input_array_->data()->buffers[2]->size();
- values_.reserve(num_values_);
+ values_.resize(num_values_);
const auto& binary_array = static_cast<const
::arrow::BinaryArray&>(*input_array_);
for (int64_t i = 0; i < binary_array.length(); i++) {
- auto view = binary_array.GetView(i);
- values_.emplace_back(static_cast<uint32_t>(view.length()),
- reinterpret_cast<const uint8_t*>(view.data()));
+ values_[i] = binary_array.GetView(i);
}
}
@@ -1431,10 +1429,10 @@ class BenchmarkDecodeArrowBoolean : public
BenchmarkDecodeArrowBase<BooleanType>
// so, we uses this as "total_size" for the benchmark.
total_size_ = ::arrow::bit_util::BytesForBits(num_values_);
- values_.reserve(num_values_);
+ values_.resize(num_values_);
const auto& boolean_array = static_cast<const
::arrow::BooleanArray&>(*input_array_);
for (int64_t i = 0; i < boolean_array.length(); i++) {
- values_.push_back(boolean_array.Value(i));
+ values_[i] = boolean_array.Value(i);
}
}