This is an automated email from the ASF dual-hosted git repository.
pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new ed536d18f7 GH-50149: [C++][Parquet] Avoid process abort when encoding
fuzzer encounters OOM (#50150)
ed536d18f7 is described below
commit ed536d18f7afc2c2964211c3625ffd29bf3e8ded
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Jun 11 10:02:51 2026 +0200
GH-50149: [C++][Parquet] Avoid process abort when encoding fuzzer
encounters OOM (#50150)
### Rationale for this change
In the Parquet encoding fuzzer, an OOM error when trying to roundtrip the
encoding payload leads to a hard error that is reported as an issue on
OSS-Fuzz. This should be converted to a soft error, i.e. a potential log
message but not a process abort.
### Are these changes tested?
Yes, by an additional fuzz regression file as well as by manual testing.
### Are there any user-facing changes?
No.
* GitHub Issue: #50149
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/parquet/arrow/fuzz_encoding_internal.cc | 32 +++++++++++++++----------
testing | 2 +-
2 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/cpp/src/parquet/arrow/fuzz_encoding_internal.cc b/cpp/src/parquet/arrow/fuzz_encoding_internal.cc
index 4270eb5437..a007739116 100644
--- a/cpp/src/parquet/arrow/fuzz_encoding_internal.cc
+++ b/cpp/src/parquet/arrow/fuzz_encoding_internal.cc
@@ -290,23 +290,24 @@ struct TypedFuzzEncoding {
}
// Re-encode and re-decode using roundtrip encoding
- {
- auto compare_chunk = [&](int offset, std::span<const c_type> chunk_values) {
- return CompareChunkAgainstReference(offset, chunk_values);
- };
+ auto compare_chunk = [&](int offset, std::span<const c_type> chunk_values) {
+ return CompareChunkAgainstReference(offset, chunk_values);
+ };
+ auto do_roundtrip = [&]() -> Status {
auto encoder = MakeEncoder(roundtrip_encoding_);
BEGIN_PARQUET_CATCH_EXCEPTIONS
if constexpr (arrow_supported()) {
encoder->Put(*reference_array_);
auto reencoded_buffer = encoder->FlushValues();
auto reencoded_data = reencoded_buffer->template span_as<uint8_t>();
- auto array = DecodeArrow(roundtrip_encoding_, reencoded_data).ValueOrDie();
- ARROW_CHECK_OK(array->ValidateFull());
- ARROW_CHECK_OK(CompareAgainstReference(array));
+ ARROW_ASSIGN_OR_RAISE(auto array,
+ DecodeArrow(roundtrip_encoding_, reencoded_data));
+ RETURN_NOT_OK(array->ValidateFull());
+ RETURN_NOT_OK(CompareAgainstReference(array));
// Compare with reading raw values
for (const int chunk_size : chunk_sizes()) {
- ARROW_CHECK_OK(RunOnDecodedChunks(roundtrip_encoding_, reencoded_data,
- chunk_size, compare_chunk));
+ RETURN_NOT_OK(RunOnDecodedChunks(roundtrip_encoding_, reencoded_data,
+ chunk_size, compare_chunk));
}
} else {
encoder->Put(reference_values_.data(),
@@ -315,14 +316,19 @@ struct TypedFuzzEncoding {
auto reencoded_data = reencoded_buffer->template span_as<uint8_t>();
// Vary chunk sizes
for (const int chunk_size : chunk_sizes()) {
- ARROW_CHECK_OK(RunOnDecodedChunks(roundtrip_encoding_, reencoded_data,
- chunk_size, compare_chunk));
+ RETURN_NOT_OK(RunOnDecodedChunks(roundtrip_encoding_, reencoded_data,
+ chunk_size, compare_chunk));
}
}
END_PARQUET_CATCH_EXCEPTIONS
+ return Status::OK();
+ };
+ Status roundtrip_status = do_roundtrip();
+ // OOM when attempting to roundtrip is not a hard failure, any other error is.
+ if (!roundtrip_status.IsOutOfMemory()) {
+ ARROW_CHECK_OK(roundtrip_status);
}
-
- return Status::OK();
+ return roundtrip_status;
}
protected:
diff --git a/testing b/testing
index 9cfebfef89..1d74fce2b6 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit 9cfebfef8982fb8612e0a2c59059752bd32321a3
+Subproject commit 1d74fce2b6bb30158f254fc292252f4a87fc67a3