This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new fb202ee66d GH-43703: [C++][Parquet][CI] Parquet: Introducing more
bad_data for testing (#43708)
fb202ee66d is described below
commit fb202ee66d73572f46035c5b2f21ac22f74ba951
Author: mwish <[email protected]>
AuthorDate: Thu Aug 15 21:04:39 2024 +0800
GH-43703: [C++][Parquet][CI] Parquet: Introducing more bad_data for testing
(#43708)
### Rationale for this change
Introduce more bad_data files from parquet-testing so that the Parquet reader is tested against additional malformed inputs.
### What changes are included in this PR?
* Upgrade the parquet-testing submodule
* Check more bad_data files in the `FuzzReader` test
* Update fuzz corpus generation to also include the bad_data files
### Are these changes tested?
They're tests :-)
### Are there any user-facing changes?
no
* GitHub Issue: #43703
Authored-by: mwish <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/build-support/fuzzing/generate_corpuses.sh | 1 +
cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 12 +++++++++---
cpp/submodules/parquet-testing | 2 +-
3 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/cpp/build-support/fuzzing/generate_corpuses.sh
b/cpp/build-support/fuzzing/generate_corpuses.sh
index e3f00e6478..ffd5c54e44 100755
--- a/cpp/build-support/fuzzing/generate_corpuses.sh
+++ b/cpp/build-support/fuzzing/generate_corpuses.sh
@@ -56,4 +56,5 @@ rm -rf ${CORPUS_DIR}
${OUT}/parquet-arrow-generate-fuzz-corpus ${CORPUS_DIR}
# Add Parquet testing examples
cp ${ARROW_CPP}/submodules/parquet-testing/data/*.parquet ${CORPUS_DIR}
+cp ${ARROW_CPP}/submodules/parquet-testing/bad_data/*.parquet ${CORPUS_DIR}
${ARROW_CPP}/build-support/fuzzing/pack_corpus.py ${CORPUS_DIR}
${OUT}/parquet-arrow-fuzz_seed_corpus.zip
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index aad1e933c4..64030e0f90 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -5298,14 +5298,20 @@ TEST(TestArrowReadWrite, MultithreadedWrite) {
TEST(TestArrowReadWrite, FuzzReader) {
constexpr size_t kMaxFileSize = 1024 * 1024 * 1;
- {
- auto path = test::get_data_file("PARQUET-1481.parquet", /*is_good=*/false);
+ auto check_bad_file = [&](const std::string& file_name) {
+ SCOPED_TRACE(file_name);
+ auto path = test::get_data_file(file_name, /*is_good=*/false);
PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open(
path,
::arrow::io::FileMode::READ));
PARQUET_ASSIGN_OR_THROW(auto buffer, source->Read(kMaxFileSize));
auto s = internal::FuzzReader(buffer->data(), buffer->size());
ASSERT_NOT_OK(s);
- }
+ };
+ check_bad_file("PARQUET-1481.parquet");
+ check_bad_file("ARROW-GH-41317.parquet");
+ check_bad_file("ARROW-GH-41321.parquet");
+ check_bad_file("ARROW-RS-GH-6229-LEVELS.parquet");
+ check_bad_file("ARROW-RS-GH-6229-DICTHEADER.parquet");
{
auto path = test::get_data_file("alltypes_plain.parquet",
/*is_good=*/true);
PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open(
diff --git a/cpp/submodules/parquet-testing b/cpp/submodules/parquet-testing
index 74278bc4a1..cb7a967414 160000
--- a/cpp/submodules/parquet-testing
+++ b/cpp/submodules/parquet-testing
@@ -1 +1 @@
-Subproject commit 74278bc4a1122d74945969e6dec405abd1533ec3
+Subproject commit cb7a9674142c137367bf75a01b79c6e214a73199