This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new fb202ee66d GH-43703: [C++][Parquet][CI] Parquet: Introducing more bad_data for testing (#43708)
fb202ee66d is described below

commit fb202ee66d73572f46035c5b2f21ac22f74ba951
Author: mwish <[email protected]>
AuthorDate: Thu Aug 15 21:04:39 2024 +0800

    GH-43703: [C++][Parquet][CI] Parquet: Introducing more bad_data for testing (#43708)
    
    
    
    ### Rationale for this change
    
    Add more malformed Parquet files (`bad_data`) from parquet-testing to the reader tests and the fuzzing seed corpus.
    
    ### What changes are included in this PR?
    
    * Upgrade the parquet-testing submodule
    * Check more bad_data files in the `FuzzReader` test
    * Copy the bad_data files into the fuzzing seed corpus
    
    ### Are these changes tested?
    
    They're tests :-)
    
    ### Are there any user-facing changes?
    
    no
    
    * GitHub Issue: #43703
    
    Authored-by: mwish <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/build-support/fuzzing/generate_corpuses.sh    |  1 +
 cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 12 +++++++++---
 cpp/submodules/parquet-testing                    |  2 +-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/cpp/build-support/fuzzing/generate_corpuses.sh b/cpp/build-support/fuzzing/generate_corpuses.sh
index e3f00e6478..ffd5c54e44 100755
--- a/cpp/build-support/fuzzing/generate_corpuses.sh
+++ b/cpp/build-support/fuzzing/generate_corpuses.sh
@@ -56,4 +56,5 @@ rm -rf ${CORPUS_DIR}
 ${OUT}/parquet-arrow-generate-fuzz-corpus ${CORPUS_DIR}
 # Add Parquet testing examples
 cp ${ARROW_CPP}/submodules/parquet-testing/data/*.parquet ${CORPUS_DIR}
+cp ${ARROW_CPP}/submodules/parquet-testing/bad_data/*.parquet ${CORPUS_DIR}
 ${ARROW_CPP}/build-support/fuzzing/pack_corpus.py ${CORPUS_DIR} 
${OUT}/parquet-arrow-fuzz_seed_corpus.zip
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index aad1e933c4..64030e0f90 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -5298,14 +5298,20 @@ TEST(TestArrowReadWrite, MultithreadedWrite) {
 
 TEST(TestArrowReadWrite, FuzzReader) {
   constexpr size_t kMaxFileSize = 1024 * 1024 * 1;
-  {
-    auto path = test::get_data_file("PARQUET-1481.parquet", /*is_good=*/false);
+  auto check_bad_file = [&](const std::string& file_name) {
+    SCOPED_TRACE(file_name);
+    auto path = test::get_data_file(file_name, /*is_good=*/false);
     PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open(
                                              path, 
::arrow::io::FileMode::READ));
     PARQUET_ASSIGN_OR_THROW(auto buffer, source->Read(kMaxFileSize));
     auto s = internal::FuzzReader(buffer->data(), buffer->size());
     ASSERT_NOT_OK(s);
-  }
+  };
+  check_bad_file("PARQUET-1481.parquet");
+  check_bad_file("ARROW-GH-41317.parquet");
+  check_bad_file("ARROW-GH-41321.parquet");
+  check_bad_file("ARROW-RS-GH-6229-LEVELS.parquet");
+  check_bad_file("ARROW-RS-GH-6229-DICTHEADER.parquet");
   {
     auto path = test::get_data_file("alltypes_plain.parquet", 
/*is_good=*/true);
     PARQUET_ASSIGN_OR_THROW(auto source, ::arrow::io::MemoryMappedFile::Open(
diff --git a/cpp/submodules/parquet-testing b/cpp/submodules/parquet-testing
index 74278bc4a1..cb7a967414 160000
--- a/cpp/submodules/parquet-testing
+++ b/cpp/submodules/parquet-testing
@@ -1 +1 @@
-Subproject commit 74278bc4a1122d74945969e6dec405abd1533ec3
+Subproject commit cb7a9674142c137367bf75a01b79c6e214a73199

Reply via email to