This is an automated email from the ASF dual-hosted git repository.

maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 9015a81d0e GH-44769: [C++][Parquet] Fix read/write of metadata length 
footer on big-endian systems (#44787)
9015a81d0e is described below

commit 9015a81d0e9f9a861509e5e1b6f96c0d8c01a999
Author: Elliott Sales de Andrade <[email protected]>
AuthorDate: Wed Nov 20 10:20:52 2024 -0500

    GH-44769: [C++][Parquet] Fix read/write of metadata length footer on 
big-endian systems (#44787)
    
    ### Rationale for this change
    
    See issue.
    
    ### What changes are included in this PR?
    
    - [Fix writing Parquet metadata length 
footer](https://github.com/apache/arrow/commit/fd3cf89d3c978078d96300f770eccc8e7de4bc40)
    
      By converting the `uint32_t` length to little endian before casting its 
address to a `uint8_t*`, the bytes written to the output file are always in 
the correct order, regardless of host endianness.
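    - [Fix reading Parquet metadata length 
footer](https://github.com/apache/arrow/commit/4b1dd1b6e5533096764d2541842b4f5fdf858a4e)
    
      By converting the loaded `uint32_t` from little endian to host byte 
order, the metadata length is interpreted correctly regardless of host 
endianness.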
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    On big-endian systems, reading a Parquet file no longer fails with a 
spurious error about the metadata size in the footer, though that doesn't 
guarantee that anything else works yet.
    * GitHub Issue: #44769
    
    Authored-by: Elliott Sales de Andrade <[email protected]>
    Signed-off-by: mwish <[email protected]>
---
 cpp/src/parquet/file_reader.cc |  7 ++++---
 cpp/src/parquet/file_writer.cc | 14 +++++++++++---
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc
index 3e9eeea6c6..3cc42ae370 100644
--- a/cpp/src/parquet/file_reader.cc
+++ b/cpp/src/parquet/file_reader.cc
@@ -497,9 +497,10 @@ class SerializedFile : public ParquetFileReader::Contents {
           "is not a parquet file.");
     }
     // Both encrypted/unencrypted footers have the same footer length check.
-    uint32_t metadata_len = ::arrow::util::SafeLoadAs<uint32_t>(
-        reinterpret_cast<const uint8_t*>(footer_buffer->data()) + 
footer_read_size -
-        kFooterSize);
+    uint32_t metadata_len =
+        
::arrow::bit_util::FromLittleEndian(::arrow::util::SafeLoadAs<uint32_t>(
+            reinterpret_cast<const uint8_t*>(footer_buffer->data()) + 
footer_read_size -
+            kFooterSize));
     if (metadata_len > source_size_ - kFooterSize) {
       throw ParquetInvalidOrCorruptedFileException(
           "Parquet file size is ", source_size_,
diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc
index baa9e00da2..854e2161f8 100644
--- a/cpp/src/parquet/file_writer.cc
+++ b/cpp/src/parquet/file_writer.cc
@@ -426,8 +426,10 @@ class FileSerializer : public ParquetFileWriter::Contents {
       WriteEncryptedFileMetadata(*file_metadata_, sink_.get(), 
footer_encryptor, true);
       PARQUET_ASSIGN_OR_THROW(position, sink_->Tell());
       uint32_t footer_and_crypto_len = static_cast<uint32_t>(position - 
metadata_start);
+      uint32_t footer_and_crypto_len_le =
+          ::arrow::bit_util::ToLittleEndian(footer_and_crypto_len);
       PARQUET_THROW_NOT_OK(
-          sink_->Write(reinterpret_cast<uint8_t*>(&footer_and_crypto_len), 4));
+          sink_->Write(reinterpret_cast<uint8_t*>(&footer_and_crypto_len_le), 
4));
       PARQUET_THROW_NOT_OK(sink_->Write(kParquetEMagic, 4));
     } else {  // Encrypted file with plaintext footer
       file_metadata_ = metadata_->Finish(key_value_metadata_);
@@ -539,7 +541,10 @@ void WriteFileMetaData(const FileMetaData& file_metadata, 
ArrowOutputStream* sin
   metadata_len = static_cast<uint32_t>(position) - metadata_len;
 
   // Write Footer
-  PARQUET_THROW_NOT_OK(sink->Write(reinterpret_cast<uint8_t*>(&metadata_len), 
4));
+  {
+    uint32_t metadata_len_le = ::arrow::bit_util::ToLittleEndian(metadata_len);
+    
PARQUET_THROW_NOT_OK(sink->Write(reinterpret_cast<uint8_t*>(&metadata_len_le), 
4));
+  }
   PARQUET_THROW_NOT_OK(sink->Write(kParquetMagic, 4));
 }
 
@@ -562,7 +567,10 @@ void WriteEncryptedFileMetadata(const FileMetaData& 
file_metadata,
     PARQUET_ASSIGN_OR_THROW(position, sink->Tell());
     metadata_len = static_cast<uint32_t>(position) - metadata_len;
 
-    
PARQUET_THROW_NOT_OK(sink->Write(reinterpret_cast<uint8_t*>(&metadata_len), 4));
+    {
+      uint32_t metadata_len_le = 
::arrow::bit_util::ToLittleEndian(metadata_len);
+      
PARQUET_THROW_NOT_OK(sink->Write(reinterpret_cast<uint8_t*>(&metadata_len_le), 
4));
+    }
     PARQUET_THROW_NOT_OK(sink->Write(kParquetMagic, 4));
   }
 }

Reply via email to