This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new b597a206a5 Use u32 metadata_len when parsing footer of parquet. (#4599)
b597a206a5 is described below

commit b597a206a59dd10deeff9fa53097b6d14cf169e2
Author: Yuyi Wang <[email protected]>
AuthorDate: Mon Jul 31 21:53:28 2023 +0800

    Use u32 metadata_len when parsing footer of parquet. (#4599)
    
    * Use u32 metadata_len.
    
    * Remove a useless test.
    
    * Fix footer metadata_len type.
---
 parquet/src/file/footer.rs | 20 +++-----------------
 parquet/src/file/writer.rs |  2 +-
 2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/parquet/src/file/footer.rs b/parquet/src/file/footer.rs
index fcd6a300c5..f4fb2534c2 100644
--- a/parquet/src/file/footer.rs
+++ b/parquet/src/file/footer.rs
@@ -103,13 +103,9 @@ pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
     }
 
     // get the metadata length from the footer
-    let metadata_len = i32::from_le_bytes(slice[..4].try_into().unwrap());
-    metadata_len.try_into().map_err(|_| {
-        general_err!(
-            "Invalid Parquet file. Metadata length is less than zero ({})",
-            metadata_len
-        )
-    })
+    let metadata_len = u32::from_le_bytes(slice[..4].try_into().unwrap());
+    // u32 won't be larger than usize in most cases
+    Ok(metadata_len as usize)
 }
 
 /// Parses column orders from Thrift definition.
@@ -175,16 +171,6 @@ mod tests {
         );
     }
 
-    #[test]
-    fn test_parse_metadata_invalid_length() {
-        let test_file = Bytes::from(vec![0, 0, 0, 255, b'P', b'A', b'R', b'1']);
-        let reader_result = parse_metadata(&test_file);
-        assert_eq!(
-            reader_result.unwrap_err().to_string(),
-            "Parquet error: Invalid Parquet file. Metadata length is less than zero (-16777216)"
-        );
-    }
-
     #[test]
     fn test_parse_metadata_invalid_start() {
         let test_file = Bytes::from(vec![255, 0, 0, 0, b'P', b'A', b'R', b'1']);
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index bde350a1ea..12da085ed2 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -347,7 +347,7 @@ impl<W: Write + Send> SerializedFileWriter<W> {
         let end_pos = self.buf.bytes_written();
 
         // Write footer
-        let metadata_len = (end_pos - start_pos) as i32;
+        let metadata_len = (end_pos - start_pos) as u32;
 
         self.buf.write_all(&metadata_len.to_le_bytes())?;
         self.buf.write_all(&PARQUET_MAGIC)?;

Reply via email to