This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new b597a206a5 Use u32 metadata_len when parsing footer of parquet. (#4599)
b597a206a5 is described below
commit b597a206a59dd10deeff9fa53097b6d14cf169e2
Author: Yuyi Wang <[email protected]>
AuthorDate: Mon Jul 31 21:53:28 2023 +0800
Use u32 metadata_len when parsing footer of parquet. (#4599)
* Use u32 metadata_len.
* Remove a useless test.
* Fix footer metadata_len type.
---
parquet/src/file/footer.rs | 20 +++-----------------
parquet/src/file/writer.rs | 2 +-
2 files changed, 4 insertions(+), 18 deletions(-)
diff --git a/parquet/src/file/footer.rs b/parquet/src/file/footer.rs
index fcd6a300c5..f4fb2534c2 100644
--- a/parquet/src/file/footer.rs
+++ b/parquet/src/file/footer.rs
@@ -103,13 +103,9 @@ pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
}
// get the metadata length from the footer
- let metadata_len = i32::from_le_bytes(slice[..4].try_into().unwrap());
- metadata_len.try_into().map_err(|_| {
- general_err!(
- "Invalid Parquet file. Metadata length is less than zero ({})",
- metadata_len
- )
- })
+ let metadata_len = u32::from_le_bytes(slice[..4].try_into().unwrap());
+ // u32 won't be larger than usize in most cases
+ Ok(metadata_len as usize)
}
/// Parses column orders from Thrift definition.
@@ -175,16 +171,6 @@ mod tests {
);
}
- #[test]
- fn test_parse_metadata_invalid_length() {
- let test_file = Bytes::from(vec![0, 0, 0, 255, b'P', b'A', b'R', b'1']);
- let reader_result = parse_metadata(&test_file);
- assert_eq!(
- reader_result.unwrap_err().to_string(),
- "Parquet error: Invalid Parquet file. Metadata length is less than zero (-16777216)"
- );
- }
-
#[test]
fn test_parse_metadata_invalid_start() {
let test_file = Bytes::from(vec![255, 0, 0, 0, b'P', b'A', b'R', b'1']);
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index bde350a1ea..12da085ed2 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -347,7 +347,7 @@ impl<W: Write + Send> SerializedFileWriter<W> {
let end_pos = self.buf.bytes_written();
// Write footer
- let metadata_len = (end_pos - start_pos) as i32;
+ let metadata_len = (end_pos - start_pos) as u32;
self.buf.write_all(&metadata_len.to_le_bytes())?;
self.buf.write_all(&PARQUET_MAGIC)?;