etseidl commented on code in PR #7334:
URL: https://github.com/apache/arrow-rs/pull/7334#discussion_r2014954403
##########
parquet/src/file/metadata/reader.rs:
##########
@@ -638,6 +676,57 @@ impl ParquetMetaDataReader {
}
}
+ #[cfg(all(feature = "async", feature = "arrow"))]
+ async fn load_metadata_via_suffix<F: MetadataSuffixFetch>(
+ &self,
+ fetch: &mut F,
+ prefetch: usize,
+ ) -> Result<(ParquetMetaData, Option<(usize, Bytes)>)> {
+ let suffix = fetch.fetch_suffix(prefetch).await?;
+ let suffix_len = suffix.len();
+
+ // TODO: not sure how to restore this error handling here
Review Comment:
I think we should first ensure `prefetch` is at least `FOOTER_SIZE`, then
return an error if `suffix_len` is still less than `FOOTER_SIZE`; otherwise
the `suffix_len - FOOTER_SIZE` slice below can underflow.
##########
parquet/src/file/metadata/reader.rs:
##########
@@ -638,6 +676,57 @@ impl ParquetMetaDataReader {
}
}
+ #[cfg(all(feature = "async", feature = "arrow"))]
+ async fn load_metadata_via_suffix<F: MetadataSuffixFetch>(
+ &self,
+ fetch: &mut F,
+ prefetch: usize,
+ ) -> Result<(ParquetMetaData, Option<(usize, Bytes)>)> {
+ let suffix = fetch.fetch_suffix(prefetch).await?;
+ let suffix_len = suffix.len();
+
+ // TODO: not sure how to restore this error handling here
+
+ // let fetch_len = file_size - footer_start;
+ // if suffix_len < fetch_len {
+ // return Err(eof_err!(
+ // "metadata requires {} bytes, but could only read {}",
+ // fetch_len,
+ // suffix_len
+ // ));
+ // }
+
+ let mut footer = [0; FOOTER_SIZE];
+ footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
+
+ let footer = Self::decode_footer_tail(&footer)?;
+ let length = footer.metadata_length();
+
+ // TODO: not sure how to restore this error handling here
Review Comment:
This error handling should no longer be necessary.
##########
parquet/src/file/metadata/reader.rs:
##########
@@ -638,6 +676,57 @@ impl ParquetMetaDataReader {
}
}
+ #[cfg(all(feature = "async", feature = "arrow"))]
+ async fn load_metadata_via_suffix<F: MetadataSuffixFetch>(
+ &self,
+ fetch: &mut F,
+ prefetch: usize,
+ ) -> Result<(ParquetMetaData, Option<(usize, Bytes)>)> {
+ let suffix = fetch.fetch_suffix(prefetch).await?;
+ let suffix_len = suffix.len();
+
+ // TODO: not sure how to restore this error handling here
+
+ // let fetch_len = file_size - footer_start;
+ // if suffix_len < fetch_len {
+ // return Err(eof_err!(
+ // "metadata requires {} bytes, but could only read {}",
+ // fetch_len,
+ // suffix_len
+ // ));
+ // }
+
+ let mut footer = [0; FOOTER_SIZE];
+ footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
+
+ let footer = Self::decode_footer_tail(&footer)?;
+ let length = footer.metadata_length();
+
+ // TODO: not sure how to restore this error handling here
+
+ // if file_size < length + FOOTER_SIZE {
+ // return Err(eof_err!(
+ // "file size of {} is less than footer + metadata {}",
+ // file_size,
+ // length + FOOTER_SIZE
+ // ));
+ // }
+
+ // Did not fetch the entire file metadata in the initial read, need to make a second request
+ let metadata_offset = length + FOOTER_SIZE;
+ if length > suffix_len - FOOTER_SIZE {
+ let meta = fetch.fetch_suffix(metadata_offset).await?;
Review Comment:
We should probably make sure enough bytes were fetched here (i.e. that the
second fetch returned at least `metadata_offset` bytes).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]