shehabgamin commented on code in PR #17022: URL: https://github.com/apache/datafusion/pull/17022#discussion_r2264173720
########## datafusion/datasource-parquet/src/file_format.rs: ########## @@ -968,19 +973,41 @@ pub async fn fetch_parquet_metadata( meta: &ObjectMeta, size_hint: Option<usize>, #[allow(unused)] decryption_properties: Option<&FileDecryptionProperties>, -) -> Result<ParquetMetaData> { + file_metadata_cache: Option<Arc<dyn FileMetadataCache>>, +) -> Result<Arc<ParquetMetaData>> { + if let Some(cache) = &file_metadata_cache { + if let Some(cached_metadata) = cache.get(meta) { + if let Some(parquet_metadata) = cached_metadata + .as_any() + .downcast_ref::<CachedParquetMetaData>() + { + return Ok(Arc::clone(parquet_metadata.parquet_metadata())); + } + } + } + let file_size = meta.size; let fetch = ObjectStoreFetch::new(store, meta); let reader = ParquetMetaDataReader::new().with_prefetch_hint(size_hint); - #[cfg(feature = "parquet_encryption")] let reader = reader.with_decryption_properties(decryption_properties); - reader - .load_and_finish(fetch, file_size) - .await - .map_err(DataFusionError::from) + let metadata = Arc::new( + reader + .load_and_finish(fetch, file_size) + .await + .map_err(DataFusionError::from)?, + ); + + if let Some(cache) = file_metadata_cache { + cache.put( + meta, + Arc::new(CachedParquetMetaData::new(Arc::clone(&metadata))), + ); + } Review Comment: Ahh nice catch, will do! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org