adriangb commented on issue #6002:
URL: https://github.com/apache/arrow-rs/issues/6002#issuecomment-2221000971
I got my thing working, but it seems quite brittle:
```rust
/// In-memory stand-in for the tail of a Parquet file that records which
/// absolute byte offsets are requested through `fetch`.
#[derive(Debug, Clone)]
struct AsyncBytes {
/// Size in bytes of the file that `data_suffix` is treated as the tail of.
file_size: usize,
/// Bytes served by `fetch`; they are mapped onto the file offsets
/// `file_size - data_suffix.len()..file_size`.
data_suffix: Bytes,
/// Smallest `range.start` passed to `fetch` so far (initialised to `file_size`).
min_offset: usize,
/// Largest `range.end` passed to `fetch` so far (initialised to `file_size`).
max_offset: usize,
}
impl AsyncBytes {
    /// Creates a reader over `data_suffix`, treated as the trailing bytes of a
    /// file that is `file_size` bytes long.
    fn new(file_size: usize, data_suffix: Bytes) -> Self {
        // Nothing has been requested yet, so both bounds start at the end of
        // the file and the tracked window is empty.
        let (min_offset, max_offset) = (file_size, file_size);
        Self {
            file_size,
            data_suffix,
            min_offset,
            max_offset,
        }
    }

    /// Returns the window spanning the lowest start and the highest end of all
    /// ranges passed to `fetch` so far, in absolute file offsets.
    fn fetched_range(&self) -> Range<usize> {
        Range {
            start: self.min_offset,
            end: self.max_offset,
        }
    }
}
impl MetadataFetch for &mut AsyncBytes {
    /// Serves `range`, given in absolute file offsets, from the in-memory suffix.
    ///
    /// The suffix is treated as the last `data_suffix.len()` bytes of a file
    /// that is `file_size` bytes long. Every call widens the tracked
    /// `min_offset..max_offset` window with `range`, including calls that are
    /// then rejected.
    ///
    /// # Errors
    ///
    /// The returned future resolves to `ParquetError::General` when the suffix
    /// is longer than `file_size`, or when `range` is not fully contained in
    /// the offsets covered by the suffix.
    fn fetch(&mut self, range: Range<usize>) -> BoxFuture<'_, ParquetResult<Bytes>> {
        // Track the request first so `fetched_range` reflects everything that
        // was asked for.
        self.min_offset = self.min_offset.min(range.start);
        self.max_offset = self.max_offset.max(range.end);

        // A suffix longer than the declared file size would make the plain
        // subtraction underflow, so report it as an error instead.
        let suffix_len = self.data_suffix.len();
        let available_start = match self.file_size.checked_sub(suffix_len) {
            Some(start) => start,
            None => {
                let err = format!(
                    "Metadata suffix of {suffix_len} bytes is larger than the file size of {} bytes",
                    self.file_size
                );
                return async move { Err(parquet::errors::ParquetError::General(err)) }.boxed();
            }
        };
        let available_range = available_start..self.file_size;

        if range.start < available_range.start || range.end > available_range.end {
            // The trailing `\` joins the literal without embedding a newline
            // in the message.
            let err = format!(
                "Attempted to fetch data from outside metadata section: \
                 range={range:?}, available_range={available_range:?}"
            );
            return async move { Err(parquet::errors::ParquetError::General(err)) }.boxed();
        }

        // Translate absolute file offsets into offsets within the suffix buffer.
        let data = self
            .data_suffix
            .slice(range.start - available_start..range.end - available_start);
        async move { Ok(data) }.boxed()
    }
}
/// Rebuilds the Parquet metadata, including the page index, from a buffer
/// holding only the trailing bytes of a file.
///
/// `serialized_parquet_metadata` is treated as the final bytes of a file that
/// is `file_size` bytes long; all reads are served from it through
/// `AsyncBytes`.
///
/// # Errors
///
/// Propagates any error returned by `MetadataLoader::load` or
/// `load_page_index`, which includes a fetch outside the supplied bytes.
pub async fn load_metadata(
    file_size: usize,
    serialized_parquet_metadata: Bytes,
) -> ParquetResult<Arc<ParquetMetaData>> {
    // The buffer is owned and not used again here, so move it into the reader
    // instead of cloning it.
    let mut reader = AsyncBytes::new(file_size, serialized_parquet_metadata);
    let loader = MetadataLoader::load(&mut reader, file_size, None).await?;
    let loaded_metadata = loader.finish();
    let mut metadata = MetadataLoader::new(&mut reader, loaded_metadata);
    metadata.load_page_index(true, true).await?;
    Ok(Arc::new(metadata.finish()))
}
pub async fn extract_metadata_from_file(file_data: &Bytes) ->
ParquetResult<Vec<u8>> {
let loaded_metadata = parse_metadata(file_data)?;
let mut reader = AsyncBytes::new(file_data.len(), file_data.clone());
let mut metadata = MetadataLoader::new(&mut reader, loaded_metadata);
metadata.load_page_index(true, true).await?;
metadata.finish();
let range = reader.fetched_range().start..file_data.len();
println!("Extracted metadata from range: {range:?}");
Ok(file_data[reader.fetched_range().start..].to_vec())
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]