etseidl commented on code in PR #7371:
URL: https://github.com/apache/arrow-rs/pull/7371#discussion_r2031788668
##########
parquet/src/file/metadata/reader.rs:
##########
@@ -1252,18 +1254,18 @@ mod async_tests {
}
}
- fn read_range(file: &mut File, range: Range<usize>) -> Result<Bytes> {
+ fn read_range(file: &mut File, range: Range<u64>) -> Result<Bytes> {
file.seek(SeekFrom::Start(range.start as _))?;
let len = range.end - range.start;
- let mut buf = Vec::with_capacity(len);
+ let mut buf = Vec::with_capacity(len as usize);
Review Comment:
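Being super pedantic, but should this use `usize::try_from(len)` instead of `len as usize`? The `as` cast silently truncates a `u64` on targets where `usize` is narrower than 64 bits.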
##########
parquet/src/arrow/async_reader/mod.rs:
##########
@@ -150,14 +150,14 @@ impl<T: AsyncFileReader + MetadataFetch + AsyncRead +
AsyncSeek + Unpin> Metadat
}
impl<T: AsyncRead + AsyncSeek + Unpin + Send> AsyncFileReader for T {
- fn get_bytes(&mut self, range: Range<usize>) -> BoxFuture<'_,
Result<Bytes>> {
+ fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>>
{
async move {
- self.seek(SeekFrom::Start(range.start as u64)).await?;
+ self.seek(SeekFrom::Start(range.start)).await?;
let to_read = range.end - range.start;
- let mut buffer = Vec::with_capacity(to_read);
- let read = self.take(to_read as u64).read_to_end(&mut
buffer).await?;
- if read != to_read {
+ let mut buffer = Vec::with_capacity(to_read as usize);
Review Comment:
And here: should `to_read as usize` use `try_from` as well?
##########
parquet/src/arrow/async_reader/mod.rs:
##########
@@ -461,14 +461,14 @@ impl<T: AsyncFileReader + Send + 'static>
ParquetRecordBatchStreamBuilder<T> {
}
let bitset = match column_metadata.bloom_filter_length() {
- Some(_) => buffer.slice((bitset_offset as usize - offset)..),
+ Some(_) => buffer.slice((bitset_offset as usize - offset as
usize)..),
Review Comment:
And here: should the `bitset_offset as usize` and `offset as usize` casts use `try_from` as well?
##########
parquet/src/arrow/async_reader/metadata.rs:
##########
@@ -256,8 +263,8 @@ where
F: FnMut(Range<usize>) -> Fut + Send,
Fut: Future<Output = Result<Bytes>> + Send,
{
- fn fetch(&mut self, range: Range<usize>) -> BoxFuture<'_, Result<Bytes>> {
- async move { self.0(range).await }.boxed()
+ fn fetch(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>> {
+ async move { self.0(range.start as usize..range.end as usize).await
}.boxed()
Review Comment:
Perhaps here as well: the `range.start as usize..range.end as usize` casts could use `try_from`.
##########
parquet/src/arrow/async_reader/metadata.rs:
##########
@@ -48,12 +48,13 @@ use std::ops::Range;
/// file: tokio::fs::File,
/// }
/// impl MetadataFetch for TokioFileMetadata {
-/// fn fetch(&mut self, range: Range<usize>) -> BoxFuture<'_,
Result<Bytes>> {
+/// fn fetch(&mut self, range: Range<u64>) -> BoxFuture<'_, Result<Bytes>>
{
/// // return a future that fetches data in range
/// async move {
-/// let mut buf = vec![0; range.len()]; // target buffer
+/// let len = (range.end - range.start) as usize;
Review Comment:
Same comment here: should `(range.end - range.start) as usize` use `try_from`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]