H-Plus-Time commented on code in PR #6157:
URL: https://github.com/apache/arrow-rs/pull/6157#discussion_r1699776660


##########
parquet/src/arrow/async_reader/mod.rs:
##########
@@ -162,15 +194,31 @@ impl AsyncFileReader for Box<dyn AsyncFileReader> {
 }
 
 impl<T: AsyncRead + AsyncSeek + Unpin + Send> AsyncFileReader for T {
-    fn get_bytes(&mut self, range: Range<usize>) -> BoxFuture<'_, Result<Bytes>> {
+    fn get_bytes(&mut self, range: GetRange) -> BoxFuture<'_, Result<Bytes>> {
         async move {
-            self.seek(SeekFrom::Start(range.start as u64)).await?;
-
-            let to_read = range.end - range.start;
-            let mut buffer = Vec::with_capacity(to_read);
-            let read = self.take(to_read as u64).read_to_end(&mut buffer).await?;
-            if read != to_read {
-                return Err(eof_err!("expected to read {} bytes, got {}", to_read, read));
+            let to_read = match range {
+                GetRange::Suffix(end_offset) => {
+                    self.seek(SeekFrom::End(-(end_offset as i64))).await?;
+                    Some(end_offset)
+                }
+                GetRange::Offset(offset) => {
+                    self.seek(SeekFrom::Start(offset as u64)).await?;
+                    None
+                }
+                GetRange::Bounded(range) => {
+                    self.seek(SeekFrom::Start(range.start as u64)).await?;
+                    Some(range.end - range.start)
+                }
+            };
+            // TODO: figure out a better alternative for Offset ranges
+            let mut buffer = Vec::with_capacity(to_read.unwrap_or(1_024usize));

Review Comment:
   One thing that's worth checking: is there _ever_ a situation where it's 
sensible, or even vaguely useful, for a parquet reader to do an unbounded read 
(the `GetRange::Offset` variant)?
   
   I would think you'd always have a known upper bound for the read, since 
knowing where to start (e.g. read all row groups starting from row group N) 
implies you already have the FileMetadata.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to