Dandandan commented on issue #1363:
URL: 
https://github.com/apache/arrow-datafusion/issues/1363#issuecomment-980063141


   For the record, this is the entire diff:
   
   ```diff
   
   diff --git a/datafusion/src/datasource/file_format/parquet.rs b/datafusion/src/datasource/file_format/parquet.rs
   index 7976be791..1d493c274 100644
   --- a/datafusion/src/datasource/file_format/parquet.rs
   +++ b/datafusion/src/datasource/file_format/parquet.rs
   @@ -18,6 +18,7 @@
    //! Parquet format abstractions
    
    use std::any::Any;
   +use std::io::BufRead;
    use std::io::Read;
    use std::sync::Arc;
    
   @@ -321,7 +322,7 @@ impl Length for ChunkObjectReader {
    }
    
    impl ChunkReader for ChunkObjectReader {
   -    type T = Box<dyn Read + Send + Sync>;
   +    type T = Box<dyn BufRead + Send + Sync>;
    
        fn get_read(&self, start: u64, length: usize) -> ParquetResult<Self::T> {
            self.0
   diff --git a/datafusion/src/datasource/object_store/local.rs b/datafusion/src/datasource/object_store/local.rs
   index b2a2ddfa9..5b87bee1a 100644
   --- a/datafusion/src/datasource/object_store/local.rs
   +++ b/datafusion/src/datasource/object_store/local.rs
   @@ -18,7 +18,7 @@
    //! Object store that represents the Local File System.
    
    use std::fs::{self, File, Metadata};
   -use std::io::{Read, Seek, SeekFrom};
   +use std::io::{BufRead, BufReader, Read, Seek, SeekFrom};
    use std::sync::Arc;
    
    use async_trait::async_trait;
   @@ -82,12 +82,15 @@ impl ObjectReader for LocalFileReader {
            &self,
            start: u64,
            length: usize,
   -    ) -> Result<Box<dyn Read + Send + Sync>> {
   +    ) -> Result<Box<dyn BufRead + Send + Sync>> {
            // A new file descriptor is opened for each chunk reader.
            // This okay because chunks are usually fairly large.
            let mut file = File::open(&self.file.path)?;
            file.seek(SeekFrom::Start(start))?;
   -        Ok(Box::new(file.take(length as u64)))
   +        
   +        let file = BufReader::new(file.take(length as u64));
   +        
   +        Ok(Box::new(file))
        }
    
        fn length(&self) -> u64 {
   diff --git a/datafusion/src/datasource/object_store/mod.rs b/datafusion/src/datasource/object_store/mod.rs
   index 59e184103..97085c5df 100644
   --- a/datafusion/src/datasource/object_store/mod.rs
   +++ b/datafusion/src/datasource/object_store/mod.rs
   @@ -21,7 +21,7 @@ pub mod local;
    
    use std::collections::HashMap;
    use std::fmt::{self, Debug};
   -use std::io::Read;
   +use std::io::{BufRead, Read};
    use std::pin::Pin;
    use std::sync::{Arc, RwLock};
    
   @@ -48,10 +48,10 @@ pub trait ObjectReader: Send + Sync {
            &self,
            start: u64,
            length: usize,
   -    ) -> Result<Box<dyn Read + Send + Sync>>;
   +    ) -> Result<Box<dyn BufRead + Send + Sync>>;
    
        /// Get reader for the entire file
   -    fn sync_reader(&self) -> Result<Box<dyn Read + Send + Sync>> {
   +    fn sync_reader(&self) -> Result<Box<dyn BufRead + Send + Sync>> {
            self.sync_chunk_reader(0, self.length() as usize)
        }
    
   diff --git a/datafusion/src/physical_plan/file_format/file_stream.rs b/datafusion/src/physical_plan/file_format/file_stream.rs
   index 958b1721b..ca3a69a6e 100644
   --- a/datafusion/src/physical_plan/file_format/file_stream.rs
   +++ b/datafusion/src/physical_plan/file_format/file_stream.rs
   @@ -32,13 +32,7 @@ use arrow::{
        record_batch::RecordBatch,
    };
    use futures::Stream;
   -use std::{
   -    io::Read,
   -    iter,
   -    pin::Pin,
   -    sync::Arc,
   -    task::{Context, Poll},
   -};
   +use std::{io::{BufRead, Read}, iter, pin::Pin, sync::Arc, task::{Context, Poll}};
    
    use super::PartitionColumnProjector;
    
   @@ -48,12 +42,12 @@ pub type BatchIter = Box<dyn Iterator<Item = ArrowResult<RecordBatch>> + Send +
    /// A closure that creates a file format reader (iterator over `RecordBatch`) from a `Read` object
    /// and an optional number of required records.
    pub trait FormatReaderOpener:
   -    FnMut(Box<dyn Read + Send + Sync>, &Option<usize>) -> BatchIter + Send + Unpin + 'static
   +    FnMut(Box<dyn BufRead + Send + Sync>, &Option<usize>) -> BatchIter + Send + Unpin + 'static
    {
    }
    
    impl<T> FormatReaderOpener for T where
   -    T: FnMut(Box<dyn Read + Send + Sync>, &Option<usize>) -> BatchIter
   +    T: FnMut(Box<dyn BufRead + Send + Sync>, &Option<usize>) -> BatchIter
            + Send
            + Unpin
            + 'static
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to