alamb commented on a change in pull request #8300:
URL: https://github.com/apache/arrow/pull/8300#discussion_r502332349



##########
File path: rust/parquet/src/file/reader.rs
##########
@@ -18,35 +18,37 @@
 //! Contains file reader API and provides methods to access file metadata, row 
group
 //! readers to read individual column chunks, or access record iterator.
 
-use std::{
-    convert::TryFrom,
-    fs::File,
-    io::{BufReader, Cursor, Read, Seek, SeekFrom},
-    path::Path,
-    rc::Rc,
-};
+use std::{boxed::Box, io::Read, rc::Rc};
 
-use byteorder::{ByteOrder, LittleEndian};
-use parquet_format::{
-    ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData, PageHeader, 
PageType,
-};
-use thrift::protocol::TCompactInputProtocol;
-
-use crate::basic::{ColumnOrder, Compression, Encoding, Type};
 use crate::column::page::PageIterator;
-use crate::column::{
-    page::{Page, PageReader},
-    reader::{ColumnReader, ColumnReaderImpl},
-};
-use crate::compression::{create_codec, Codec};
+use crate::column::{page::PageReader, reader::ColumnReader};
 use crate::errors::{ParquetError, Result};
-use crate::file::{metadata::*, statistics, FOOTER_SIZE, PARQUET_MAGIC};
+use crate::file::metadata::*;
+pub use crate::file::serialized_reader::{SerializedFileReader, 
SerializedPageReader};
 use crate::record::reader::RowIter;
-use crate::record::Row;
-use crate::schema::types::{
-    self, ColumnDescPtr, SchemaDescPtr, SchemaDescriptor, Type as SchemaType,
-};
-use crate::util::{io::FileSource, memory::ByteBufferPtr};
+use crate::schema::types::{ColumnDescPtr, SchemaDescPtr, Type as SchemaType};
+
+use crate::basic::Type;
+
+use crate::column::reader::ColumnReaderImpl;
+
+/// Length should return the amount of bytes that implementor contains.
+/// It's mainly used to read the metadata, which is at the end of the source.
+#[allow(clippy::len_without_is_empty)]
+pub trait Length {
+    /// Returns the amount of bytes of the inner source.
+    fn len(&self) -> u64;
+}
+
+/// The ChunkReader trait generates readers of chunks of a source.
+/// For a file system reader, each chunk might contain a clone of File bounded 
on a given range.
+/// For an object store reader, each read can be mapped to a range request.
+pub trait ChunkReader: Length {
+    type T: Read;
+    /// get a serialy readeable slice of the current reader
+    /// This should fail if the slice exceeds the current bounds
+    fn get_read(&self, start: u64, length: usize) -> Result<Self::T>;
+}
 
 // ----------------------------------------------------------------------

Review comment:
       
   When I tried to compile an in-house project with this branch I got the 
following error: 
   
   ```
   
   error[E0432]: unresolved import 
`delorean_parquet::parquet::file::reader::TryClone`
     --> delorean_parquet/src/lib.rs:13:28
      |
   13 |     file::reader::{Length, TryClone},
      |                            ^^^^^^^^ no `TryClone` in 
`parquet::file::reader`
   
   error[E0432]: unresolved import 
`delorean_parquet::parquet::file::reader::TryClone`
   ```
   
   When I tried to use the copy in `util::io` I got: 
   
   ```
   error[E0603]: module `util` is private
     --> delorean_parquet/src/lib.rs:14:5
      |
   14 |     util::io::{TryClone},
      |     ^^^^ private module
      |
   
   ```
   
   When I used the import in `seriailized_reader` I got a similar error: 
   
   ```
   error[E0603]: trait `TryClone` is private
     --> delorean_parquet/src/lib.rs:14:30
      |
   14 |     file::serialized_reader::TryClone,
      |                              ^^^^^^^^ private trait
      |
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to