This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 551eaab8c Minor: improve `ChunkedReader` docs (#6477)
551eaab8c is described below

commit 551eaab8cf02dd351dcafb4a1ffa602dfa7a899a
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Oct 1 14:58:32 2024 -0400

    Minor: improve `ChunkedReader` docs (#6477)
    
    * Minor: improve `ChunkedReader` docs
    
    * Update parquet/src/file/reader.rs
    
    Co-authored-by: Raphael Taylor-Davies <[email protected]>
    
    * Get pedantic
    
    ---------
    
    Co-authored-by: Raphael Taylor-Davies <[email protected]>
---
 parquet/src/file/reader.rs | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/parquet/src/file/reader.rs b/parquet/src/file/reader.rs
index cff921b20..d8a61fafe 100644
--- a/parquet/src/file/reader.rs
+++ b/parquet/src/file/reader.rs
@@ -45,25 +45,38 @@ pub trait Length {
     fn len(&self) -> u64;
 }
 
-/// The ChunkReader trait generates readers of chunks of a source.
+/// Generates [`Read`]ers to read chunks of a Parquet data source.
 ///
-/// For more information see [`File::try_clone`]
+/// The Parquet reader uses [`ChunkReader`] to access Parquet data, allowing
+/// multiple decoders to read concurrently from different locations in the same file.
+///
+/// The trait provides:
+/// * random access (via [`Self::get_bytes`])
+/// * sequential (via [`Self::get_read`])
+///
+/// # Provided Implementations
+/// * [`File`] for reading from local file system
+/// * [`Bytes`] for reading from an in-memory buffer
+///
+/// User provided implementations can implement more sophisticated behaviors
+/// such as on-demand buffering or scan sharing.
 pub trait ChunkReader: Length + Send + Sync {
+    /// The concrete type of readers returned by this trait
     type T: Read;
 
-    /// Get a [`Read`] starting at the provided file offset
+    /// Get a [`Read`] instance starting at the provided file offset
     ///
-    /// Subsequent or concurrent calls to [`Self::get_read`] or [`Self::get_bytes`] may
-    /// side-effect on previously returned [`Self::T`]. Care should be taken to avoid this
-    ///
-    /// See [`File::try_clone`] for more information
+    /// Returned readers follow the model of [`File::try_clone`] where mutations
+    /// of one reader affect all readers. Thus subsequent or concurrent calls to
+    /// [`Self::get_read`] or [`Self::get_bytes`] may cause side-effects on
+    /// previously returned readers. Callers of `get_read` should take care
+    /// to avoid race conditions.
     fn get_read(&self, start: u64) -> Result<Self::T>;
 
-    /// Get a range as bytes
-    ///
-    /// Concurrent calls to [`Self::get_bytes`] may result in interleaved output
+    /// Get a range of data in memory as [`Bytes`]
     ///
-    /// See [`File::try_clone`] for more information
+    /// Similarly to [`Self::get_read`], this method may have side-effects on
+    /// previously returned readers.
     fn get_bytes(&self, start: u64, length: usize) -> Result<Bytes>;
 }
 

Reply via email to