This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f73dbc30b Minor: improve `RowFilter` and `ArrowPredicate` docs (#6301)
f73dbc30b is described below

commit f73dbc30bb80a87d5b57c1cdb0232d15f0a5a75a
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Aug 25 07:06:37 2024 -0400

    Minor: improve `RowFilter` and `ArrowPredicate` docs (#6301)
    
    * Minor: improve `RowFilter` and `ArrowPredicate` docs
    
    * tweak
---
 parquet/src/arrow/arrow_reader/filter.rs | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/parquet/src/arrow/arrow_reader/filter.rs b/parquet/src/arrow/arrow_reader/filter.rs
index 4686e1512..2e22f7e01 100644
--- a/parquet/src/arrow/arrow_reader/filter.rs
+++ b/parquet/src/arrow/arrow_reader/filter.rs
@@ -20,6 +20,8 @@ use arrow_array::{BooleanArray, RecordBatch};
 use arrow_schema::ArrowError;
 
 /// A predicate operating on [`RecordBatch`]
+///
+/// See [`RowFilter`] for more information on the use of this trait.
 pub trait ArrowPredicate: Send + 'static {
     /// Returns the [`ProjectionMask`] that describes the columns required
     /// to evaluate this predicate. All projected columns will be provided in the `batch`
@@ -29,7 +31,7 @@ pub trait ArrowPredicate: Send + 'static {
     /// Evaluate this predicate for the given [`RecordBatch`] containing the columns
     /// identified by [`Self::projection`]
     ///
-    /// Must return a  [`BooleanArray`] that has the same length as the input
+    /// Must return a [`BooleanArray`] that has the same length as the input
     /// `batch` where each row indicates whether the row should be returned:
     /// * `true`:the row should be returned
     /// * `false` or `null`: the row should not be returned
@@ -68,12 +70,17 @@ where
     }
 }
 
-/// A [`RowFilter`] allows pushing down a filter predicate to skip IO and decode
+/// Filter applied *during* the parquet read process
+///
+/// [`RowFilter`] applies predicates in order, after decoding only the columns
+/// required. As predicates eliminate rows, fewer rows from subsequent columns
+/// may be required, thus potentially reducing IO and decode.
 ///
-/// This consists of a list of [`ArrowPredicate`] where only the rows that satisfy all
-/// of the predicates will be returned. Any [`RowSelection`] will be applied prior
+/// A `RowFilter` consists of a list of [`ArrowPredicate`]s. Only the rows for which
+/// all the predicates evaluate to `true` will be returned.
+/// Any [`RowSelection`] provided to the reader will be applied prior
 /// to the first predicate, and each predicate in turn will then be used to compute
-/// a more refined [`RowSelection`] to use when evaluating the subsequent predicates.
+/// a more refined [`RowSelection`] used when evaluating the subsequent predicates.
 ///
 /// Once all predicates have been evaluated, the final [`RowSelection`] is applied
 /// to the top-level [`ProjectionMask`] to produce the final output [`RecordBatch`].

Reply via email to