This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new cfb9807646 Deprecate `ArrowReaderOptions::with_page_index` and update 
API (#9199)
cfb9807646 is described below

commit cfb9807646eaa3e577b406fcfe18312960a59f99
Author: Matthew Kim <[email protected]>
AuthorDate: Sat Jan 17 06:12:29 2026 -1000

    Deprecate `ArrowReaderOptions::with_page_index` and update API (#9199)
    
    # Which issue does this PR close?
    
    - Closes https://github.com/apache/arrow-rs/issues/9197
    
    # Rationale for this change
    
    This PR deprecates `ArrowReaderOptions::with_page_index(bool)` in favor
    of `with_page_index_policy(PageIndexPolicy)` to align with the
    `ParquetMetaDataReader` API. The underlying implementation now uses
    separate `column_index` and `offset_index` fields.
    
    
    
    # Are there any user-facing changes?
    
    Yes, some methods are deprecated.
---
 parquet/benches/arrow_reader_clickbench.rs    |   3 +-
 parquet/benches/arrow_statistics.rs           |   8 ++-
 parquet/examples/external_metadata.rs         |   2 +-
 parquet/src/arrow/arrow_reader/mod.rs         | 100 +++++++++++++++++++-------
 parquet/src/arrow/async_reader/mod.rs         |  50 +++++++------
 parquet/src/arrow/async_reader/store.rs       |  20 +++---
 parquet/src/file/metadata/reader.rs           |   4 +-
 parquet/tests/arrow_reader/io/async_reader.rs |   7 +-
 parquet/tests/arrow_reader/io/mod.rs          |   5 +-
 parquet/tests/arrow_reader/io/sync_reader.rs  |   3 +-
 parquet/tests/arrow_reader/predicate_cache.rs |  13 ++--
 parquet/tests/arrow_reader/statistics.rs      |   5 +-
 parquet/tests/arrow_writer_layout.rs          |   4 +-
 parquet/tests/encryption/encryption.rs        |   7 +-
 parquet/tests/encryption/encryption_async.rs  |   3 +-
 15 files changed, 154 insertions(+), 80 deletions(-)

diff --git a/parquet/benches/arrow_reader_clickbench.rs 
b/parquet/benches/arrow_reader_clickbench.rs
index e737a4cad1..32035c772b 100644
--- a/parquet/benches/arrow_reader_clickbench.rs
+++ b/parquet/benches/arrow_reader_clickbench.rs
@@ -42,6 +42,7 @@ use parquet::arrow::arrow_reader::{
     ParquetRecordBatchReaderBuilder, RowFilter,
 };
 use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask};
+use parquet::file::metadata::PageIndexPolicy;
 use parquet::schema::types::SchemaDescriptor;
 use std::fmt::{Display, Formatter};
 use std::path::{Path, PathBuf};
@@ -847,7 +848,7 @@ fn column_indices(schema: &SchemaDescriptor, column_names: 
&Vec<&str>) -> Vec<us
 /// Loads Parquet metadata from the given path, including page indexes
 fn load_metadata(path: &Path) -> ArrowReaderMetadata {
     let file = std::fs::File::open(path).unwrap();
-    let options = ArrowReaderOptions::new().with_page_index(true);
+    let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::from(true));
     let orig_metadata =
         ArrowReaderMetadata::load(&file, 
options.clone()).expect("parquet-metadata loading failed");
 
diff --git a/parquet/benches/arrow_statistics.rs 
b/parquet/benches/arrow_statistics.rs
index f825883e32..a4aa9d137e 100644
--- a/parquet/benches/arrow_statistics.rs
+++ b/parquet/benches/arrow_statistics.rs
@@ -24,7 +24,10 @@ use arrow_schema::{
     Field, Schema,
 };
 use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
-use parquet::{arrow::arrow_reader::ArrowReaderOptions, 
file::properties::WriterProperties};
+use parquet::{
+    arrow::arrow_reader::ArrowReaderOptions,
+    file::{metadata::PageIndexPolicy, properties::WriterProperties},
+};
 use parquet::{
     arrow::{ArrowWriter, arrow_reader::ArrowReaderBuilder},
     file::properties::EnabledStatistics,
@@ -195,7 +198,8 @@ fn criterion_benchmark(c: &mut Criterion) {
         for data_page_row_count_limit in &data_page_row_count_limits {
             let file = create_parquet_file(dtype.clone(), row_groups, 
data_page_row_count_limit);
             let file = file.reopen().unwrap();
-            let options = ArrowReaderOptions::new().with_page_index(true);
+            let options =
+                
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::from(true));
             let reader = ArrowReaderBuilder::try_new_with_options(file, 
options).unwrap();
             let metadata = reader.metadata();
             let row_groups = metadata.row_groups();
diff --git a/parquet/examples/external_metadata.rs 
b/parquet/examples/external_metadata.rs
index 9370016049..eeb1d90d3c 100644
--- a/parquet/examples/external_metadata.rs
+++ b/parquet/examples/external_metadata.rs
@@ -189,7 +189,7 @@ async fn read_remote_parquet_file_with_metadata(
 ) -> Vec<RecordBatch> {
     let options = ArrowReaderOptions::new()
         // tell the reader to read the page index
-        .with_page_index(true);
+        .with_page_index_policy(PageIndexPolicy::from(true));
     // create a reader with pre-existing metadata
     let arrow_reader_metadata = ArrowReaderMetadata::try_new(metadata.into(), 
options).unwrap();
     let reader =
diff --git a/parquet/src/arrow/arrow_reader/mod.rs 
b/parquet/src/arrow/arrow_reader/mod.rs
index cb172e1e38..29346f0b27 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -455,8 +455,10 @@ pub struct ArrowReaderOptions {
     ///
     /// [ARROW_SCHEMA_META_KEY]: crate::arrow::ARROW_SCHEMA_META_KEY
     supplied_schema: Option<SchemaRef>,
-    /// Policy for reading offset and column indexes.
-    pub(crate) page_index_policy: PageIndexPolicy,
+
+    pub(crate) column_index: PageIndexPolicy,
+    pub(crate) offset_index: PageIndexPolicy,
+
     /// Options to control reading of Parquet metadata
     metadata_options: ParquetMetaDataOptions,
     /// If encryption is enabled, the file decryption properties can be 
provided
@@ -601,6 +603,7 @@ impl ArrowReaderOptions {
         }
     }
 
+    #[deprecated(since = "57.2.0", note = "Use `with_page_index_policy` 
instead")]
     /// Enable reading the [`PageIndex`] from the metadata, if present 
(defaults to `false`)
     ///
     /// The `PageIndex` can be used to push down predicates to the parquet 
scan,
@@ -614,22 +617,41 @@ impl ArrowReaderOptions {
     /// [`ParquetMetaData::column_index`]: 
crate::file::metadata::ParquetMetaData::column_index
     /// [`ParquetMetaData::offset_index`]: 
crate::file::metadata::ParquetMetaData::offset_index
     pub fn with_page_index(self, page_index: bool) -> Self {
-        let page_index_policy = PageIndexPolicy::from(page_index);
-
-        Self {
-            page_index_policy,
-            ..self
-        }
+        self.with_page_index_policy(PageIndexPolicy::from(page_index))
     }
 
-    /// Set the [`PageIndexPolicy`] to determine how page indexes should be 
read.
+    /// Sets the [`PageIndexPolicy`] for both the column and offset indexes.
+    ///
+    /// The `PageIndex` consists of two structures: the `ColumnIndex` and 
`OffsetIndex`.
+    /// This method sets the same policy for both. For fine-grained control, 
use
+    /// [`Self::with_column_index_policy`] and 
[`Self::with_offset_index_policy`].
     ///
-    /// See [`Self::with_page_index`] for more details.
+    /// See [`Self::with_page_index`] for more details on page indexes.
     pub fn with_page_index_policy(self, policy: PageIndexPolicy) -> Self {
-        Self {
-            page_index_policy: policy,
-            ..self
-        }
+        self.with_column_index_policy(policy)
+            .with_offset_index_policy(policy)
+    }
+
+    /// Sets the [`PageIndexPolicy`] for the Parquet [ColumnIndex] structure.
+    ///
+    /// The `ColumnIndex` contains min/max statistics for each page, which can 
be used
+    /// for predicate pushdown and page-level pruning.
+    ///
+    /// [ColumnIndex]: 
https://github.com/apache/parquet-format/blob/master/PageIndex.md
+    pub fn with_column_index_policy(mut self, policy: PageIndexPolicy) -> Self 
{
+        self.column_index = policy;
+        self
+    }
+
+    /// Sets the [`PageIndexPolicy`] for the Parquet [OffsetIndex] structure.
+    ///
+    /// The `OffsetIndex` contains the locations and sizes of each page, which 
enables
+    /// efficient page-level skipping and random access within column chunks.
+    ///
+    /// [OffsetIndex]: 
https://github.com/apache/parquet-format/blob/master/PageIndex.md
+    pub fn with_offset_index_policy(mut self, policy: PageIndexPolicy) -> Self 
{
+        self.offset_index = policy;
+        self
     }
 
     /// Provide a Parquet schema to use when decoding the metadata. The schema 
in the Parquet
@@ -766,11 +788,34 @@ impl ArrowReaderOptions {
         })
     }
 
-    /// Retrieve the currently set page index behavior.
+    #[deprecated(
+        since = "57.2.0",
+        note = "Use `column_index_policy` or `offset_index_policy` instead"
+    )]
+    /// Returns whether page index reading is enabled.
+    ///
+    /// This returns `true` if both the column index and offset index policies 
are not [`PageIndexPolicy::Skip`].
     ///
-    /// This can be set via [`with_page_index`][Self::with_page_index].
+    /// This can be set via [`with_page_index`][Self::with_page_index] or
+    /// [`with_page_index_policy`][Self::with_page_index_policy].
     pub fn page_index(&self) -> bool {
-        self.page_index_policy != PageIndexPolicy::Skip
+        self.offset_index != PageIndexPolicy::Skip && self.column_index != 
PageIndexPolicy::Skip
+    }
+
+    /// Retrieve the currently set [`PageIndexPolicy`] for the offset index.
+    ///
+    /// This can be set via 
[`with_offset_index_policy`][Self::with_offset_index_policy]
+    /// or [`with_page_index_policy`][Self::with_page_index_policy].
+    pub fn offset_index_policy(&self) -> PageIndexPolicy {
+        self.offset_index
+    }
+
+    /// Retrieve the currently set [`PageIndexPolicy`] for the column index.
+    ///
+    /// This can be set via 
[`with_column_index_policy`][Self::with_column_index_policy]
+    /// or [`with_page_index_policy`][Self::with_page_index_policy].
+    pub fn column_index_policy(&self) -> PageIndexPolicy {
+        self.column_index
     }
 
     /// Retrieve the currently set metadata decoding options.
@@ -826,7 +871,8 @@ impl ArrowReaderMetadata {
     /// to load the page index by making an object store request.
     pub fn load<T: ChunkReader>(reader: &T, options: ArrowReaderOptions) -> 
Result<Self> {
         let metadata = ParquetMetaDataReader::new()
-            .with_page_index_policy(options.page_index_policy)
+            .with_column_index_policy(options.column_index)
+            .with_offset_index_policy(options.offset_index)
             .with_metadata_options(Some(options.metadata_options.clone()));
         #[cfg(feature = "encryption")]
         let metadata = metadata.with_decryption_properties(
@@ -1551,7 +1597,7 @@ pub(crate) mod tests {
         FloatType, Int32Type, Int64Type, Int96, Int96Type,
     };
     use crate::errors::Result;
-    use crate::file::metadata::{ParquetMetaData, ParquetStatisticsPolicy};
+    use crate::file::metadata::{PageIndexPolicy, ParquetMetaData, 
ParquetStatisticsPolicy};
     use crate::file::properties::{EnabledStatistics, WriterProperties, 
WriterVersion};
     use crate::file::writer::SerializedFileWriter;
     use crate::schema::parser::parse_message_type;
@@ -3348,8 +3394,9 @@ pub(crate) mod tests {
 
         file.rewind().unwrap();
 
-        let options = ArrowReaderOptions::new()
-            .with_page_index(opts.enabled_statistics == 
EnabledStatistics::Page);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::from(
+            opts.enabled_statistics == EnabledStatistics::Page,
+        ));
 
         let mut builder =
             ParquetRecordBatchReaderBuilder::try_new_with_options(file, 
options).unwrap();
@@ -4757,7 +4804,8 @@ pub(crate) mod tests {
             batch_size: usize,
             selections: RowSelection,
         ) -> ParquetRecordBatchReader {
-            let options = ArrowReaderOptions::new().with_page_index(true);
+            let options =
+                
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
             let file = test_file.try_clone().unwrap();
             ParquetRecordBatchReaderBuilder::try_new_with_options(file, 
options)
                 .unwrap()
@@ -4796,7 +4844,7 @@ pub(crate) mod tests {
             let test_file = File::open(path).unwrap();
             let builder = 
ParquetRecordBatchReaderBuilder::try_new_with_options(
                 test_file,
-                ArrowReaderOptions::new().with_page_index(true),
+                
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required),
             )
             .unwrap();
             assert!(!builder.metadata().offset_index().unwrap()[0].is_empty());
@@ -4811,7 +4859,7 @@ pub(crate) mod tests {
             let test_file = File::open(path).unwrap();
             let builder = 
ParquetRecordBatchReaderBuilder::try_new_with_options(
                 test_file,
-                ArrowReaderOptions::new().with_page_index(true),
+                
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required),
             )
             .unwrap();
             // Although `Vec<Vec<PageLoacation>>` of each row group is empty,
@@ -5583,7 +5631,7 @@ pub(crate) mod tests {
         writer.close().unwrap();
         let data = Bytes::from(buffer);
 
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
         let builder =
             
ParquetRecordBatchReaderBuilder::try_new_with_options(data.clone(), 
options).unwrap();
         let schema = builder.parquet_schema().clone();
@@ -5598,7 +5646,7 @@ pub(crate) mod tests {
             })
         };
 
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
         let predicate = make_predicate(filter_mask.clone());
 
         // The batch size is set to 12 to read all rows in one go after 
filtering
diff --git a/parquet/src/arrow/async_reader/mod.rs 
b/parquet/src/arrow/async_reader/mod.rs
index 60f2ca1615..38eef7343e 100644
--- a/parquet/src/arrow/async_reader/mod.rs
+++ b/parquet/src/arrow/async_reader/mod.rs
@@ -45,7 +45,7 @@ use crate::bloom_filter::{
     SBBF_HEADER_SIZE_ESTIMATE, Sbbf, chunk_read_bloom_filter_header_and_offset,
 };
 use crate::errors::{ParquetError, Result};
-use crate::file::metadata::{PageIndexPolicy, ParquetMetaData, 
ParquetMetaDataReader};
+use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
 
 mod metadata;
 pub use metadata::*;
@@ -165,11 +165,14 @@ impl<T: AsyncRead + AsyncSeek + Unpin + Send> 
AsyncFileReader for T {
     ) -> BoxFuture<'a, Result<Arc<ParquetMetaData>>> {
         async move {
             let metadata_opts = options.map(|o| o.metadata_options().clone());
-            let metadata_reader = ParquetMetaDataReader::new()
-                .with_page_index_policy(PageIndexPolicy::from(
-                    options.is_some_and(|o| o.page_index()),
-                ))
-                .with_metadata_options(metadata_opts);
+            let mut metadata_reader =
+                
ParquetMetaDataReader::new().with_metadata_options(metadata_opts);
+
+            if let Some(opts) = options {
+                metadata_reader = metadata_reader
+                    .with_column_index_policy(opts.column_index_policy())
+                    .with_offset_index_policy(opts.offset_index_policy());
+            }
 
             #[cfg(feature = "encryption")]
             let metadata_reader = metadata_reader.with_decryption_properties(
@@ -775,6 +778,7 @@ mod tests {
     use crate::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
     use crate::arrow::schema::virtual_type::RowNumber;
     use crate::arrow::{ArrowWriter, AsyncArrowWriter, ProjectionMask};
+    use crate::file::metadata::PageIndexPolicy;
     use crate::file::metadata::ParquetMetaDataReader;
     use crate::file::properties::WriterProperties;
     use arrow::compute::kernels::cmp::eq;
@@ -829,9 +833,12 @@ mod tests {
             &'a mut self,
             options: Option<&'a ArrowReaderOptions>,
         ) -> BoxFuture<'a, Result<Arc<ParquetMetaData>>> {
-            let metadata_reader = 
ParquetMetaDataReader::new().with_page_index_policy(
-                PageIndexPolicy::from(options.is_some_and(|o| o.page_index())),
-            );
+            let mut metadata_reader = ParquetMetaDataReader::new();
+            if let Some(opts) = options {
+                metadata_reader = metadata_reader
+                    .with_column_index_policy(opts.column_index_policy())
+                    .with_offset_index_policy(opts.offset_index_policy());
+            }
             self.metadata = Some(Arc::new(
                 metadata_reader.parse_and_finish(&self.data).unwrap(),
             ));
@@ -953,7 +960,7 @@ mod tests {
 
         let async_reader = TestReader::new(data.clone());
 
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
         let builder = 
ParquetRecordBatchStreamBuilder::new_with_options(async_reader, options)
             .await
             .unwrap();
@@ -1055,7 +1062,7 @@ mod tests {
 
         let async_reader = TestReader::new(data.clone());
 
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
         let builder = 
ParquetRecordBatchStreamBuilder::new_with_options(async_reader, options)
             .await
             .unwrap();
@@ -1129,7 +1136,8 @@ mod tests {
 
             let async_reader = TestReader::new(data.clone());
 
-            let options = ArrowReaderOptions::new().with_page_index(true);
+            let options =
+                
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
             let builder = 
ParquetRecordBatchStreamBuilder::new_with_options(async_reader, options)
                 .await
                 .unwrap();
@@ -1191,7 +1199,7 @@ mod tests {
 
         let async_reader = TestReader::new(data.clone());
 
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
         let builder = 
ParquetRecordBatchStreamBuilder::new_with_options(async_reader, options)
             .await
             .unwrap();
@@ -1252,7 +1260,7 @@ mod tests {
 
         let builder = ParquetRecordBatchStreamBuilder::new_with_options(
             TestReader::new(data.clone()),
-            ArrowReaderOptions::new().with_page_index(true),
+            
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required),
         )
         .await
         .unwrap();
@@ -1274,7 +1282,7 @@ mod tests {
         // If the Reader chooses mask to handle filter, it might cause panic 
because the mid 4 pages may not be decoded.
         let stream = ParquetRecordBatchStreamBuilder::new_with_options(
             TestReader::new(data.clone()),
-            ArrowReaderOptions::new().with_page_index(true),
+            
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required),
         )
         .await
         .unwrap()
@@ -1557,7 +1565,7 @@ mod tests {
 
         let mask = ProjectionMask::leaves(&parquet_schema, vec![0, 2]);
 
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
         let stream = 
ParquetRecordBatchStreamBuilder::new_with_options(async_reader, options)
             .await
             .unwrap()
@@ -1835,7 +1843,7 @@ mod tests {
             // Read data
             let mut reader = ParquetRecordBatchStreamBuilder::new_with_options(
                 tokio::fs::File::from_std(file.try_clone().unwrap()),
-                ArrowReaderOptions::new().with_page_index(true),
+                
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required),
             )
             .await
             .unwrap();
@@ -1957,7 +1965,7 @@ mod tests {
             .unwrap();
 
         metadata.set_offset_index(Some(vec![]));
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
         let arrow_reader_metadata = 
ArrowReaderMetadata::try_new(metadata.into(), options).unwrap();
         let reader =
             ParquetRecordBatchStreamBuilder::new_with_metadata(file, 
arrow_reader_metadata)
@@ -1982,7 +1990,7 @@ mod tests {
             .await
             .unwrap();
 
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
         let arrow_reader_metadata = 
ArrowReaderMetadata::try_new(metadata.into(), options).unwrap();
         let reader =
             ParquetRecordBatchStreamBuilder::new_with_metadata(file, 
arrow_reader_metadata)
@@ -2034,7 +2042,7 @@ mod tests {
         write_metadata_to_local_file(metadata, &metadata_path);
         let metadata = read_metadata_from_local_file(&metadata_path);
 
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
         let arrow_reader_metadata = 
ArrowReaderMetadata::try_new(metadata.into(), options).unwrap();
         let reader =
             ParquetRecordBatchStreamBuilder::new_with_metadata(file, 
arrow_reader_metadata)
@@ -2060,7 +2068,7 @@ mod tests {
         let async_reader = TestReader::new(data);
 
         // Enable page index so the fetch logic loads only required pages
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Required);
         let builder = 
ParquetRecordBatchStreamBuilder::new_with_options(async_reader, options)
             .await
             .unwrap();
diff --git a/parquet/src/arrow/async_reader/store.rs 
b/parquet/src/arrow/async_reader/store.rs
index f1e987081d..59b161bbc6 100644
--- a/parquet/src/arrow/async_reader/store.rs
+++ b/parquet/src/arrow/async_reader/store.rs
@@ -226,8 +226,12 @@ impl AsyncFileReader for ParquetObjectReader {
             // When page_index_policy is Optional or Required, override the 
preload flags
             // to ensure the specified policy takes precedence.
             if let Some(options) = options {
-                if options.page_index_policy != PageIndexPolicy::Skip {
-                    metadata = 
metadata.with_page_index_policy(options.page_index_policy);
+                if options.column_index_policy() != PageIndexPolicy::Skip
+                    || options.offset_index_policy() != PageIndexPolicy::Skip
+                {
+                    metadata = metadata
+                        
.with_column_index_policy(options.column_index_policy())
+                        
.with_offset_index_policy(options.offset_index_policy());
                 }
             }
 
@@ -426,8 +430,7 @@ mod tests {
             .with_preload_offset_index(true);
 
         // Create options with page_index_policy set to Skip (default)
-        let mut options = ArrowReaderOptions::new();
-        options.page_index_policy = PageIndexPolicy::Skip;
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Skip);
 
         // Get metadata - Skip means use reader's preload flags (true)
         let metadata = reader.get_metadata(Some(&options)).await.unwrap();
@@ -447,8 +450,7 @@ mod tests {
             .with_preload_offset_index(false);
 
         // Create options with page_index_policy set to Optional
-        let mut options = ArrowReaderOptions::new();
-        options.page_index_policy = PageIndexPolicy::Optional;
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Optional);
 
         // Get metadata - Optional overrides preload flags and attempts to 
load indexes
         let metadata = reader.get_metadata(Some(&options)).await.unwrap();
@@ -468,8 +470,7 @@ mod tests {
             .with_preload_column_index(false)
             .with_preload_offset_index(false);
 
-        let mut options1 = ArrowReaderOptions::new();
-        options1.page_index_policy = PageIndexPolicy::Skip;
+        let options1 = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Skip);
         let metadata1 = reader1.get_metadata(Some(&options1)).await.unwrap();
 
         // Test 2: preload=false + Optional policy -> overrides to try loading
@@ -478,8 +479,7 @@ mod tests {
             .with_preload_column_index(false)
             .with_preload_offset_index(false);
 
-        let mut options2 = ArrowReaderOptions::new();
-        options2.page_index_policy = PageIndexPolicy::Optional;
+        let options2 = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::Optional);
         let metadata2 = reader2.get_metadata(Some(&options2)).await.unwrap();
 
         // Both should succeed (no panic/error)
diff --git a/parquet/src/file/metadata/reader.rs 
b/parquet/src/file/metadata/reader.rs
index a18a5e68a9..2be9dcbd4b 100644
--- a/parquet/src/file/metadata/reader.rs
+++ b/parquet/src/file/metadata/reader.rs
@@ -123,9 +123,7 @@ impl ParquetMetaDataReader {
     /// [Parquet page index]: 
https://github.com/apache/parquet-format/blob/master/PageIndex.md
     #[deprecated(since = "56.1.0", note = "Use `with_page_index_policy` 
instead")]
     pub fn with_page_indexes(self, val: bool) -> Self {
-        let policy = PageIndexPolicy::from(val);
-        self.with_column_index_policy(policy)
-            .with_offset_index_policy(policy)
+        self.with_page_index_policy(PageIndexPolicy::from(val))
     }
 
     /// Enable or disable reading the Parquet [ColumnIndex] structure.
diff --git a/parquet/tests/arrow_reader/io/async_reader.rs 
b/parquet/tests/arrow_reader/io/async_reader.rs
index 2f49de8a38..8022335da0 100644
--- a/parquet/tests/arrow_reader/io/async_reader.rs
+++ b/parquet/tests/arrow_reader/io/async_reader.rs
@@ -28,6 +28,7 @@ use parquet::arrow::arrow_reader::{ArrowReaderOptions, 
RowSelection, RowSelector
 use parquet::arrow::async_reader::AsyncFileReader;
 use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask};
 use parquet::errors::Result;
+use parquet::file::metadata::PageIndexPolicy;
 use parquet::file::metadata::ParquetMetaData;
 use std::ops::Range;
 use std::sync::Arc;
@@ -206,7 +207,7 @@ async fn test_read_single_row_filter_no_page_index() {
     // Apply a filter  "b" > 575 and <less> than 625
     // (last data page in Row Group 0 and first DataPage in Row Group 1)
     let test_file = test_file();
-    let options = test_options().with_page_index(false);
+    let options = 
test_options().with_page_index_policy(PageIndexPolicy::from(false));
     let builder = async_builder(&test_file, options).await;
     let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
 
@@ -318,7 +319,9 @@ async fn async_builder(
     test_file: &TestParquetFile,
     options: ArrowReaderOptions,
 ) -> ParquetRecordBatchStreamBuilder<RecordingAsyncFileReader> {
-    let parquet_meta_data = if options.page_index() {
+    let parquet_meta_data = if options.offset_index_policy() != 
PageIndexPolicy::Skip
+        || options.column_index_policy() != PageIndexPolicy::Skip
+    {
         Arc::clone(test_file.parquet_metadata())
     } else {
         // strip out the page index from the metadata
diff --git a/parquet/tests/arrow_reader/io/mod.rs 
b/parquet/tests/arrow_reader/io/mod.rs
index 86b7674121..3b11429be4 100644
--- a/parquet/tests/arrow_reader/io/mod.rs
+++ b/parquet/tests/arrow_reader/io/mod.rs
@@ -47,6 +47,7 @@ use parquet::arrow::arrow_reader::{
 use parquet::arrow::{ArrowWriter, ProjectionMask};
 use parquet::data_type::AsBytes;
 use parquet::file::FOOTER_SIZE;
+use parquet::file::metadata::PageIndexPolicy;
 use parquet::file::metadata::{FooterTail, ParquetMetaData, ParquetOffsetIndex};
 use parquet::file::page_index::offset_index::PageLocation;
 use parquet::file::properties::WriterProperties;
@@ -73,7 +74,7 @@ fn test_file() -> TestParquetFile {
 ///
 /// Note these tests use the PageIndex to reduce IO
 fn test_options() -> ArrowReaderOptions {
-    ArrowReaderOptions::default().with_page_index(true)
+    
ArrowReaderOptions::default().with_page_index_policy(PageIndexPolicy::from(true))
 }
 
 /// Return a row filter that evaluates "b > 575" AND "b < 625"
@@ -189,7 +190,7 @@ impl TestParquetFile {
         // Read the parquet file to determine its layout
         let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
             bytes.clone(),
-            ArrowReaderOptions::default().with_page_index(true),
+            
ArrowReaderOptions::default().with_page_index_policy(PageIndexPolicy::from(true)),
         )
         .unwrap();
 
diff --git a/parquet/tests/arrow_reader/io/sync_reader.rs 
b/parquet/tests/arrow_reader/io/sync_reader.rs
index 77c200fa86..835029d5c6 100644
--- a/parquet/tests/arrow_reader/io/sync_reader.rs
+++ b/parquet/tests/arrow_reader/io/sync_reader.rs
@@ -27,6 +27,7 @@ use parquet::arrow::ProjectionMask;
 use parquet::arrow::arrow_reader::{
     ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowSelection, 
RowSelector,
 };
+use parquet::file::metadata::PageIndexPolicy;
 use parquet::file::reader::{ChunkReader, Length};
 use std::io::Read;
 use std::sync::Arc;
@@ -122,7 +123,7 @@ fn test_read_single_column() {
 #[test]
 fn test_read_single_column_no_page_index() {
     let test_file = test_file();
-    let options = test_options().with_page_index(false);
+    let options = 
test_options().with_page_index_policy(PageIndexPolicy::from(false));
     let builder = sync_builder(&test_file, options);
     let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
     let builder = 
builder.with_projection(ProjectionMask::columns(&schema_descr, ["b"]));
diff --git a/parquet/tests/arrow_reader/predicate_cache.rs 
b/parquet/tests/arrow_reader/predicate_cache.rs
index b419c37158..bf3412dd4d 100644
--- a/parquet/tests/arrow_reader/predicate_cache.rs
+++ b/parquet/tests/arrow_reader/predicate_cache.rs
@@ -33,7 +33,7 @@ use parquet::arrow::arrow_reader::{ArrowPredicateFn, 
ArrowReaderOptions, RowFilt
 use parquet::arrow::arrow_reader::{ArrowReaderBuilder, 
ParquetRecordBatchReaderBuilder};
 use parquet::arrow::async_reader::AsyncFileReader;
 use parquet::arrow::{ArrowWriter, ParquetRecordBatchStreamBuilder, 
ProjectionMask};
-use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData, 
ParquetMetaDataReader};
+use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
 use parquet::file::properties::WriterProperties;
 use std::ops::Range;
 use std::sync::Arc;
@@ -356,9 +356,14 @@ impl AsyncFileReader for TestReader {
         &'a mut self,
         options: Option<&'a ArrowReaderOptions>,
     ) -> BoxFuture<'a, parquet::errors::Result<Arc<ParquetMetaData>>> {
-        let metadata_reader = 
ParquetMetaDataReader::new().with_page_index_policy(
-            PageIndexPolicy::from(options.is_some_and(|o| o.page_index())),
-        );
+        let mut metadata_reader = ParquetMetaDataReader::new();
+
+        if let Some(options) = options {
+            metadata_reader = metadata_reader
+                .with_column_index_policy(options.column_index_policy())
+                .with_offset_index_policy(options.offset_index_policy());
+        }
+
         self.metadata = Some(Arc::new(
             metadata_reader.parse_and_finish(&self.data).unwrap(),
         ));
diff --git a/parquet/tests/arrow_reader/statistics.rs 
b/parquet/tests/arrow_reader/statistics.rs
index aef473fa84..07a9bcc578 100644
--- a/parquet/tests/arrow_reader/statistics.rs
+++ b/parquet/tests/arrow_reader/statistics.rs
@@ -46,6 +46,7 @@ use 
parquet::arrow::arrow_reader::statistics::StatisticsConverter;
 use parquet::arrow::arrow_reader::{
     ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
 };
+use parquet::file::metadata::PageIndexPolicy;
 use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData};
 use parquet::file::properties::{EnabledStatistics, WriterProperties};
 use parquet::file::statistics::{Statistics, ValueStatistics};
@@ -145,7 +146,7 @@ fn build_parquet_file(
     let _file_meta = writer.close().unwrap();
 
     let file = output_file.reopen().unwrap();
-    let options = ArrowReaderOptions::new().with_page_index(true);
+    let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::from(true));
     ArrowReaderBuilder::try_new_with_options(file, options).unwrap()
 }
 
@@ -170,7 +171,7 @@ impl TestReader {
 
         // open the file & get the reader
         let file = file.reopen().unwrap();
-        let options = ArrowReaderOptions::new().with_page_index(true);
+        let options = 
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::from(true));
         ArrowReaderBuilder::try_new_with_options(file, options).unwrap()
     }
 }
diff --git a/parquet/tests/arrow_writer_layout.rs 
b/parquet/tests/arrow_writer_layout.rs
index f78370ca8d..ca6f89cab4 100644
--- a/parquet/tests/arrow_writer_layout.rs
+++ b/parquet/tests/arrow_writer_layout.rs
@@ -24,6 +24,7 @@ use bytes::Bytes;
 use parquet::arrow::ArrowWriter;
 use parquet::arrow::arrow_reader::{ArrowReaderOptions, 
ParquetRecordBatchReaderBuilder};
 use parquet::basic::{Encoding, PageType};
+use parquet::file::metadata::PageIndexPolicy;
 use parquet::file::metadata::ParquetMetaData;
 use parquet::file::properties::{ReaderProperties, WriterProperties};
 use parquet::file::reader::SerializedPageReader;
@@ -68,7 +69,8 @@ fn do_test(test: LayoutTest) {
     let b = Bytes::from(buf);
 
     // Re-read file to decode column index
-    let read_options = ArrowReaderOptions::new().with_page_index(true);
+    let read_options =
+        
ArrowReaderOptions::new().with_page_index_policy(PageIndexPolicy::from(true));
     let reader =
         ParquetRecordBatchReaderBuilder::try_new_with_options(b.clone(), 
read_options).unwrap();
 
diff --git a/parquet/tests/encryption/encryption.rs 
b/parquet/tests/encryption/encryption.rs
index f999abab95..b642af040e 100644
--- a/parquet/tests/encryption/encryption.rs
+++ b/parquet/tests/encryption/encryption.rs
@@ -34,6 +34,7 @@ use parquet::data_type::{ByteArray, ByteArrayType};
 use parquet::encryption::decrypt::FileDecryptionProperties;
 use parquet::encryption::encrypt::FileEncryptionProperties;
 use parquet::errors::ParquetError;
+use parquet::file::metadata::PageIndexPolicy;
 use parquet::file::metadata::ParquetMetaData;
 use parquet::file::properties::WriterProperties;
 use parquet::file::writer::SerializedFileWriter;
@@ -453,7 +454,7 @@ fn uniform_encryption_roundtrip(
 
     let options = ArrowReaderOptions::new()
         .with_file_decryption_properties(decryption_properties)
-        .with_page_index(page_index);
+        .with_page_index_policy(PageIndexPolicy::from(page_index));
 
     let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, 
options)?;
     assert_eq!(&row_group_sizes(builder.metadata()), &[50, 50, 50]);
@@ -557,7 +558,7 @@ fn uniform_encryption_page_skipping(page_index: bool) -> 
parquet::errors::Result
 
     let options = ArrowReaderOptions::new()
         .with_file_decryption_properties(decryption_properties)
-        .with_page_index(page_index);
+        .with_page_index_policy(PageIndexPolicy::from(page_index));
 
     let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, 
options)?;
 
@@ -1041,7 +1042,7 @@ fn test_decrypt_page_index(
     let file = File::open(path)?;
     let options = ArrowReaderOptions::default()
         .with_file_decryption_properties(decryption_properties)
-        .with_page_index(true);
+        .with_page_index_policy(PageIndexPolicy::from(true));
 
     let arrow_metadata = ArrowReaderMetadata::load(&file, options)?;
 
diff --git a/parquet/tests/encryption/encryption_async.rs 
b/parquet/tests/encryption/encryption_async.rs
index 51acd73748..dc57ecd50d 100644
--- a/parquet/tests/encryption/encryption_async.rs
+++ b/parquet/tests/encryption/encryption_async.rs
@@ -35,6 +35,7 @@ use parquet::arrow::{
 use parquet::encryption::decrypt::FileDecryptionProperties;
 use parquet::encryption::encrypt::FileEncryptionProperties;
 use parquet::errors::ParquetError;
+use parquet::file::metadata::PageIndexPolicy;
 use parquet::file::metadata::ParquetMetaData;
 use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder};
 use parquet::file::writer::SerializedFileWriter;
@@ -439,7 +440,7 @@ async fn test_decrypt_page_index(
 
     let options = ArrowReaderOptions::new()
         .with_file_decryption_properties(decryption_properties)
-        .with_page_index(true);
+        .with_page_index_policy(PageIndexPolicy::from(true));
 
     let arrow_metadata = ArrowReaderMetadata::load_async(&mut file, 
options).await?;
 


Reply via email to