This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 2be261b78b Deprecate old Parquet page index parsing functions (#7640)
2be261b78b is described below
commit 2be261b78b16a4aa7b5b9aece648bec663c0dbf1
Author: Ed Seidl <[email protected]>
AuthorDate: Tue Jun 10 21:24:19 2025 -0700
Deprecate old Parquet page index parsing functions (#7640)
# Which issue does this PR close?
- Closes #6447.
# Rationale for this change
This deprecates the last of the old standalone Parquet metadata parsing
functions that have since been replaced by `ParquetMetaDataReader`.
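# What changes are included in this PR?
Marks `read_columns_indexes` and `read_offset_indexes` as deprecated in favor of
`ParquetMetaDataReader`. The arrow writer test now reads the offset index from the
reader's metadata instead of calling `read_offset_indexes`, and the serialized
reader tests that still exercise the old functions are annotated with
`#[allow(deprecated)]`.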
# Are there any user-facing changes?
No behavior changes; this only adds deprecation warnings to public API functions.
---
parquet/src/arrow/arrow_writer/mod.rs | 10 ++++++----
parquet/src/file/page_index/index_reader.rs | 8 ++++++++
parquet/src/file/serialized_reader.rs | 2 ++
3 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/parquet/src/arrow/arrow_writer/mod.rs
b/parquet/src/arrow/arrow_writer/mod.rs
index fbc32b0c4b..147c553443 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -1345,7 +1345,6 @@ mod tests {
use crate::data_type::AsBytes;
use crate::file::metadata::ParquetMetaData;
use crate::file::page_index::index::Index;
- use crate::file::page_index::index_reader::read_offset_indexes;
use crate::file::properties::{
BloomFilterPosition, EnabledStatistics, ReaderProperties,
WriterVersion,
};
@@ -2026,7 +2025,9 @@ mod tests {
writer.write(&batch).unwrap();
writer.close().unwrap();
- let reader =
SerializedFileReader::new(file.try_clone().unwrap()).unwrap();
+ let options = ReadOptionsBuilder::new().with_page_index().build();
+ let reader =
+ SerializedFileReader::new_with_options(file.try_clone().unwrap(),
options).unwrap();
let column = reader.metadata().row_group(0).columns();
@@ -2039,7 +2040,8 @@ mod tests {
"Expected a dictionary page"
);
- let offset_indexes = read_offset_indexes(&file,
column).unwrap().unwrap();
+ assert!(reader.metadata().offset_index().is_some());
+ let offset_indexes = &reader.metadata().offset_index().unwrap()[0];
let page_locations = offset_indexes[0].page_locations.clone();
@@ -2048,7 +2050,7 @@ mod tests {
assert_eq!(
page_locations.len(),
10,
- "Expected 9 pages but got {page_locations:#?}"
+ "Expected 10 pages but got {page_locations:#?}"
);
}
diff --git a/parquet/src/file/page_index/index_reader.rs
b/parquet/src/file/page_index/index_reader.rs
index c472ceb291..368ede8b40 100644
--- a/parquet/src/file/page_index/index_reader.rs
+++ b/parquet/src/file/page_index/index_reader.rs
@@ -48,6 +48,10 @@ pub(crate) fn acc_range(a: Option<Range<u64>>, b:
Option<Range<u64>>) -> Option<
/// See [Page Index Documentation] for more details.
///
/// [Page Index Documentation]:
https://github.com/apache/parquet-format/blob/master/PageIndex.md
+#[deprecated(
+ since = "55.2.0",
+ note = "Use ParquetMetaDataReader instead; will be removed in 58.0.0"
+)]
pub fn read_columns_indexes<R: ChunkReader>(
reader: &R,
chunks: &[ColumnChunkMetaData],
@@ -128,6 +132,10 @@ pub fn read_pages_locations<R: ChunkReader>(
/// See [Page Index Documentation] for more details.
///
/// [Page Index Documentation]:
https://github.com/apache/parquet-format/blob/master/PageIndex.md
+#[deprecated(
+ since = "55.2.0",
+ note = "Use ParquetMetaDataReader instead; will be removed in 58.0.0"
+)]
pub fn read_offset_indexes<R: ChunkReader>(
reader: &R,
chunks: &[ColumnChunkMetaData],
diff --git a/parquet/src/file/serialized_reader.rs
b/parquet/src/file/serialized_reader.rs
index 5d50a8c49d..ac43381ae8 100644
--- a/parquet/src/file/serialized_reader.rs
+++ b/parquet/src/file/serialized_reader.rs
@@ -1108,6 +1108,7 @@ mod tests {
use crate::data_type::private::ParquetValueType;
use crate::data_type::{AsBytes, FixedLenByteArrayType, Int32Type};
use crate::file::page_index::index::{Index, NativeIndex};
+ #[allow(deprecated)]
use crate::file::page_index::index_reader::{read_columns_indexes,
read_offset_indexes};
use crate::file::writer::SerializedFileWriter;
use crate::record::RowAccessor;
@@ -1940,6 +1941,7 @@ mod tests {
}
#[test]
+ #[allow(deprecated)]
fn test_page_index_reader_out_of_order() {
let test_file = get_test_file("alltypes_tiny_pages_plain.parquet");
let options = ReadOptionsBuilder::new().with_page_index().build();