etseidl commented on code in PR #8445:
URL: https://github.com/apache/arrow-rs/pull/8445#discussion_r2382560107
##########
parquet/benches/metadata.rs:
##########
@@ -242,40 +244,36 @@ fn criterion_benchmark(c: &mut Criterion) {
#[cfg(feature = "arrow")]
c.bench_function("page headers", |b| {
b.iter(|| {
- metadata.row_groups.iter().for_each(|rg| {
- rg.columns.iter().for_each(|col| {
- if let Some(col_meta) = &col.meta_data {
- if let Some(dict_offset) = col_meta.dictionary_page_offset {
- parquet::thrift::bench_page_header(
- &file_bytes.slice(dict_offset as usize..),
- );
- }
+ for rg in metadata.row_groups() {
+ for col in rg.columns() {
+ if let Some(dict_offset) = col.dictionary_page_offset() {
parquet::thrift::bench_page_header(
- &file_bytes.slice(col_meta.data_page_offset as usize..),
+ &file_bytes.slice(dict_offset as usize..),
);
}
- });
- });
+ parquet::thrift::bench_page_header(
+ &file_bytes.slice(col.data_page_offset() as usize..),
+ );
+ }
+ }
})
});
#[cfg(feature = "arrow")]
c.bench_function("page headers (no stats)", |b| {
Review Comment:
Agreed. The only reason I added them was to see the speedup from not
decoding the `Statistics`. I'll make a note to remove them later. The same goes
for the private file metadata decoding: we should only be benchmarking the public API.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]