etseidl commented on code in PR #8445:
URL: https://github.com/apache/arrow-rs/pull/8445#discussion_r2382560107


##########
parquet/benches/metadata.rs:
##########
@@ -242,40 +244,36 @@ fn criterion_benchmark(c: &mut Criterion) {
     #[cfg(feature = "arrow")]
     c.bench_function("page headers", |b| {
         b.iter(|| {
-            metadata.row_groups.iter().for_each(|rg| {
-                rg.columns.iter().for_each(|col| {
-                    if let Some(col_meta) = &col.meta_data {
-                        if let Some(dict_offset) = col_meta.dictionary_page_offset {
-                            parquet::thrift::bench_page_header(
-                                &file_bytes.slice(dict_offset as usize..),
-                            );
-                        }
+            for rg in metadata.row_groups() {
+                for col in rg.columns() {
+                    if let Some(dict_offset) = col.dictionary_page_offset() {
                         parquet::thrift::bench_page_header(
-                            &file_bytes.slice(col_meta.data_page_offset as usize..),
+                            &file_bytes.slice(dict_offset as usize..),
                         );
                     }
-                });
-            });
+                    parquet::thrift::bench_page_header(
+                        &file_bytes.slice(col.data_page_offset() as usize..),
+                    );
+                }
+            }
         })
     });
 
     #[cfg(feature = "arrow")]
     c.bench_function("page headers (no stats)", |b| {

Review Comment:
   Agreed. The only reason I added them was to see the speedup from not
decoding the `Statistics`. I'll make a note to remove them later. The same goes
for the private file metadata decoding: we should only be benchmarking the
public API.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to