This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new c295b172b Short-circuit on exhausted page in skip_records (#4320)
c295b172b is described below
commit c295b172b37902d5fa41ef275ff5b86caf9fde75
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Wed May 31 15:03:25 2023 +0100
Short-circuit on exhausted page in skip_records (#4320)
---
parquet/src/column/reader.rs | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs
index f63b1e60a..0bb6e0024 100644
--- a/parquet/src/column/reader.rs
+++ b/parquet/src/column/reader.rs
@@ -333,6 +333,14 @@ where
None => (to_read, to_read),
};
+ self.num_decoded_values += rep_levels_read as u32;
+ remaining -= records_read;
+
+ if self.num_buffered_values == self.num_decoded_values {
+ // Exhausted buffered page - no need to advance other decoders
+ continue;
+ }
+
let (values_read, def_levels_read) = match self.def_level_decoder.as_mut() {
Some(decoder) => decoder
.skip_def_levels(rep_levels_read, self.descr.max_def_level())?,
@@ -355,9 +363,6 @@ where
values_read
));
}
-
- self.num_decoded_values += rep_levels_read as u32;
- remaining -= records_read;
}
Ok(num_records - remaining)
}