etseidl commented on code in PR #9786:
URL: https://github.com/apache/arrow-rs/pull/9786#discussion_r3119813801
##########
parquet/src/encodings/decoding.rs:
##########
@@ -862,52 +862,51 @@ where
let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as
usize;
self.check_bit_width(bit_width)?;
let mini_block_to_skip = self.mini_block_remaining.min(to_skip -
skip);
- let mini_block_should_skip = mini_block_to_skip;
- let skip_count = self
- .bit_reader
- .get_batch(&mut skip_buffer[0..mini_block_to_skip], bit_width);
-
- if skip_count != mini_block_to_skip {
- return Err(general_err!(
- "Expected to skip {} values from mini block got {}.",
- mini_block_batch_size,
- skip_count
- ));
- }
-
- // see commentary in self.get() above regarding optimizations
Review Comment:
Please restore this comment, as it is still relevant.
##########
parquet/src/encodings/decoding.rs:
##########
@@ -862,52 +862,51 @@ where
let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as
usize;
self.check_bit_width(bit_width)?;
let mini_block_to_skip = self.mini_block_remaining.min(to_skip -
skip);
- let mini_block_should_skip = mini_block_to_skip;
- let skip_count = self
- .bit_reader
- .get_batch(&mut skip_buffer[0..mini_block_to_skip], bit_width);
-
- if skip_count != mini_block_to_skip {
- return Err(general_err!(
- "Expected to skip {} values from mini block got {}.",
- mini_block_batch_size,
- skip_count
- ));
- }
-
- // see commentary in self.get() above regarding optimizations
let min_delta = self.min_delta.as_i64()?;
if bit_width == 0 {
- // if min_delta == 0, there's nothing to do. self.last_value
is unchanged
+ // All remainders are zero: every delta equals min_delta
exactly.
+ // Advance last_value by n * min_delta with no bit reads.
Review Comment:
Please still address the `min_delta == 0` case in the comments here.
##########
parquet/src/encodings/decoding.rs:
##########
@@ -862,52 +862,51 @@ where
let bit_width = self.mini_block_bit_widths[self.mini_block_idx] as
usize;
self.check_bit_width(bit_width)?;
let mini_block_to_skip = self.mini_block_remaining.min(to_skip -
skip);
- let mini_block_should_skip = mini_block_to_skip;
- let skip_count = self
Review Comment:
I love moving this inside the `bit_width != 0` branch... it saves a memset at
least. I wonder if we could lazily initialize `skip_buffer` as well. Probably
better as a follow-on.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]