tustvold commented on a change in pull request #1284:
URL: https://github.com/apache/arrow-rs/pull/1284#discussion_r805158878
##########
File path: parquet/src/encodings/decoding.rs
##########
@@ -431,232 +433,253 @@ pub struct DeltaBitPackDecoder<T: DataType> {
initialized: bool,
// Header info
- num_values: usize,
- num_mini_blocks: i64,
+ // The number of values in each block
+ block_size: usize,
+ /// The number of values in the current page
+ values_left: usize,
+ /// The number of mini-blocks in each block
+ mini_blocks_per_block: usize,
+ /// The number of values in each mini block
values_per_mini_block: usize,
- values_current_mini_block: usize,
- first_value: i64,
- first_value_read: bool,
// Per block info
- min_delta: i64,
+ /// The minimum delta in the block
+ min_delta: T::T,
+ /// The byte offset of the end of the current block
+ block_end_offset: usize,
+ /// The index on the current mini block
mini_block_idx: usize,
- delta_bit_width: u8,
- delta_bit_widths: ByteBuffer,
- deltas_in_mini_block: Vec<T::T>, // eagerly loaded deltas for a mini block
- use_batch: bool,
-
- current_value: i64,
-
- _phantom: PhantomData<T>,
+ /// The bit widths of each mini block in the current block
+ mini_block_bit_widths: Vec<u8>,
+ /// The number of values remaining in the current mini block
+ mini_block_remaining: usize,
+
+ /// The first value from the block header if not consumed
+ first_value: Option<T::T>,
+ /// The last value to compute offsets from
+ last_value: T::T,
}
-impl<T: DataType> DeltaBitPackDecoder<T> {
+impl<T: DataType> DeltaBitPackDecoder<T>
+where
+ T::T: Default + FromPrimitive + WrappingAdd + Copy,
+{
/// Creates new delta bit packed decoder.
pub fn new() -> Self {
Self {
bit_reader: BitReader::from(vec![]),
initialized: false,
- num_values: 0,
- num_mini_blocks: 0,
+ block_size: 0,
+ values_left: 0,
+ mini_blocks_per_block: 0,
values_per_mini_block: 0,
- values_current_mini_block: 0,
- first_value: 0,
- first_value_read: false,
- min_delta: 0,
+ min_delta: Default::default(),
mini_block_idx: 0,
- delta_bit_width: 0,
- delta_bit_widths: ByteBuffer::new(),
- deltas_in_mini_block: vec![],
- use_batch: mem::size_of::<T::T>() == 4,
- current_value: 0,
- _phantom: PhantomData,
+ mini_block_bit_widths: vec![],
+ mini_block_remaining: 0,
+ block_end_offset: 0,
+ first_value: None,
+ last_value: Default::default(),
}
}
- /// Returns underlying bit reader offset.
+ /// Returns the current offset
pub fn get_offset(&self) -> usize {
assert!(self.initialized, "Bit reader is not initialized");
- self.bit_reader.get_byte_offset()
+ match self.values_left {
+ // If we've exhausted this page report the end of the current block
+ // as we may not have consumed the trailing padding
+ //
+ // The max is necessary to handle pages with no blocks
Review comment:
Yup, the page header will be present but there will be no blocks :smile:
Or at least that is what the encoder currently does...
`test_delta_byte_array_single_array` fails if this isn't here. Will update
comment
Edit: I think it actually occurs when there is only a single value, but I'm
not 100% sure :sweat_smile:
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]