pitrou commented on issue #14923:
URL: https://github.com/apache/arrow/issues/14923#issuecomment-1369902019
It should actually be quite simple:
```diff
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index b761de69d..6f7e1e9e0 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -2454,13 +2454,17 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DTyp
// read the bitwidth of each miniblock
uint8_t* bit_width_data = delta_bit_widths_->mutable_data();
+ uint32_t current_num_values = 0;
+
for (uint32_t i = 0; i < mini_blocks_per_block_; ++i) {
if (!decoder_->GetAligned<uint8_t>(1, bit_width_data + i)) {
ParquetException::EofException();
}
- if (bit_width_data[i] > kMaxDeltaBitWidth) {
+ if (ARROW_PREDICT_FALSE(bit_width_data[i] > kMaxDeltaBitWidth &&
+ current_num_values < total_value_count_)) {
throw ParquetException("delta bit width larger than integer bit width");
}
+ current_num_values += values_per_mini_block_;
}
mini_block_idx_ = 0;
delta_bit_width_ = bit_width_data[0];
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]