pitrou commented on issue #14923:
URL: https://github.com/apache/arrow/issues/14923#issuecomment-1369902019

   It should actually be quite simple:
   ```diff
   diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
   index b761de69d..6f7e1e9e0 100644
   --- a/cpp/src/parquet/encoding.cc
   +++ b/cpp/src/parquet/encoding.cc
   @@ -2454,13 +2454,17 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DTyp
    
        // read the bitwidth of each miniblock
        uint8_t* bit_width_data = delta_bit_widths_->mutable_data();
   +    uint32_t current_num_values = 0;
   +
        for (uint32_t i = 0; i < mini_blocks_per_block_; ++i) {
          if (!decoder_->GetAligned<uint8_t>(1, bit_width_data + i)) {
            ParquetException::EofException();
          }
   -      if (bit_width_data[i] > kMaxDeltaBitWidth) {
   +      if (ARROW_PREDICT_FALSE(bit_width_data[i] > kMaxDeltaBitWidth &&
   +                              current_num_values < total_value_count_)) {
            throw ParquetException("delta bit width larger than integer bit width");
          }
   +      current_num_values += values_per_mini_block_;
        }
        mini_block_idx_ = 0;
        delta_bit_width_ = bit_width_data[0];
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to