mapleFU commented on PR #57:
URL: https://github.com/apache/parquet-testing/pull/57#issuecomment-2307509744

   I finally caught this. This file has a zero-sized dictionary bit-width. With 
this diff:
   
   ```
   diff --git a/cpp/src/arrow/util/bit_stream_utils_internal.h 
b/cpp/src/arrow/util/bit_stream_utils_internal.h
   index 811694e43..176d4b1d9 100644
   --- a/cpp/src/arrow/util/bit_stream_utils_internal.h
   +++ b/cpp/src/arrow/util/bit_stream_utils_internal.h
   @@ -22,6 +22,7 @@
    #include <algorithm>
    #include <cstdint>
    #include <cstring>
   +#include <iostream>
    
    #include "arrow/util/bit_util.h"
    #include "arrow/util/bpacking.h"
   @@ -312,6 +313,7 @@ inline bool BitReader::GetValue(int num_bits, T* v) {
    
    template <typename T>
    inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
   +  std::cout << "Unpack:" << num_bits << ", batch-size:" << batch_size << 
'\n';
      DCHECK(buffer_ != NULL);
      DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8)) << "num_bits: " << 
num_bits;
    
   diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
   index 16a1e2492..b76ca1b2d 100644
   --- a/cpp/src/parquet/encoding.cc
   +++ b/cpp/src/parquet/encoding.cc
   @@ -1626,6 +1626,7 @@ class DictDecoderImpl : public DecoderImpl, virtual 
public DictDecoder<Type> {
          throw ParquetException("Invalid or corrupted bit_width " +
                                 std::to_string(bit_width) + ". Maximum allowed 
is 32.");
        }
   +    std::cout << "Dictionary bit-width: " << int(bit_width) << '\n';
        idx_decoder_ = ::arrow::util::RleDecoder(++data, --len, bit_width);
      }
   ```
   
   The dictionary bit-width is 0 here in the output.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to