This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2898577  PARQUET-1835: [C++] Fix crashes on invalid input
2898577 is described below

commit 2898577b22a1047516af9ad2bc53490c458cf3b8
Author: Antoine Pitrou <[email protected]>
AuthorDate: Mon Apr 6 17:13:18 2020 -0500

    PARQUET-1835: [C++] Fix crashes on invalid input
    
    Will hopefully fix the following issues:
    * https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=21377
    * https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=21567
    
    Closes #6848 from pitrou/PARQUET-1835-oss-fuzz
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Wes McKinney <[email protected]>
---
 cpp/src/parquet/column_reader.cc |  6 +++++-
 cpp/src/parquet/encoding.cc      | 12 ++++++------
 testing                          |  2 +-
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index e4dc8dc..f746eb7 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -82,7 +82,7 @@ int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
     case Encoding::BIT_PACKED: {
       num_bytes =
           static_cast<int32_t>(BitUtil::BytesForBits(num_buffered_values * bit_width_));
-      if (num_bytes > data_size) {
+      if (num_bytes < 0 || num_bytes > data_size - 4) {
         throw ParquetException("Received invalid number of bytes (corrupt data page?)");
       }
       if (!bit_packed_decoder_) {
@@ -375,6 +375,10 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
       if (header.num_values < 0) {
         throw ParquetException("Invalid page header (negative number of values)");
       }
+      if (header.definition_levels_byte_length < 0 ||
+          header.repetition_levels_byte_length < 0) {
+        throw ParquetException("Invalid page header (negative levels byte length)");
+      }
       bool is_compressed = header.__isset.is_compressed ? header.is_compressed : false;
       EncodedStatistics page_statistics = ExtractStatsFromHeader(header);
       seen_num_rows_ += header.num_values;
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index c70d26a..b123c04 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -1045,15 +1045,15 @@ int PlainDecoder<DType>::DecodeArrow(
 template <typename T>
 inline int DecodePlain(const uint8_t* data, int64_t data_size, int num_values,
                        int type_length, T* out) {
-  int bytes_to_decode = num_values * static_cast<int>(sizeof(T));
-  if (data_size < bytes_to_decode) {
+  int64_t bytes_to_decode = num_values * static_cast<int64_t>(sizeof(T));
+  if (bytes_to_decode > data_size || bytes_to_decode > INT_MAX) {
     ParquetException::EofException();
   }
   // If bytes_to_decode == 0, data could be null
   if (bytes_to_decode > 0) {
     memcpy(out, data, bytes_to_decode);
   }
-  return bytes_to_decode;
+  return static_cast<int>(bytes_to_decode);
 }
 
 template <typename DType>
@@ -1108,8 +1108,8 @@ template <>
 inline int DecodePlain<FixedLenByteArray>(const uint8_t* data, int64_t data_size,
                                           int num_values, int type_length,
                                           FixedLenByteArray* out) {
-  int bytes_to_decode = type_length * num_values;
-  if (data_size < bytes_to_decode) {
+  int64_t bytes_to_decode = static_cast<int64_t>(type_length) * num_values;
+  if (bytes_to_decode > data_size || bytes_to_decode > INT_MAX) {
     ParquetException::EofException();
   }
   for (int i = 0; i < num_values; ++i) {
@@ -1117,7 +1117,7 @@ inline int DecodePlain<FixedLenByteArray>(const uint8_t* data, int64_t data_size
     data += type_length;
     data_size -= type_length;
   }
-  return bytes_to_decode;
+  return static_cast<int>(bytes_to_decode);
 }
 
 template <typename DType>
diff --git a/testing b/testing
index 84730c2..582b79a 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit 84730c2fa8f3f7d0ecd79b05b38446375972ef4f
+Subproject commit 582b79a547dfe2e0fd40a245951d200d6d9c093b

Reply via email to