wjones127 commented on code in PR #34955:
URL: https://github.com/apache/arrow/pull/34955#discussion_r1160976252


##########
cpp/src/parquet/encoding.cc:
##########
@@ -2745,6 +2738,7 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl,
     int32_t data_size = 0;
     const int32_t* length_ptr =
         reinterpret_cast<const int32_t*>(buffered_length_->data()) + 
length_idx_;
+    int bytes_left = decoder_->bytes_left();

Review Comment:
   nit: could we rewrite this as `bytes_offset`? That way it's clearer why we 
are computing this before calling `decoder_->Advance()`
   
   ```suggestion
       int bytes_offset = len_ - decoder_->bytes_left();
   ```



##########
cpp/src/parquet/encoding.cc:
##########
@@ -2756,13 +2750,10 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl,
       }
     }
     length_idx_ += max_values;
-
-    PARQUET_THROW_NOT_OK(buffered_data_->Resize(data_size));
-    if (decoder_->GetBatch(8, buffered_data_->mutable_data(), data_size) != 
data_size) {
+    if (ARROW_PREDICT_FALSE(!decoder_->Advance(8 * 
static_cast<int64_t>(data_size)))) {
       ParquetException::EofException();
     }
-    const uint8_t* data_ptr = buffered_data_->data();
-
+    const uint8_t* data_ptr = data_ + (len_ - bytes_left);

Review Comment:
   ```suggestion
       const uint8_t* data_ptr = data_ + bytes_offset;
   ```



##########
cpp/src/parquet/encoding.cc:
##########
@@ -3071,8 +3061,14 @@ class DeltaByteArrayDecoder : public DecoderImpl,
     prefix_len_offset_ = 0;
     num_valid_values_ = num_prefix;
 
+    int bytes_left = decoder_->bytes_left();
+    int suffix_begins = len - bytes_left;
+    if (suffix_begins < 0) {
+      throw ParquetException(
+          "Wrong data in DeltaByteArrayDecoder: bytes_left more than length");
+    }

Review Comment:
   If this happens, doesn't that mean we have already read past the buffer end 
(buffer overflow)? Seems like that should be handled in the lengths decoder.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to