mapleFU commented on code in PR #46532: URL: https://github.com/apache/arrow/pull/46532#discussion_r2108440247
########## cpp/src/parquet/decoder.cc: ########## @@ -654,43 +701,41 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>, int64_t valid_bits_offset, typename EncodingTraits<ByteArrayType>::Accumulator* out, int* out_values_decoded) { - ArrowBinaryHelper<ByteArrayType> helper(out, num_values); - int values_decoded = 0; - - RETURN_NOT_OK(helper.Prepare(len_)); - - int i = 0; - RETURN_NOT_OK(VisitNullBitmapInline( - valid_bits, valid_bits_offset, num_values, null_count, - [&]() { - if (ARROW_PREDICT_FALSE(len_ < 4)) { - ParquetException::EofException(); - } - auto value_len = SafeLoadAs<int32_t>(data_); - if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) { - return Status::Invalid("Invalid or corrupted value_len '", value_len, "'"); - } - auto increment = value_len + 4; - if (ARROW_PREDICT_FALSE(len_ < increment)) { - ParquetException::EofException(); - } - RETURN_NOT_OK(helper.PrepareNextInput(value_len, len_)); - helper.UnsafeAppend(data_ + 4, value_len); - data_ += increment; - len_ -= increment; - ++values_decoded; - ++i; - return Status::OK(); - }, - [&]() { - helper.UnsafeAppendNull(); - ++i; - return Status::OK(); - })); + auto visit_binary_helper = [&](auto* helper) { + int values_decoded = 0; + + RETURN_NOT_OK(VisitNullBitmapInline( + valid_bits, valid_bits_offset, num_values, null_count, + [&]() { + if (ARROW_PREDICT_FALSE(len_ < 4)) { + return Status::Invalid( + "Invalid or truncated PLAIN-encoded BYTE_ARRAY data"); + } + auto value_len = SafeLoadAs<int32_t>(data_); + if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > len_ - 4)) { + return Status::Invalid( + "Invalid or truncated PLAIN-encoded BYTE_ARRAY data"); + } + RETURN_NOT_OK(helper->AppendValue(data_ + 4, value_len, + /*estimated_remaining_data_length=*/len_)); Review Comment: Aha, I got this, AppendValue with length means the chunk size, this is really tricky -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org