This is an automated email from the ASF dual-hosted git repository.
maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new c009024ab6 GH-39525: [C++][Parquet] Pass memory pool to decoders
(#39526)
c009024ab6 is described below
commit c009024ab6d29ff834d5ac2e7b27675ddda0a47a
Author: emkornfield <[email protected]>
AuthorDate: Thu Jan 11 09:40:51 2024 -0800
GH-39525: [C++][Parquet] Pass memory pool to decoders (#39526)
### Rationale for this change
Memory pools should be plumbed through wherever possible.
### What changes are included in this PR?
Pass the memory pool through to the decoders.
### Are these changes tested?
Not directly; this was caught via some internal fuzz targets.
### Are there any user-facing changes?
No.
* Closes: #39525
Authored-by: Micah Kornfield <[email protected]>
Signed-off-by: mwish <[email protected]>
---
cpp/src/parquet/column_reader.cc | 44 ++++++++--------------------------------
1 file changed, 9 insertions(+), 35 deletions(-)
diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index f5d9734aa1..ac4627d69c 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -760,7 +760,7 @@ class ColumnReaderImplBase {
if (page->encoding() == Encoding::PLAIN_DICTIONARY ||
page->encoding() == Encoding::PLAIN) {
- auto dictionary = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
+ auto dictionary = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_,
pool_);
dictionary->SetData(page->num_values(), page->data(), page->size());
// The dictionary is fully decoded during DictionaryDecoder::Init, so the
@@ -883,47 +883,21 @@ class ColumnReaderImplBase {
current_decoder_ = it->second.get();
} else {
switch (encoding) {
- case Encoding::PLAIN: {
- auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_);
- current_decoder_ = decoder.get();
- decoders_[static_cast<int>(encoding)] = std::move(decoder);
- break;
- }
- case Encoding::BYTE_STREAM_SPLIT: {
- auto decoder = MakeTypedDecoder<DType>(Encoding::BYTE_STREAM_SPLIT,
descr_);
- current_decoder_ = decoder.get();
- decoders_[static_cast<int>(encoding)] = std::move(decoder);
- break;
- }
- case Encoding::RLE: {
- auto decoder = MakeTypedDecoder<DType>(Encoding::RLE, descr_);
+ case Encoding::PLAIN:
+ case Encoding::BYTE_STREAM_SPLIT:
+ case Encoding::RLE:
+ case Encoding::DELTA_BINARY_PACKED:
+ case Encoding::DELTA_BYTE_ARRAY:
+ case Encoding::DELTA_LENGTH_BYTE_ARRAY: {
+ auto decoder = MakeTypedDecoder<DType>(encoding, descr_, pool_);
current_decoder_ = decoder.get();
decoders_[static_cast<int>(encoding)] = std::move(decoder);
break;
}
+
case Encoding::RLE_DICTIONARY:
throw ParquetException("Dictionary page must be before data page.");
- case Encoding::DELTA_BINARY_PACKED: {
- auto decoder =
MakeTypedDecoder<DType>(Encoding::DELTA_BINARY_PACKED, descr_);
- current_decoder_ = decoder.get();
- decoders_[static_cast<int>(encoding)] = std::move(decoder);
- break;
- }
- case Encoding::DELTA_BYTE_ARRAY: {
- auto decoder = MakeTypedDecoder<DType>(Encoding::DELTA_BYTE_ARRAY,
descr_);
- current_decoder_ = decoder.get();
- decoders_[static_cast<int>(encoding)] = std::move(decoder);
- break;
- }
- case Encoding::DELTA_LENGTH_BYTE_ARRAY: {
- auto decoder =
- MakeTypedDecoder<DType>(Encoding::DELTA_LENGTH_BYTE_ARRAY,
descr_);
- current_decoder_ = decoder.get();
- decoders_[static_cast<int>(encoding)] = std::move(decoder);
- break;
- }
-
default:
throw ParquetException("Unknown encoding type.");
}