This is an automated email from the ASF dual-hosted git repository. raulcd pushed a commit to branch maint-15.0.x in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 75c9e0293415f56b8bac1cadbfb71eb1318fde89 Author: emkornfield <[email protected]> AuthorDate: Thu Jan 11 09:40:51 2024 -0800 GH-39525: [C++][Parquet] Pass memory pool to decoders (#39526) ### Rationale for this change Memory pools should be plumbed through wherever possible. ### What changes are included in this PR? Pass the memory pool through to the decoders. ### Are these changes tested? Not directly; this was caught via some internal fuzz targets. ### Are there any user-facing changes? No. * Closes: #39525 Authored-by: Micah Kornfield <[email protected]> Signed-off-by: mwish <[email protected]> --- cpp/src/parquet/column_reader.cc | 44 ++++++++-------------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 99978e283b..86c32e5e27 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -760,7 +760,7 @@ class ColumnReaderImplBase { if (page->encoding() == Encoding::PLAIN_DICTIONARY || page->encoding() == Encoding::PLAIN) { - auto dictionary = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_); + auto dictionary = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_, pool_); dictionary->SetData(page->num_values(), page->data(), page->size()); // The dictionary is fully decoded during DictionaryDecoder::Init, so the @@ -883,47 +883,21 @@ class ColumnReaderImplBase { current_decoder_ = it->second.get(); } else { switch (encoding) { - case Encoding::PLAIN: { - auto decoder = MakeTypedDecoder<DType>(Encoding::PLAIN, descr_); - current_decoder_ = decoder.get(); - decoders_[static_cast<int>(encoding)] = std::move(decoder); - break; - } - case Encoding::BYTE_STREAM_SPLIT: { - auto decoder = MakeTypedDecoder<DType>(Encoding::BYTE_STREAM_SPLIT, descr_); - current_decoder_ = decoder.get(); - decoders_[static_cast<int>(encoding)] = std::move(decoder); - break; - } - case Encoding::RLE: { - auto decoder = MakeTypedDecoder<DType>(Encoding::RLE, 
descr_); + case Encoding::PLAIN: + case Encoding::BYTE_STREAM_SPLIT: + case Encoding::RLE: + case Encoding::DELTA_BINARY_PACKED: + case Encoding::DELTA_BYTE_ARRAY: + case Encoding::DELTA_LENGTH_BYTE_ARRAY: { + auto decoder = MakeTypedDecoder<DType>(encoding, descr_, pool_); current_decoder_ = decoder.get(); decoders_[static_cast<int>(encoding)] = std::move(decoder); break; } + case Encoding::RLE_DICTIONARY: throw ParquetException("Dictionary page must be before data page."); - case Encoding::DELTA_BINARY_PACKED: { - auto decoder = MakeTypedDecoder<DType>(Encoding::DELTA_BINARY_PACKED, descr_); - current_decoder_ = decoder.get(); - decoders_[static_cast<int>(encoding)] = std::move(decoder); - break; - } - case Encoding::DELTA_BYTE_ARRAY: { - auto decoder = MakeTypedDecoder<DType>(Encoding::DELTA_BYTE_ARRAY, descr_); - current_decoder_ = decoder.get(); - decoders_[static_cast<int>(encoding)] = std::move(decoder); - break; - } - case Encoding::DELTA_LENGTH_BYTE_ARRAY: { - auto decoder = - MakeTypedDecoder<DType>(Encoding::DELTA_LENGTH_BYTE_ARRAY, descr_); - current_decoder_ = decoder.get(); - decoders_[static_cast<int>(encoding)] = std::move(decoder); - break; - } - default: throw ParquetException("Unknown encoding type."); }
