This is an automated email from the ASF dual-hosted git repository.
westonpace pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 50fb5b0df8 PARQUET-2250:[C++][Parquet] Expose column descriptor
through RecordReader (#34318)
50fb5b0df8 is described below
commit 50fb5b0df84a3ccaa665965e36a6e757d1af98f1
Author: Fatemah Panahi <[email protected]>
AuthorDate: Thu Mar 2 16:43:37 2023 -0800
PARQUET-2250:[C++][Parquet] Expose column descriptor through RecordReader
(#34318)
This change exposes the column descriptor through the RecordReader.
Lead-authored-by: Fatemah Panahi <[email protected]>
Co-authored-by: Fatemah Panahi <[email protected]>
Signed-off-by: Weston Pace <[email protected]>
---
cpp/src/parquet/column_reader.cc | 2 ++
cpp/src/parquet/column_reader.h | 3 +++
cpp/src/parquet/column_reader_test.cc | 3 +++
3 files changed, 8 insertions(+)
diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index f19079b902..7e650596a6 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -1813,6 +1813,8 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,
bool HasMoreData() const override { return this->pager_ != nullptr; }
+ const ColumnDescriptor* descr() const override { return this->descr_; }
+
// Dictionary decoders must be reset when advancing row groups
void ResetDecoders() { this->decoders_.clear(); }
diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h
index b5f96f8fc4..a9f6e217b1 100644
--- a/cpp/src/parquet/column_reader.h
+++ b/cpp/src/parquet/column_reader.h
@@ -351,6 +351,9 @@ class PARQUET_EXPORT RecordReader {
/// \param[in] reader obtained from RowGroupReader::GetColumnPageReader
virtual void SetPageReader(std::unique_ptr<PageReader> reader) = 0;
+ /// \brief Returns the underlying column reader's descriptor.
+ virtual const ColumnDescriptor* descr() const = 0;
+
virtual void DebugPrintState() = 0;
/// \brief Decoded definition levels
diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc
index 32fb09f21b..a8ce72165a 100644
--- a/cpp/src/parquet/column_reader_test.cc
+++ b/cpp/src/parquet/column_reader_test.cc
@@ -727,6 +727,9 @@ TEST_F(RecordReaderTest, BasicReadRepeatedField) {
int64_t records_read = record_reader_->ReadRecords(/*num_records=*/0);
ASSERT_EQ(records_read, 0);
+ // Test the descr() accessor.
+ ASSERT_EQ(record_reader_->descr()->max_definition_level(), 1);
+
// Read [10], null
records_read = record_reader_->ReadRecords(/*num_records=*/2);
ASSERT_EQ(records_read, 2);