Repository: parquet-cpp Updated Branches: refs/heads/master 708507f3e -> 441d85b43
PARQUET-710: Remove unneeded private member variables from RowGroupReader ABI The injection capability is retained. The private members of the `RowGroupReader` class are removed. Author: Deepak Majeti <[email protected]> Closes #154 from majetideepak/PARQUET-710 and squashes the following commits: 11bfaa6 [Deepak Majeti] Modify comments that mention PIMPL 6dfd40d [Deepak Majeti] clang-format 4d58dfa [Deepak Majeti] Removed private members in Rowgroup Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/441d85b4 Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/441d85b4 Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/441d85b4 Branch: refs/heads/master Commit: 441d85b433ecc5f103ba3da68be34256599adf11 Parents: 708507f Author: Deepak Majeti <[email protected]> Authored: Tue Sep 6 16:26:38 2016 -0400 Committer: Wes McKinney <[email protected]> Committed: Tue Sep 6 16:26:38 2016 -0400 ---------------------------------------------------------------------- src/parquet/file/metadata.cc | 24 +++++++++++++++++------- src/parquet/file/metadata.h | 7 +++++-- src/parquet/file/reader-internal.cc | 7 +++++-- src/parquet/file/reader-internal.h | 2 ++ src/parquet/file/reader.cc | 15 ++++++++------- src/parquet/file/reader.h | 22 ++++++++++------------ src/parquet/file/writer.h | 12 ++++++++---- 7 files changed, 55 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/metadata.cc ---------------------------------------------------------------------- diff --git a/src/parquet/file/metadata.cc b/src/parquet/file/metadata.cc index c1fd767..4e298a8 100644 --- a/src/parquet/file/metadata.cc +++ b/src/parquet/file/metadata.cc @@ -166,8 +166,9 @@ int64_t ColumnChunkMetaData::total_compressed_size() const { // row-group metadata class RowGroupMetaData::RowGroupMetaDataImpl { public: - explicit RowGroupMetaDataImpl(const format::RowGroup* row_group) - : row_group_(row_group) {} + explicit RowGroupMetaDataImpl( + const format::RowGroup* row_group, const SchemaDescriptor* schema) + : row_group_(row_group), schema_(schema) {} ~RowGroupMetaDataImpl() {} inline int num_columns() const { return row_group_->columns.size(); } @@ -176,6 +177,8 @@ class RowGroupMetaData::RowGroupMetaDataImpl { inline int64_t total_byte_size() const { return row_group_->total_byte_size; } + inline const SchemaDescriptor* schema() const { return schema_; } + std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) { DCHECK(i < num_columns()) << "The file only has " << num_columns() << " columns, requested metadata for column: " << i; @@ -185,15 +188,18 @@ class RowGroupMetaData::RowGroupMetaDataImpl { private: const format::RowGroup* row_group_; + const SchemaDescriptor* schema_; }; -std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make(const uint8_t* metadata) { - return std::unique_ptr<RowGroupMetaData>(new RowGroupMetaData(metadata)); +std::unique_ptr<RowGroupMetaData> RowGroupMetaData::Make( + const uint8_t* metadata, const SchemaDescriptor* schema) { + return std::unique_ptr<RowGroupMetaData>(new RowGroupMetaData(metadata, schema)); } -RowGroupMetaData::RowGroupMetaData(const uint8_t* metadata) +RowGroupMetaData::RowGroupMetaData( + const uint8_t* metadata, const SchemaDescriptor* schema) : impl_{std::unique_ptr<RowGroupMetaDataImpl>(new RowGroupMetaDataImpl( - reinterpret_cast<const format::RowGroup*>(metadata)))} {} + reinterpret_cast<const format::RowGroup*>(metadata), schema))} {} RowGroupMetaData::~RowGroupMetaData() {} int RowGroupMetaData::num_columns() const { @@ -208,6 +214,10 @@ int64_t RowGroupMetaData::total_byte_size() const { return impl_->total_byte_size(); } +const SchemaDescriptor* RowGroupMetaData::schema() const { + return impl_->schema(); +} + std::unique_ptr<ColumnChunkMetaData> RowGroupMetaData::ColumnChunk(int i) const { return impl_->ColumnChunk(i); } @@ -238,7 +248,7 @@ class FileMetaData::FileMetaDataImpl { << "The file only has " << num_row_groups() << " row groups, requested metadata for row group: " << i; return RowGroupMetaData::Make( - reinterpret_cast<const uint8_t*>(&metadata_->row_groups[i])); + reinterpret_cast<const uint8_t*>(&metadata_->row_groups[i]), &schema_); } const SchemaDescriptor* schema_descriptor() const { return &schema_; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/metadata.h ---------------------------------------------------------------------- diff --git a/src/parquet/file/metadata.h b/src/parquet/file/metadata.h index c35f82f..78ea53b 100644 --- a/src/parquet/file/metadata.h +++ b/src/parquet/file/metadata.h @@ -75,7 +75,8 @@ class PARQUET_EXPORT ColumnChunkMetaData { class PARQUET_EXPORT RowGroupMetaData { public: // API convenience to get a MetaData accessor - static std::unique_ptr<RowGroupMetaData> Make(const uint8_t* metadata); + static std::unique_ptr<RowGroupMetaData> Make( + const uint8_t* metadata, const SchemaDescriptor* schema); ~RowGroupMetaData(); @@ -83,10 +84,12 @@ class PARQUET_EXPORT RowGroupMetaData { int num_columns() const; int64_t num_rows() const; int64_t total_byte_size() const; + // Return const-pointer to make it clear that this object is not to be copied + const SchemaDescriptor* schema() const; std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) const; private: - explicit RowGroupMetaData(const uint8_t* metadata); + explicit RowGroupMetaData(const uint8_t* metadata, const SchemaDescriptor* schema); // PIMPL Idiom class RowGroupMetaDataImpl; std::unique_ptr<RowGroupMetaDataImpl> impl_; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader-internal.cc ---------------------------------------------------------------------- diff --git a/src/parquet/file/reader-internal.cc b/src/parquet/file/reader-internal.cc index 5c1bc37..1cb91f0 100644 --- a/src/parquet/file/reader-internal.cc +++ b/src/parquet/file/reader-internal.cc @@ -145,6 +145,10 @@ const RowGroupMetaData* SerializedRowGroup::metadata() const { return row_group_metadata_.get(); } +const ReaderProperties* SerializedRowGroup::properties() const { + return &properties_; +} + std::unique_ptr<PageReader> SerializedRowGroup::GetColumnPageReader(int i) { // Read column chunk from the file auto col = row_group_metadata_->ColumnChunk(i); @@ -195,8 +199,7 @@ std::shared_ptr<RowGroupReader> SerializedFile::GetRowGroup(int i) { std::unique_ptr<SerializedRowGroup> contents(new SerializedRowGroup( source_.get(), std::move(file_metadata_->RowGroup(i)), properties_)); - return std::make_shared<RowGroupReader>( - file_metadata_->schema_descriptor(), std::move(contents), properties_.allocator()); + return std::make_shared<RowGroupReader>(std::move(contents)); } const FileMetaData* SerializedFile::metadata() const { http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader-internal.h ---------------------------------------------------------------------- diff --git a/src/parquet/file/reader-internal.h b/src/parquet/file/reader-internal.h index 48e2daf..faedda0 100644 --- a/src/parquet/file/reader-internal.h +++ b/src/parquet/file/reader-internal.h @@ -76,6 +76,8 @@ class SerializedRowGroup : public RowGroupReader::Contents { virtual const RowGroupMetaData* metadata() const; + virtual const ReaderProperties* properties() const; + virtual std::unique_ptr<PageReader> GetColumnPageReader(int i); private: http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader.cc ---------------------------------------------------------------------- diff --git a/src/parquet/file/reader.cc b/src/parquet/file/reader.cc index 62481f7..b593ea0 100644 --- a/src/parquet/file/reader.cc +++ b/src/parquet/file/reader.cc @@ -41,17 +41,18 @@ namespace parquet { // ---------------------------------------------------------------------- // RowGroupReader public API -RowGroupReader::RowGroupReader(const SchemaDescriptor* schema, - std::unique_ptr<Contents> contents, MemoryAllocator* allocator) - : schema_(schema), contents_(std::move(contents)), allocator_(allocator) {} +RowGroupReader::RowGroupReader(std::unique_ptr<Contents> contents) + : contents_(std::move(contents)) {} std::shared_ptr<ColumnReader> RowGroupReader::Column(int i) { - DCHECK(i < schema_->num_columns()) << "The RowGroup only has " << schema_->num_columns() - << "columns, requested column: " << i; - const ColumnDescriptor* descr = schema_->Column(i); + DCHECK(i < metadata()->num_columns()) << "The RowGroup only has " + << metadata()->num_columns() + << "columns, requested column: " << i; + const ColumnDescriptor* descr = metadata()->schema()->Column(i); std::unique_ptr<PageReader> page_reader = contents_->GetColumnPageReader(i); - return ColumnReader::Make(descr, std::move(page_reader), allocator_); + return ColumnReader::Make(descr, std::move(page_reader), + const_cast<ReaderProperties*>(contents_->properties())->allocator()); } // Returns the rowgroup metadata http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/reader.h ---------------------------------------------------------------------- diff --git a/src/parquet/file/reader.h b/src/parquet/file/reader.h index baa3e30..c1ee9d4 100644 --- a/src/parquet/file/reader.h +++ b/src/parquet/file/reader.h @@ -38,15 +38,17 @@ class RandomAccessSource; class PARQUET_EXPORT RowGroupReader { public: - // Forward declare the PIMPL + // Forward declare a virtual class 'Contents' to aid dependency injection and more + // easily create test fixtures + // An implementation of the Contents class is defined in the .cc file struct Contents { virtual ~Contents() {} virtual std::unique_ptr<PageReader> GetColumnPageReader(int i) = 0; virtual const RowGroupMetaData* metadata() const = 0; + virtual const ReaderProperties* properties() const = 0; }; - RowGroupReader(const SchemaDescriptor* schema, std::unique_ptr<Contents> contents, - MemoryAllocator* allocator); + explicit RowGroupReader(std::unique_ptr<Contents> contents); // Returns the rowgroup metadata const RowGroupMetaData* metadata() const; @@ -56,17 +58,15 @@ class PARQUET_EXPORT RowGroupReader { std::shared_ptr<ColumnReader> Column(int i); private: - const SchemaDescriptor* schema_; - // PIMPL idiom - // This is declared in the .cc file so that we can hide compiled Thrift - // headers from the public API and also more easily create test fixtures. + // Holds a pointer to an instance of Contents implementation std::unique_ptr<Contents> contents_; - MemoryAllocator* allocator_; }; class PARQUET_EXPORT ParquetFileReader { public: - // Forward declare the PIMPL + // Forward declare a virtual class 'Contents' to aid dependency injection and more + // easily create test fixtures + // An implementation of the Contents class is defined in the .cc file struct Contents { virtual ~Contents() {} // Perform any cleanup associated with the file contents @@ -99,9 +99,7 @@ class PARQUET_EXPORT ParquetFileReader { std::ostream& stream, std::list<int> selected_columns, bool print_values = true); private: - // PIMPL idiom - // This is declared in the .cc file so that we can hide compiled Thrift - // headers from the public API and also more easily create test fixtures. + // Holds a pointer to an instance of Contents implementation std::unique_ptr<Contents> contents_; }; http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/441d85b4/src/parquet/file/writer.h ---------------------------------------------------------------------- diff --git a/src/parquet/file/writer.h b/src/parquet/file/writer.h index a5e6e99..0e64961 100644 --- a/src/parquet/file/writer.h +++ b/src/parquet/file/writer.h @@ -35,6 +35,9 @@ class OutputStream; class PARQUET_EXPORT RowGroupWriter { public: + // Forward declare a virtual class 'Contents' to aid dependency injection and more + // easily create test fixtures + // An implementation of the Contents class is defined in the .cc file struct Contents { virtual int num_columns() const = 0; virtual int64_t num_rows() const = 0; @@ -75,8 +78,7 @@ class PARQUET_EXPORT RowGroupWriter { // Owned by the parent ParquetFileWriter const SchemaDescriptor* schema_; - // This is declared in the .cc file so that we can hide compiled Thrift - // headers from the public API and also more easily create test fixtures. + // Holds a pointer to an instance of Contents implementation std::unique_ptr<Contents> contents_; MemoryAllocator* allocator_; @@ -84,6 +86,9 @@ class PARQUET_EXPORT RowGroupWriter { class PARQUET_EXPORT ParquetFileWriter { public: + // Forward declare a virtual class 'Contents' to aid dependency injection and more + // easily create test fixtures + // An implementation of the Contents class is defined in the .cc file struct Contents { virtual ~Contents() {} // Perform any cleanup associated with the file contents @@ -155,8 +160,7 @@ class PARQUET_EXPORT ParquetFileWriter { const ColumnDescriptor* column_schema(int i) const { return schema_->Column(i); } private: - // This is declared in the .cc file so that we can hide compiled Thrift - // headers from the public API and also more easily create test fixtures. + // Holds a pointer to an instance of Contents implementation std::unique_ptr<Contents> contents_; // The SchemaDescriptor is provided by the Contents impl
