wesm commented on a change in pull request #7244:
URL: https://github.com/apache/arrow/pull/7244#discussion_r428996352
##########
File path: cpp/src/parquet/metadata.h
##########
@@ -178,27 +179,42 @@ class PARQUET_EXPORT ColumnChunkMetaData {
std::unique_ptr<ColumnChunkMetaDataImpl> impl_;
};
+/// \brief RowGroupMetaData is a proxy around format::RowGroupMetaData.
class PARQUET_EXPORT RowGroupMetaData {
public:
- // API convenience to get a MetaData accessor
+ /// \brief Create a RowGroupMetaData from a serialized thrift message.
static std::unique_ptr<RowGroupMetaData> Make(
const void* metadata, const SchemaDescriptor* schema,
const ApplicationVersion* writer_version = NULLPTR,
std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
~RowGroupMetaData();
- // row-group metadata
+ /// \brief The number of columns in this row group. The order must match the
+ /// parent's column ordering.
int num_columns() const;
+
+ /// \brief Return the ColumnChunkMetaData of the corresponding column ordinal.
+ ///
+ /// WARNING, the returned object references memory location in its parent
+ /// (RowGroupMetaData) object. Hence, the parent must outlive the returned
+ /// object.
+ ///
+ /// \param[i] index of the ColumnChunkMetaData to retrieve.
Review comment:
doxygen syntax
##########
File path: cpp/src/parquet/metadata.h
##########
@@ -211,41 +227,72 @@ class PARQUET_EXPORT RowGroupMetaData {
class FileMetaDataBuilder;
+/// \brief FileMetaData is a proxy around format::FileMetaData.
class PARQUET_EXPORT FileMetaData {
public:
- // API convenience to get a MetaData accessor
-
+ /// \brief Create a FileMetaData from a serialized thrift message.
static std::shared_ptr<FileMetaData> Make(
const void* serialized_metadata, uint32_t* inout_metadata_len,
std::shared_ptr<InternalFileDecryptor> file_decryptor = NULLPTR);
~FileMetaData();
- /// Verify signature of FileMetadata when file is encrypted but footer is not encrypted
- /// (plaintext footer).
- /// Signature is 28 bytes (12 byte nonce and 16 byte tags) when encrypting FileMetadata
- bool VerifySignature(const void* signature);
-
- // file metadata
- uint32_t size() const;
-
+ /// \brief The number of top-level columns in the schema.
+ ///
+ /// Parquet thrift definition requires that nested schema elements are
+ /// flattened. This method returns the number of columns in the un-flattened
+ /// version.
int num_columns() const;
+ /// \brief The number of flattened schema elements.
+ ///
+ /// Parquet thrift definition requires that nested schema elements are
+ /// flattened. This method returns the total number of elements in the
+ /// flattened list.
+ int num_schema_elements() const;
+
+ /// \brief The total number of rows.
int64_t num_rows() const;
+ /// \brief The number of row groups in the file.
int num_row_groups() const;
+
+ /// \brief Return the RowGroupMetaData of the corresponding row group ordinal.
+ ///
+ /// WARNING, the returned object references memory location in its parent
+ /// (FileMetaData) object. Hence, the parent must outlive the returned object.
+ ///
+ /// \param[i] index of the RowGroup to retrieve.
Review comment:
doxygen syntax
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]