This is an automated email from the ASF dual-hosted git repository.
maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new f15a700279 GH-39336: [C++][Parquet] Minor: Style enhancement for
parquet::FileMetaData (#39337)
f15a700279 is described below
commit f15a7002794865be9ffc6fe338c2fce4f259c476
Author: mwish <[email protected]>
AuthorDate: Wed Jan 17 20:37:03 2024 +0800
GH-39336: [C++][Parquet] Minor: Style enhancement for parquet::FileMetaData
(#39337)
### Rationale for this change
Enhance the style of `parquet::FileMetaData::Subset`.
### Are these changes tested?
Already tested
### Are there any user-facing changes?
no
* Closes: #39336
Lead-authored-by: mwish <[email protected]>
Co-authored-by: mwish <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: mwish <[email protected]>
---
cpp/src/arrow/util/bit_stream_utils.h | 2 +-
cpp/src/parquet/metadata.cc | 18 ++++++++++--------
cpp/src/parquet/metadata.h | 8 +++++++-
3 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/cpp/src/arrow/util/bit_stream_utils.h
b/cpp/src/arrow/util/bit_stream_utils.h
index 2afb2e5193..811694e43b 100644
--- a/cpp/src/arrow/util/bit_stream_utils.h
+++ b/cpp/src/arrow/util/bit_stream_utils.h
@@ -183,7 +183,7 @@ class BitReader {
/// Returns the number of bytes left in the stream, not including the current
/// byte (i.e., there may be an additional fraction of a byte).
- int bytes_left() {
+ int bytes_left() const {
return max_bytes_ -
(byte_offset_ +
static_cast<int>(bit_util::BytesForBits(bit_offset_)));
}
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index d651ea5db0..3f101b5ae3 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -761,7 +761,7 @@ class FileMetaData::FileMetaDataImpl {
return metadata_->row_groups[i];
}
- void AppendRowGroups(const std::unique_ptr<FileMetaDataImpl>& other) {
+ void AppendRowGroups(FileMetaDataImpl* other) {
std::ostringstream diff_output;
if (!schema()->Equals(*other->schema(), &diff_output)) {
auto msg = "AppendRowGroups requires equal schemas.\n" +
diff_output.str();
@@ -800,6 +800,7 @@ class FileMetaData::FileMetaDataImpl {
metadata->schema = metadata_->schema;
metadata->row_groups.resize(row_groups.size());
+
int i = 0;
for (int selected_index : row_groups) {
metadata->num_rows += row_group(selected_index).num_rows;
@@ -822,7 +823,7 @@ class FileMetaData::FileMetaDataImpl {
}
void set_file_decryptor(std::shared_ptr<InternalFileDecryptor>
file_decryptor) {
- file_decryptor_ = file_decryptor;
+ file_decryptor_ = std::move(file_decryptor);
}
private:
@@ -886,13 +887,14 @@ std::shared_ptr<FileMetaData> FileMetaData::Make(
const void* metadata, uint32_t* metadata_len,
std::shared_ptr<InternalFileDecryptor> file_decryptor) {
return std::shared_ptr<FileMetaData>(new FileMetaData(
- metadata, metadata_len, default_reader_properties(), file_decryptor));
+ metadata, metadata_len, default_reader_properties(),
std::move(file_decryptor)));
}
FileMetaData::FileMetaData(const void* metadata, uint32_t* metadata_len,
const ReaderProperties& properties,
std::shared_ptr<InternalFileDecryptor>
file_decryptor)
- : impl_(new FileMetaDataImpl(metadata, metadata_len, properties,
file_decryptor)) {}
+ : impl_(new FileMetaDataImpl(metadata, metadata_len, properties,
+ std::move(file_decryptor))) {}
FileMetaData::FileMetaData() : impl_(new FileMetaDataImpl()) {}
@@ -942,7 +944,7 @@ const std::string&
FileMetaData::footer_signing_key_metadata() const {
void FileMetaData::set_file_decryptor(
std::shared_ptr<InternalFileDecryptor> file_decryptor) {
- impl_->set_file_decryptor(file_decryptor);
+ impl_->set_file_decryptor(std::move(file_decryptor));
}
ParquetVersion::type FileMetaData::version() const {
@@ -975,7 +977,7 @@ const std::shared_ptr<const KeyValueMetadata>&
FileMetaData::key_value_metadata(
void FileMetaData::set_file_path(const std::string& path) {
impl_->set_file_path(path); }
void FileMetaData::AppendRowGroups(const FileMetaData& other) {
- impl_->AppendRowGroups(other.impl_);
+ impl_->AppendRowGroups(other.impl_.get());
}
std::shared_ptr<FileMetaData> FileMetaData::Subset(
@@ -1839,7 +1841,7 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl {
std::unique_ptr<FileMetaData> Finish(
const std::shared_ptr<const KeyValueMetadata>& key_value_metadata) {
int64_t total_rows = 0;
- for (auto row_group : row_groups_) {
+ for (const auto& row_group : row_groups_) {
total_rows += row_group.num_rows;
}
metadata_->__set_num_rows(total_rows);
@@ -1858,7 +1860,7 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl {
format::KeyValue kv_pair;
kv_pair.__set_key(key_value_metadata_->key(i));
kv_pair.__set_value(key_value_metadata_->value(i));
- metadata_->key_value_metadata.push_back(kv_pair);
+ metadata_->key_value_metadata.push_back(std::move(kv_pair));
}
metadata_->__isset.key_value_metadata = true;
}
diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h
index e47c45ff04..640b898024 100644
--- a/cpp/src/parquet/metadata.h
+++ b/cpp/src/parquet/metadata.h
@@ -306,9 +306,15 @@ class PARQUET_EXPORT FileMetaData {
int num_schema_elements() const;
/// \brief The total number of rows.
+ ///
+ /// If the FileMetaData was obtained by calling `SubSet()`, this is the total
+ /// number of rows in the selected row groups.
int64_t num_rows() const;
/// \brief The number of row groups in the file.
+ ///
+ /// If the FileMetaData was obtained by calling `SubSet()`, this is the
number
+ /// of selected row groups.
int num_row_groups() const;
/// \brief Return the RowGroupMetaData of the corresponding row group
ordinal.
@@ -338,7 +344,7 @@ class PARQUET_EXPORT FileMetaData {
/// \brief Size of the original thrift encoded metadata footer.
uint32_t size() const;
- /// \brief Indicate if all of the FileMetadata's RowGroups can be
decompressed.
+ /// \brief Indicate if all of the FileMetaData's RowGroups can be
decompressed.
///
/// This will return false if any of the RowGroup's page is compressed with a
/// compression format which is not compiled in the current parquet library.