fatemehp commented on code in PR #14603:
URL: https://github.com/apache/arrow/pull/14603#discussion_r1035206696
##########
cpp/src/parquet/metadata.h:
##########
@@ -182,6 +184,32 @@ class PARQUET_EXPORT ColumnChunkMetaData {
std::unique_ptr<ColumnChunkMetaDataImpl> impl_;
};
+// \brief DataPageStats stores statistics about a data page including number of
+// values and rows.
+class PARQUET_EXPORT DataPageStats {
+ public:
+ explicit DataPageStats(EncodedStatistics encoded_statistics, int32_t num_values,
+ std::optional<int32_t> num_rows)
+ : encoded_statistics_(std::move(encoded_statistics)),
+ num_values_(num_values),
+ num_rows_(num_rows) {}
+
+ const EncodedStatistics& statistics() { return encoded_statistics_;}
+
+ int32_t num_values() const {
+ return num_values_;
+ }
+
+ std::optional<int32_t> num_rows() const {
+ return num_rows_;
+ }
+
+ private:
+ const EncodedStatistics encoded_statistics_;
Review Comment:
Done.
##########
cpp/src/parquet/metadata.cc:
##########
@@ -38,6 +43,8 @@
namespace parquet {
+typedef std::variant<format::DataPageHeader, format::DataPageHeaderV2> DataPageHeader;
Review Comment:
Done
##########
cpp/src/parquet/column_reader.h:
##########
@@ -115,11 +116,26 @@ class PARQUET_EXPORT PageReader {
bool always_compressed = false,
const CryptoContext* ctx = NULLPTR);
+ // If skip_page_callback_ is set, NextPage() must use this callback to determine if it
+ // should return or skip and move to the next page. If the callback function returns
+ // true the page must be skipped. The callback will be called only if the page
+ // type is DATA_PAGE or DATA_PAGE_V2. Dictionary pages must be read
+ // regardless.
+ // \note API EXPERIMENTAL
+ void set_skip_page_callback(
+ std::function<bool(const DataPageStats&)> skip_page_callback) {
+ skip_page_callback_ = skip_page_callback;
Review Comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]