huberylee commented on code in PR #39393:
URL: https://github.com/apache/arrow/pull/39393#discussion_r1439276198
##########
cpp/src/parquet/arrow/test_util.h:
##########
@@ -129,11 +138,16 @@ template <typename ArrowType>
::arrow::enable_if_fixed_size_binary<ArrowType, Status> NonNullArray(
size_t size, std::shared_ptr<Array>* out) {
using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
- // set byte_width to the length of "fixed": 5
+ // set byte_width to the length of "fixed": 10
// todo: find a way to generate test data with more diversity.
- BuilderType builder(::arrow::fixed_size_binary(5));
+ const int byte_width = 10;
Review Comment:
> Hmm whats the purpose here?
Just to be consistent with ``NullableArray`` for fixed_size_binary.
##########
cpp/src/parquet/column_reader.h:
##########
@@ -118,6 +120,110 @@ struct CryptoContext {
std::shared_ptr<Decryptor> data_decryptor;
};
+struct PageSkipInfo {
+ PageSkipInfo() = default;
+
+ const RowRanges::Range& Range() { return ranges_[index_]; }
+
+ int64_t SkipRowNum() const { return skip_row_nums_[index_]; }
+
+ int64_t LastRowIndex() const { return last_row_indices_[index_]; }
+
+ bool HasNext() const {
+ if (index_ != ranges_.size() - 1) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ ::arrow::Status Next() {
+ ARROW_RETURN_IF(!HasNext(), ::arrow::Status::Invalid("No more range to read"));
+ index_++;
+ return ::arrow::Status::OK();
+ }
+
+ int64_t EndRowIndex() const { return last_row_indices_.back(); }
+
+ bool operator==(const PageSkipInfo& other) const {
+ if (ranges_ != other.ranges_) {
+ return false;
+ }
+ if (last_row_indices_ != other.last_row_indices_) {
+ return false;
+ }
+ if (skip_row_nums_ != other.skip_row_nums_) {
+ return false;
+ }
+
+ return true;
+ }
+
+ bool operator!=(const PageSkipInfo& other) const { return !(*this == other); }
+
+ //
+ // | <--------------------------- column chunk -------------------------------> |
+ // | <-------------------- page N -----------------------> |
+ // first_row_idx last_row_idx
+ // |-- ... --|-------------------------------------------------------|--- ... ---|
+ // |---- range0 ----| |---- range1 ----|
+ // |--skip0--| |--skip1--|
+ // |------last_row_index0-----|
+ // |-------------------last_row_index1-------------------|
+ //
+
+ // Row ranges for this page, start counting from within column chunk
+ std::vector<RowRanges::Range> ranges_;
+
+ // The num of rows to skip before reading each row range
+ std::vector<int64_t> skip_row_nums_;
+
+ // The last row index for echo row range, start counting from within the page
Review Comment:
> What does "echo row range" mean, is it "each row range"?
Yes.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]