fatemehp commented on code in PR #14142:
URL: https://github.com/apache/arrow/pull/14142#discussion_r974511653


##########
cpp/src/parquet/column_reader.cc:
##########
@@ -1328,6 +1329,156 @@ class TypedRecordReader : public 
ColumnReaderImplBase<DType>,
 
     return records_read;
   }
+  
+  // Skip records that we have in our buffer. This function is only for
+  // non-repeated fields.
+  int64_t SkipRecordsInBufferNonRepeated(int64_t num_records) {
+    ARROW_DCHECK(this->max_rep_level_ == 0);
+    ARROW_DCHECK(this->has_values_to_process());
+
+    int64_t remaining_records = levels_written_ - levels_position_;
+    int64_t skipped_records = std::min(num_records, remaining_records);
+    int64_t start_levels_position = levels_position_;
+    // Since there is no repetition, number of levels equals number of records.
+    levels_position_ += skipped_records;
+    // We skipped the levels by incrementing 'levels_position_'. For values
+    // we do not have a buffer, so we need to read them and throw them away.
+    // First we need to figure out how many present/not-null values there are.
+    std::shared_ptr<::arrow::ResizableBuffer> valid_bits;
+    valid_bits = AllocateBuffer(this->pool_);
+    PARQUET_THROW_NOT_OK(
+        valid_bits->Resize(bit_util::BytesForBits(skipped_records), true));
+    ValidityBitmapInputOutput validity_io;
+    validity_io.values_read_upper_bound = skipped_records;
+    validity_io.valid_bits = valid_bits->mutable_data();
+    validity_io.valid_bits_offset = 0;
+    DefLevelsToBitmap(def_levels() + start_levels_position,
+                      levels_position_ - start_levels_position,
+                      this->leaf_info_, &validity_io);
+    int64_t values_to_read = validity_io.values_read - validity_io.null_count;
+    ReadAndThrowAway(values_to_read);
+    // Mark the levels as read in the underlying column reader.
+    this->ConsumeBufferedValues(skipped_records);
+    return skipped_records;
+  }
+
+  // Skip records for repeated fields. Returns number of skipped records.
+  // Skip records for repeated fields. Returns number of skipped records.

Review Comment:
   Done.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to