emkornfield commented on code in PR #41362:
URL: https://github.com/apache/arrow/pull/41362#discussion_r1583391648


##########
cpp/src/parquet/column_reader.cc:
##########
@@ -1675,42 +1675,50 @@ class TypedRecordReader : public 
TypedColumnReaderImpl<DType>,
   //
   // \return Number of records delimited
   int64_t DelimitRecords(int64_t num_records, int64_t* values_seen) {
+    if (ARROW_PREDICT_FALSE(num_records == 0 || levels_position_ == 
levels_written_)) {
+      *values_seen = 0;
+      return 0;
+    }
     int64_t values_to_read = 0;
     int64_t records_read = 0;
-
-    const int16_t* def_levels = this->def_levels() + levels_position_;
-    const int16_t* rep_levels = this->rep_levels() + levels_position_;
-
+    const int16_t* const rep_levels = this->rep_levels();
+    const int16_t* const def_levels = this->def_levels();
     ARROW_DCHECK_GT(this->max_rep_level_, 0);
-
-    // Count logical records and number of values to read
-    while (levels_position_ < levels_written_) {
-      const int16_t rep_level = *rep_levels++;
-      if (rep_level == 0) {
-        // If at_record_start_ is true, we are seeing the start of a record
-        // for the second time, such as after repeated calls to
-        // DelimitRecords. In this case we must continue until we find
-        // another record start or exhausting the ColumnChunk
-        if (!at_record_start_) {
-          // We've reached the end of a record; increment the record count.
-          ++records_read;
-          if (records_read == num_records) {
-            // We've found the number of records we were looking for. Set
-            // at_record_start_ to true and break
-            at_record_start_ = true;
-            break;
-          }
-        }
-      }
+    // If at_record_start_ is true, we are seeing the start of a record
+    // for the second time, such as after repeated calls to
+    // DelimitRecords. In this case we must continue until we find
+    // another record start or exhausting the ColumnChunk
+    if (at_record_start_) {
+      ARROW_DCHECK_EQ(0, rep_levels[levels_position_]);
+      values_to_read += def_levels[levels_position_] == this->max_def_level_;
+      ++levels_position_;
       // We have decided to consume the level at this position; therefore we
       // must advance until we find another record boundary
       at_record_start_ = false;
+    }
 
-      const int16_t def_level = *def_levels++;
-      if (def_level == this->max_def_level_) {
-        ++values_to_read;
+    // Count logical records and number of non-null values to read
+    ARROW_DCHECK(!at_record_start_);
+    while (levels_position_ < levels_written_) {
+      int64_t stride =
+          std::min(levels_written_ - levels_position_, num_records - 
records_read);
+      const int64_t position_end = levels_position_ + stride;
+      for (int64_t i = levels_position_; i < position_end; ++i) {

Review Comment:
   is there a reason to not increment level_position_ directly instead of 
introducing a new temporary?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to