fsaintjacques commented on a change in pull request #7534:
URL: https://github.com/apache/arrow/pull/7534#discussion_r445605965



##########
File path: cpp/src/parquet/arrow/reader.cc
##########
@@ -338,22 +348,39 @@ class RowGroupRecordBatchReader : public ::arrow::RecordBatchReader {
     // TODO (hatemhelal): Consider refactoring this to share logic with ReadTable as this
     // does not currently honor the use_threads option.
     std::vector<std::shared_ptr<ChunkedArray>> columns(field_readers_.size());
-    for (size_t i = 0; i < field_readers_.size(); ++i) {
-      RETURN_NOT_OK(field_readers_[i]->NextBatch(batch_size_, &columns[i]));
-      if (columns[i]->num_chunks() > 1) {
-        return Status::NotImplemented("This class cannot yet iterate chunked arrays");
+    int64_t num_rows = -1;
+
+    if (columns.empty()) {
+      // num_rows cannot be derived from field_readers_ so compute it using
+      // row group sizes cached from metadata
+      num_rows = std::min(batch_size_, *row_group_remaining_size_);
+      *row_group_remaining_size_ -= num_rows;
+      if (*row_group_remaining_size_ == 0) {
+        ++row_group_remaining_size_;
       }
+    } else {
+      for (size_t i = 0; i < field_readers_.size(); ++i) {
+        RETURN_NOT_OK(field_readers_[i]->NextBatch(batch_size_, &columns[i]));
+        if (columns[i]->num_chunks() > 1) {
+          return Status::NotImplemented("This class cannot yet iterate chunked arrays");
+        }
+      }
+      num_rows = columns[0]->length();
     }
 
     // Create an intermediate table and use TableBatchReader as an adaptor to a
     // RecordBatch
-    std::shared_ptr<Table> table = Table::Make(schema_, columns);
+    std::shared_ptr<Table> table = Table::Make(schema_, columns, num_rows);
+
     RETURN_NOT_OK(table->Validate());
     ::arrow::TableBatchReader table_batch_reader(*table);
     return table_batch_reader.ReadNext(out);
   }
 
  private:
+  std::shared_ptr<FileMetaData> metadata_;

Review comment:
       `metadata_` is not used.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to