lidavidm commented on a change in pull request #9656:
URL: https://github.com/apache/arrow/pull/9656#discussion_r590508192



##########
File path: cpp/src/arrow/ipc/reader.cc
##########
@@ -1022,6 +1209,31 @@ class RecordBatchFileReaderImpl : public 
RecordBatchFileReader {
 
   ReadStats stats() const override { return stats_; }
 
+  Result<AsyncGenerator<std::shared_ptr<RecordBatch>>> GetRecordBatchGenerator(
+      int readahead_messages, const io::IOContext& io_context) override {
+    auto state = std::make_shared<IpcFileRecordBatchGeneratorState>();
+    state->num_dictionaries_ = num_dictionaries();
+    state->num_record_batches_ = num_record_batches();
+    state->file_ = file_;
+    state->options_ = options_;
+    state->owned_file_ = owned_file_;
+    state->footer_buffer_ = footer_buffer_;
+    state->footer_ = footer_;
+    // Must regenerate uncopyable DictionaryMemo
+    RETURN_NOT_OK(UnpackSchemaMessage(state->footer_->schema(), 
state->options_,
+                                      &state->dictionary_memo_, 
&state->schema_,
+                                      &state->out_schema_, 
&state->field_inclusion_mask_,
+                                      &state->swap_endian_));
+    AsyncGenerator<std::shared_ptr<Message>> message_generator =
+        IpcMessageGenerator(state, io_context);
+    if (readahead_messages > 0) {
+      message_generator =
+          MakeReadaheadGenerator(std::move(message_generator), 
readahead_messages);
+    }
+    return IpcFileRecordBatchGenerator(state, message_generator,
+                                       arrow::internal::GetCpuThreadPool());

Review comment:
       Usually not, but it would be in the case of compressed buffers. I could 
also change it to not offload onto a secondary pool by default (and hence do 
the work on the same thread used to read data), and/or benchmark whether 
offloading adds any overhead.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to