westonpace commented on a change in pull request #9607:
URL: https://github.com/apache/arrow/pull/9607#discussion_r605536876



##########
File path: cpp/src/arrow/dataset/file_csv.cc
##########
@@ -110,35 +110,47 @@ static inline Result<csv::ReadOptions> GetReadOptions(
   return read_options;
 }
 
-static inline Result<std::shared_ptr<csv::StreamingReader>> OpenReader(
+static inline Future<std::shared_ptr<csv::StreamingReader>> OpenReaderAsync(
     const FileSource& source, const CsvFileFormat& format,
     const std::shared_ptr<ScanOptions>& scan_options = nullptr,
     MemoryPool* pool = default_memory_pool()) {
   ARROW_ASSIGN_OR_RAISE(auto reader_options, GetReadOptions(format, 
scan_options));
 
-  util::string_view first_block;
   ARROW_ASSIGN_OR_RAISE(auto input, source.OpenCompressed());
   ARROW_ASSIGN_OR_RAISE(
       input, io::BufferedInputStream::Create(reader_options.block_size,
                                              default_memory_pool(), 
std::move(input)));
-  ARROW_ASSIGN_OR_RAISE(first_block, input->Peek(reader_options.block_size));
-
-  const auto& parse_options = format.parse_options;
-  auto convert_options = csv::ConvertOptions::Defaults();
-  if (scan_options != nullptr) {
-    ARROW_ASSIGN_OR_RAISE(convert_options,
-                          GetConvertOptions(format, scan_options, first_block, 
pool));
-  }
 
-  auto maybe_reader =
-      csv::StreamingReader::Make(io::IOContext(pool), std::move(input), 
reader_options,
-                                 parse_options, convert_options);
-  if (!maybe_reader.ok()) {
-    return maybe_reader.status().WithMessage("Could not open CSV input source 
'",
-                                             source.path(), "': ", 
maybe_reader.status());
-  }
+  auto peek_fut = DeferNotOk(input->io_context().executor()->Submit(

Review comment:
       I think I was operating as if filesystems was async but the point is 
valid.  Plus, I had missed that I was on the I/O thread pool here and not 
transferring back to the CPU thread pool.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to