westonpace commented on a change in pull request #9620:
URL: https://github.com/apache/arrow/pull/9620#discussion_r622647276



##########
File path: cpp/src/arrow/dataset/file_parquet.cc
##########
@@ -325,6 +325,45 @@ Result<std::unique_ptr<parquet::arrow::FileReader>> 
ParquetFileFormat::GetReader
   return std::move(arrow_reader);
 }
 
+Future<std::shared_ptr<parquet::arrow::FileReader>> 
ParquetFileFormat::GetReaderAsync(
+    const FileSource& source, ScanOptions* options) const {
+  ARROW_ASSIGN_OR_RAISE(auto parquet_scan_options,
+                        GetFragmentScanOptions<ParquetFragmentScanOptions>(
+                            kParquetTypeName, options, 
default_fragment_scan_options));
+  MemoryPool* pool = options ? options->pool : default_memory_pool();
+  auto properties = MakeReaderProperties(*this, parquet_scan_options.get(), 
pool);
+  ARROW_ASSIGN_OR_RAISE(auto input, source.Open());
+  // Some ugliness needed due to having Future<unique_ptr<>> here
+  auto reader_fut =
+      parquet::ParquetFileReader::OpenAsync(std::move(input), 
std::move(properties));
+  auto path = source.path();
+  auto self = checked_pointer_cast<const 
ParquetFileFormat>(shared_from_this());
+  return reader_fut.Then(
+      [=](const std::unique_ptr<parquet::ParquetFileReader>&) mutable
+      -> Result<std::shared_ptr<parquet::arrow::FileReader>> {
+        ARROW_ASSIGN_OR_RAISE(std::unique_ptr<parquet::ParquetFileReader> 
reader,
+                              reader_fut.MoveResult());
+        std::shared_ptr<parquet::FileMetaData> metadata = reader->metadata();
+        auto arrow_properties = MakeArrowReaderProperties(*self, *metadata);
+        if (options) {
+          arrow_properties.set_batch_size(options->batch_size);
+        }
+        if (options && !options->use_threads) {
+          arrow_properties.set_use_threads(
+              parquet_scan_options->enable_parallel_column_conversion);

Review comment:
       This could be a nice win for async on HDD/SSD storage (though I still
recommend doing it in a follow-up PR).




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to