bkietz commented on a change in pull request #9616:
URL: https://github.com/apache/arrow/pull/9616#discussion_r585931483



##########
File path: cpp/src/arrow/dataset/dataset.h
##########
@@ -43,7 +43,7 @@ namespace dataset {
 /// Note that Fragments have well defined physical schemas which are reconciled by
 /// the Datasets which contain them; these physical schemas may differ from a parent
 /// Dataset's schema and the physical schemas of sibling Fragments.
-class ARROW_DS_EXPORT Fragment {
+class ARROW_DS_EXPORT Fragment : public std::enable_shared_from_this<Fragment> {

Review comment:
       That sounds good to me, but I think it should be called `ToString()`

##########
File path: cpp/src/arrow/dataset/scanner.cc
##########
@@ -73,9 +73,12 @@ Result<ScanTaskIterator> Scanner::Scan() {
 Result<ScanTaskIterator> ScanTaskIteratorFromRecordBatch(
     std::vector<std::shared_ptr<RecordBatch>> batches,
     std::shared_ptr<ScanOptions> options, std::shared_ptr<ScanContext> context) {
-  ScanTaskVector tasks{std::make_shared<InMemoryScanTask>(batches, std::move(options),
-                                                          std::move(context))};
-  return MakeVectorIterator(std::move(tasks));
+  if (batches.empty()) {
+    return MakeVectorIterator(ScanTaskVector());
+  }
+  auto schema = batches[0]->schema();
+  auto fragment = std::make_shared<InMemoryFragment>(schema, batches);

Review comment:
       ```suggestion
     auto fragment = std::make_shared<InMemoryFragment>(std::move(schema), std::move(batches));
   ```

##########
File path: cpp/src/arrow/dataset/file_base.cc
##########
@@ -330,8 +326,8 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio
 
         std::unordered_set<WriteQueue*> need_flushed;
         for (size_t i = 0; i < groups.batches.size(); ++i) {
-          auto partition_expression =
-              and_(std::move(groups.expressions[i]), fragment->partition_expression());
+          auto partition_expression = and_(std::move(groups.expressions[i]),
+                                           scan_task->fragment()->partition_expression());

Review comment:
       :+1: 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to