bkietz commented on a change in pull request #9616: URL: https://github.com/apache/arrow/pull/9616#discussion_r585931483
##########
File path: cpp/src/arrow/dataset/dataset.h
##########
@@ -43,7 +43,7 @@ namespace dataset {
 /// Note that Fragments have well defined physical schemas which are reconciled by
 /// the Datasets which contain them; these physical schemas may differ from a parent
 /// Dataset's schema and the physical schemas of sibling Fragments.
-class ARROW_DS_EXPORT Fragment {
+class ARROW_DS_EXPORT Fragment : public std::enable_shared_from_this<Fragment> {

Review comment:
That sounds good to me, but I think it should be called `ToString()`

##########
File path: cpp/src/arrow/dataset/scanner.cc
##########
@@ -73,9 +73,12 @@ Result<ScanTaskIterator> Scanner::Scan() {
 Result<ScanTaskIterator> ScanTaskIteratorFromRecordBatch(
     std::vector<std::shared_ptr<RecordBatch>> batches,
     std::shared_ptr<ScanOptions> options, std::shared_ptr<ScanContext> context) {
-  ScanTaskVector tasks{std::make_shared<InMemoryScanTask>(batches, std::move(options),
-                                                          std::move(context))};
-  return MakeVectorIterator(std::move(tasks));
+  if (batches.empty()) {
+    return MakeVectorIterator(ScanTaskVector());
+  }
+  auto schema = batches[0]->schema();
+  auto fragment = std::make_shared<InMemoryFragment>(schema, batches);

Review comment:
```suggestion
  auto fragment = std::make_shared<InMemoryFragment>(std::move(schema), std::move(batches));
```

##########
File path: cpp/src/arrow/dataset/file_base.cc
##########
@@ -330,8 +326,8 @@ Status FileSystemDataset::Write(const FileSystemDatasetWriteOptions& write_optio
     std::unordered_set<WriteQueue*> need_flushed;
     for (size_t i = 0; i < groups.batches.size(); ++i) {
-      auto partition_expression =
-          and_(std::move(groups.expressions[i]), fragment->partition_expression());
+      auto partition_expression = and_(std::move(groups.expressions[i]),
+                                       scan_task->fragment()->partition_expression());

Review comment:
:+1:

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org