bkietz commented on a change in pull request #10664:
URL: https://github.com/apache/arrow/pull/10664#discussion_r665622977
##########
File path: cpp/src/arrow/dataset/scanner.cc
##########
@@ -506,73 +444,116 @@ Result<EnumeratedRecordBatchGenerator> FragmentToBatches(
return EnumeratedRecordBatch{record_batch, fragment};
};
- auto combined_gen = MakeMappedGenerator(enumerated_batch_gen,
std::move(combine_fn));
-
- if (filter_and_project) {
- return FilterAndProjectRecordBatchAsync(options, std::move(combined_gen));
- }
- return combined_gen;
+ return MakeMappedGenerator(enumerated_batch_gen, std::move(combine_fn));
}
Result<AsyncGenerator<EnumeratedRecordBatchGenerator>> FragmentsToBatches(
- FragmentGenerator fragment_gen, const std::shared_ptr<ScanOptions>&
options,
- bool filter_and_project = true) {
+ FragmentGenerator fragment_gen, const std::shared_ptr<ScanOptions>&
options) {
auto enumerated_fragment_gen =
MakeEnumeratedGenerator(std::move(fragment_gen));
return MakeMappedGenerator(std::move(enumerated_fragment_gen),
[=](const Enumerated<std::shared_ptr<Fragment>>&
fragment) {
- return FragmentToBatches(fragment, options,
- filter_and_project);
+ return FragmentToBatches(fragment, options);
});
}
-Result<AsyncGenerator<AsyncGenerator<util::optional<int64_t>>>>
FragmentsToRowCount(
- FragmentGenerator fragment_gen,
- std::shared_ptr<ScanOptions> options_with_projection) {
- // Must use optional<int64_t> to avoid breaking the pipeline on empty batches
- auto enumerated_fragment_gen =
MakeEnumeratedGenerator(std::move(fragment_gen));
+Result<compute::ExecNode*> MakeScanNode(compute::ExecPlan* plan,
+ FragmentGenerator fragment_gen,
+ std::shared_ptr<ScanOptions> options) {
+ if (!options->use_async) {
+ return Status::NotImplemented("ScanNodes without asynchrony");
+ }
- // Drop projection since we only need to count rows
- auto options = std::make_shared<ScanOptions>(*options_with_projection);
- RETURN_NOT_OK(SetProjection(options.get(), std::vector<std::string>()));
Review comment:
@lidavidm thanks for the fix!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org