lidavidm commented on a change in pull request #10060:
URL: https://github.com/apache/arrow/pull/10060#discussion_r626533245
##########
File path: cpp/src/arrow/dataset/scanner.cc
##########
@@ -488,10 +507,37 @@ Result<AsyncGenerator<EnumeratedRecordBatchGenerator>>
FragmentsToBatches(
return MakeMappedGenerator<EnumeratedRecordBatchGenerator>(
std::move(enumerated_fragment_gen),
[scanner](const Enumerated<std::shared_ptr<Fragment>>& fragment) {
- return FragmentToBatches(scanner, fragment);
+ return FragmentToBatches(scanner, fragment, scanner->options());
});
}
+Result<AsyncGenerator<AsyncGenerator<util::optional<int64_t>>>>
FragmentsToRowCount(
+ std::shared_ptr<AsyncScanner> scanner, FragmentGenerator fragment_gen) {
+ // Must use optional<int64_t> to avoid breaking the pipeline on empty batches
+ auto enumerated_fragment_gen =
MakeEnumeratedGenerator(std::move(fragment_gen));
+ auto options = std::make_shared<ScanOptions>(*scanner->options());
+ RETURN_NOT_OK(SetProjection(options.get(), std::vector<std::string>()));
+ auto count_fragment_fn =
+ [scanner, options](const Enumerated<std::shared_ptr<Fragment>>& fragment)
+ -> Result<AsyncGenerator<util::optional<int64_t>>> {
+ auto count = fragment.value->CountRows(options->filter, options);
+ // Fast path
+ if (count.has_value()) {
+ return MakeSingleFutureGenerator(count.value().Then(
+ [](int64_t val) { return util::make_optional<int64_t>(val); }));
Review comment:
I think this relates to ARROW-12655
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org