westonpace commented on a change in pull request #11991: URL: https://github.com/apache/arrow/pull/11991#discussion_r780473574
########## File path: cpp/src/arrow/dataset/scanner.cc ########## @@ -823,10 +584,77 @@ Result<int64_t> AsyncScanner::CountRows() { return total.load(); } +Result<std::shared_ptr<RecordBatchReader>> AsyncScanner::ToRecordBatchReader() { + ARROW_ASSIGN_OR_RAISE(auto it, ScanBatches()); + return std::make_shared<ScannerRecordBatchReader>(options()->projected_schema, + std::move(it)); +} + const std::shared_ptr<Dataset>& AsyncScanner::dataset() const { return dataset_; } +Status NestedFieldRefsNotImplemented() { + // TODO(ARROW-11259) Several functions (for example, IpcScanTask::Make) assume that + // only top level fields will be materialized. + return Status::NotImplemented("Nested field references in scans."); +} + } // namespace +Result<ProjectionDescr> ProjectionDescr::FromStructExpression( + const compute::Expression& projection, const Schema& dataset_schema) { + ARROW_ASSIGN_OR_RAISE(compute::Expression bound_expression, + projection.Bind(dataset_schema)); + + if (bound_expression.type()->id() != Type::STRUCT) { + return Status::Invalid("Projection ", projection.ToString(), + " cannot yield record batches"); + } + std::shared_ptr<Schema> projection_schema = + ::arrow::schema(checked_cast<const StructType&>(*bound_expression.type()).fields(), + dataset_schema.metadata()); + + return ProjectionDescr{std::move(bound_expression), std::move(projection_schema)}; +} + +Result<ProjectionDescr> ProjectionDescr::FromExpressions( + const std::vector<compute::Expression>& exprs, std::vector<std::string> names, + const Schema& dataset_schema) { + compute::MakeStructOptions project_options{std::move(names)}; + + for (size_t i = 0; i < exprs.size(); ++i) { + if (auto ref = exprs[i].field_ref()) { + if (!ref->name()) return NestedFieldRefsNotImplemented(); + + // set metadata and nullability for plain field references + ARROW_ASSIGN_OR_RAISE(auto field, ref->GetOne(dataset_schema)); + project_options.field_nullability[i] = field->nullable(); + project_options.field_metadata[i] 
= field->metadata(); + } + } + + return ProjectionDescr::FromStructExpression( + call("make_struct", std::move(exprs), std::move(project_options)), dataset_schema); Review comment: Good catch. Fixed. All the callers were moving into it anyway. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org