westonpace commented on a change in pull request #11991:
URL: https://github.com/apache/arrow/pull/11991#discussion_r780473574



##########
File path: cpp/src/arrow/dataset/scanner.cc
##########
@@ -823,10 +584,77 @@ Result<int64_t> AsyncScanner::CountRows() {
   return total.load();
 }
 
+Result<std::shared_ptr<RecordBatchReader>> AsyncScanner::ToRecordBatchReader() 
{
+  ARROW_ASSIGN_OR_RAISE(auto it, ScanBatches());
+  return 
std::make_shared<ScannerRecordBatchReader>(options()->projected_schema,
+                                                    std::move(it));
+}
+
 const std::shared_ptr<Dataset>& AsyncScanner::dataset() const { return 
dataset_; }
 
+Status NestedFieldRefsNotImplemented() {
+  // TODO(ARROW-11259) Several functions (for example, IpcScanTask::Make) assume that
+  // only top level fields will be materialized.
+  return Status::NotImplemented("Nested field references in scans.");
+}
+
 }  // namespace
 
+Result<ProjectionDescr> ProjectionDescr::FromStructExpression(
+    const compute::Expression& projection, const Schema& dataset_schema) {
+  ARROW_ASSIGN_OR_RAISE(compute::Expression bound_expression,
+                        projection.Bind(dataset_schema));
+
+  if (bound_expression.type()->id() != Type::STRUCT) {
+    return Status::Invalid("Projection ", projection.ToString(),
+                           " cannot yield record batches");
+  }
+  std::shared_ptr<Schema> projection_schema =
+      ::arrow::schema(checked_cast<const 
StructType&>(*bound_expression.type()).fields(),
+                      dataset_schema.metadata());
+
+  return ProjectionDescr{std::move(bound_expression), 
std::move(projection_schema)};
+}
+
+Result<ProjectionDescr> ProjectionDescr::FromExpressions(
+    const std::vector<compute::Expression>& exprs, std::vector<std::string> 
names,
+    const Schema& dataset_schema) {
+  compute::MakeStructOptions project_options{std::move(names)};
+
+  for (size_t i = 0; i < exprs.size(); ++i) {
+    if (auto ref = exprs[i].field_ref()) {
+      if (!ref->name()) return NestedFieldRefsNotImplemented();
+
+      // set metadata and nullability for plain field references
+      ARROW_ASSIGN_OR_RAISE(auto field, ref->GetOne(dataset_schema));
+      project_options.field_nullability[i] = field->nullable();
+      project_options.field_metadata[i] = field->metadata();
+    }
+  }
+
+  return ProjectionDescr::FromStructExpression(
+      call("make_struct", std::move(exprs), std::move(project_options)), 
dataset_schema);

Review comment:
       Good catch. Fixed. All the callers were moving into it anyway.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to