lidavidm commented on a change in pull request #12466:
URL: https://github.com/apache/arrow/pull/12466#discussion_r812869519



##########
File path: cpp/src/arrow/dataset/scanner.cc
##########
@@ -86,6 +86,86 @@ class ScannerRecordBatchReader : public RecordBatchReader {
   std::shared_ptr<Schema> schema_;
   TaggedRecordBatchIterator delegate_;
 };
+
+const FieldVector kAugmentedFields{
+    field("__fragment_index", int32()),
+    field("__batch_index", int32()),
+    field("__last_in_fragment", boolean()),
+};
+
+// Scan options has a number of options that we can infer from the dataset
+// schema if they are not specified.
+Status NormalizeScanOptions(const std::shared_ptr<ScanOptions>& scan_options,
+                            const std::shared_ptr<Schema>& dataset_schema) {
+  if (scan_options->dataset_schema == nullptr) {
+    scan_options->dataset_schema = dataset_schema;
+  }
+
+  if (!scan_options->filter.IsBound()) {
+    ARROW_ASSIGN_OR_RAISE(scan_options->filter,
+                          scan_options->filter.Bind(*dataset_schema));
+  }
+
+  if (!scan_options->projected_schema) {
+    // If the user specifies a projection expression we can maybe infer from
+    // that expression
+    if (scan_options->projection.IsBound()) {
+      if (auto call = scan_options->projection.call()) {
+        if (call->function_name != "make_struct") {
+          return Status::Invalid(
+              "Top level projection expression call must be make_struct");
+        }
+        FieldVector fields;
+        for (const auto& arg : call->arguments) {
+          if (auto field_ref = arg.field_ref()) {
+            if (field_ref->IsName()) {
+              fields.push_back(field(*field_ref->name(), arg.type()));
+              break;
+            }
+          }
+          // Either the expression for this field is not a field_ref or it is not a
+          // simple field_ref.  User must supply projected_schema
+          return Status::Invalid(
+              "No projected schema was supplied and we could not infer the projected "
+              "schema from the projection expression.");

Review comment:
       We could infer names/types by just stringifying the expression and using 
the expression's type in this case, though I guess as a temporary fix it may 
not be worth it.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to