rtpsw commented on code in PR #13375:
URL: https://github.com/apache/arrow/pull/13375#discussion_r907736969


##########
cpp/src/arrow/engine/substrait/serde.cc:
##########
@@ -58,12 +58,57 @@ Result<compute::Declaration> DeserializeRelation(const 
Buffer& buf,
   return FromProto(rel, ext_set);
 }
 
-Result<std::vector<compute::Declaration>> DeserializePlans(
-    const Buffer& buf, const ConsumerFactory& consumer_factory,
+using DeclarationFactory = std::function<compute::Declaration(
+    compute::Declaration, std::vector<std::string> names)>;
+
+static DeclarationFactory MakeConsumingSinkDeclarationFactory(
+    const ConsumerFactory& consumer_factory) {
+  return [&consumer_factory](compute::Declaration input, 
std::vector<std::string> names) {
+    std::shared_ptr<compute::ExecNodeOptions> options =
+        std::make_shared<compute::ConsumingSinkNodeOptions>(
+            compute::ConsumingSinkNodeOptions{consumer_factory(), 
std::move(names)});
+    return compute::Declaration::Sequence(
+        {std::move(input), {"consuming_sink", options}});
+  };
+}
+
+namespace {
+
+compute::Declaration ProjectByNamesDeclaration(compute::Declaration input,
+                                               std::vector<std::string> names) 
{
+  int names_size = static_cast<int>(names.size());
+  if (names_size == 0) {
+    return input;
+  }
+  std::vector<compute::Expression> expressions;
+  for (int i = 0; i < names_size; i++) {
+    expressions.push_back(compute::field_ref(FieldRef(i)));
+  }
+  return compute::Declaration::Sequence(
+      {std::move(input),
+       {"project",
+        compute::ProjectNodeOptions{std::move(expressions), 
std::move(names)}}});
+}
+
+}  // namespace
+
+static DeclarationFactory MakeWriteDeclarationFactory(
+    const WriteOptionsFactory& write_options_factory) {
+  return [&write_options_factory](compute::Declaration input,
+                                  std::vector<std::string> names) {
+    compute::Declaration projected = ProjectByNamesDeclaration(input, names);
+    std::shared_ptr<compute::ExecNodeOptions> options = 
write_options_factory();
+    return compute::Declaration::Sequence({std::move(projected), {"write", 
options}});

Review Comment:
   Please create a JIRA issue for this. Perhaps there should be a hierarchy of
options — names only, names plus metadata, and full schema — in increasing
order of detail. The names-only option is what the Substrait plan needs. If the
caller provides a schema, then the types must match, as you say.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to