westonpace commented on code in PR #14681:
URL: https://github.com/apache/arrow/pull/14681#discussion_r1041921905


##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -638,6 +659,10 @@ Result<std::shared_ptr<Schema>> ExtractSchemaToBind(const compute::Declaration&
   } else if (declr.factory_name == "filter") {
     auto input_declr = std::get<compute::Declaration>(declr.inputs[0]);
     ARROW_ASSIGN_OR_RAISE(bind_schema, ExtractSchemaToBind(input_declr));
+  } else if (declr.factory_name == "named_table") {
+    const auto& opts =
+        checked_cast<const compute::NamedTableNodeOptions&>(*(declr.options));

Review Comment:
   ```suggestion
           checked_cast<const compute::NamedTableNodeOptions&>(*declr.options);
   ```



##########
cpp/src/arrow/engine/substrait/serde_test.cc:
##########
@@ -2260,6 +2293,57 @@ TEST(SubstraitRoundTrip, BasicPlanEndToEnd) {
   EXPECT_TRUE(expected_table->Equals(*rnd_trp_table));
 }
 
+TEST(SubstraitRoundTrip, FilterNamedTable) {
+  compute::ExecContext exec_context;
+  arrow::dataset::internal::Initialize();
+
+  const std::vector<std::string> table_names{"table", "1"};
+  const auto dummy_schema =
+      schema({field("A", int32()), field("B", int32()), field("C", int32())});
+  auto filter = compute::equal(compute::field_ref("A"), compute::field_ref("B"));
+
+  auto declarations = compute::Declaration::Sequence(
+      {compute::Declaration({"named_table",
+                             compute::NamedTableNodeOptions{table_names, dummy_schema},
+                             "n"}),
+       compute::Declaration({"filter", compute::FilterNodeOptions{filter}, "f"})});
+
+  ExtensionSet ext_set{};
+  ASSERT_OK_AND_ASSIGN(auto serialized_plan, SerializePlan(declarations, &ext_set));
+
+  // creating a dummy dataset using a dummy table
+  auto input_table = TableFromJSON(dummy_schema, {R"([
+      [1, 1, 10],
+      [3, 5, 20],
+      [4, 1, 30],
+      [2, 1, 40],
+      [5, 5, 50],
+      [2, 2, 60]
+  ])"});
+
+  NamedTableProvider table_provider =
+      [&input_table, &table_names](
+          const std::vector<std::string>& names) -> Result<compute::Declaration> {
+    if (table_names != names) {
+      return Status::Invalid("Table name mismatch");
+    }
+    std::shared_ptr<compute::ExecNodeOptions> options =
+        std::make_shared<compute::TableSourceNodeOptions>(input_table);
+    return compute::Declaration("table_source", {}, options, "mock_source");

Review Comment:
   ```suggestion
       return compute::Declaration("table_source", {}, std::move(options), "mock_source");
   ```



##########
cpp/src/arrow/compute/exec/options.h:
##########
@@ -85,6 +85,19 @@ class ARROW_EXPORT TableSourceNodeOptions : public ExecNodeOptions {
   int64_t max_batch_size;
 };
 
+/// \brief Define a lazy resolved Arrow table.
+///
+/// The table uniquely identified by the names can typically be resolved at the time when
+/// the plan is to be consumed.

Review Comment:
   ```suggestion
   /// the plan is to be consumed.
   ///
   /// This node is for serialization purposes only and can never be executed
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to