westonpace commented on code in PR #14118:
URL: https://github.com/apache/arrow/pull/14118#discussion_r1047267171
##########
cpp/src/arrow/engine/substrait/serde_test.cc:
##########
@@ -2082,43 +2103,121 @@ TEST(Substrait, BasicPlanRoundTrippingEndToEnd) {
auto sink_decls,
DeserializePlans(
*serialized_plan, [] { return kNullConsumer; }, ext_id_reg,
&ext_set));
- // filter declaration
- auto& roundtripped_filter =
std::get<compute::Declaration>(sink_decls[0].inputs[0]);
- const auto& filter_opts =
- checked_cast<const
compute::FilterNodeOptions&>(*(roundtripped_filter.options));
- auto roundtripped_expr = filter_opts.filter_expression;
-
- if (auto* call = roundtripped_expr.call()) {
- EXPECT_EQ(call->function_name, "equal");
- auto args = call->arguments;
- auto left_index = args[0].field_ref()->field_path()->indices()[0];
- EXPECT_EQ(dummy_schema->field_names()[left_index], filter_col_left);
- auto right_index = args[1].field_ref()->field_path()->indices()[0];
- EXPECT_EQ(dummy_schema->field_names()[right_index], filter_col_right);
- }
- // scan declaration
+ // assert filter declaration
+ const auto& roundtripped_filter =
+ std::get<compute::Declaration>(sink_decls[0].inputs[0]);
+ AssertFilterRelation(roundtripped_filter, std::move(filter_expr),
dummy_schema);
+ // assert scan declaration
const auto& roundtripped_scan =
std::get<compute::Declaration>(roundtripped_filter.inputs[0]);
- const auto& dataset_opts =
- checked_cast<const
dataset::ScanNodeOptions&>(*(roundtripped_scan.options));
- const auto& roundripped_ds = dataset_opts.dataset;
- EXPECT_TRUE(roundripped_ds->schema()->Equals(*dummy_schema));
- ASSERT_OK_AND_ASSIGN(auto roundtripped_frgs, roundripped_ds->GetFragments());
- ASSERT_OK_AND_ASSIGN(auto expected_frgs, dataset->GetFragments());
+ AssertScanRelation(roundtripped_scan, dataset, dummy_schema);
+ // assert results
+ AssertPlanExecutionResult(expected_table, roundtripped_filter, dummy_schema,
+ exec_context);
+}
- auto roundtrip_frg_vec = IteratorToVector(std::move(roundtripped_frgs));
- auto expected_frg_vec = IteratorToVector(std::move(expected_frgs));
- EXPECT_EQ(expected_frg_vec.size(), roundtrip_frg_vec.size());
- int64_t idx = 0;
- for (auto fragment : expected_frg_vec) {
- const auto* l_frag = checked_cast<const
dataset::FileFragment*>(fragment.get());
- const auto* r_frag =
- checked_cast<const
dataset::FileFragment*>(roundtrip_frg_vec[idx++].get());
- EXPECT_TRUE(l_frag->Equals(*r_frag));
+TEST(Substrait, FilterProjectPlanRoundTripping) {
+#ifdef _WIN32
+ GTEST_SKIP() << "ARROW-16392: Substrait File URI not supported for Windows";
+#endif
+ compute::ExecContext exec_context;
+ arrow::dataset::internal::Initialize();
+
+ auto dummy_schema = schema(
+ {field("key", int32()), field("shared", int32()), field("distinct",
int32())});
+
+ // creating a dummy dataset using a dummy table
+ auto table = TableFromJSON(dummy_schema, {R"([
+ [1, 1, 10],
+ [3, 4, 20]
+ ])",
+ R"([
+ [0, 2, 1],
+ [1, 3, 2],
+ [4, 1, 3],
+ [3, 1, 3],
+ [1, 2, 5]
+ ])",
+ R"([
+ [2, 2, 12],
+ [5, 3, 12],
+ [1, 3, 12]
+ ])"});
+
+ auto format = std::make_shared<arrow::dataset::IpcFileFormat>();
+ auto filesystem = std::make_shared<fs::LocalFileSystem>();
+ const std::string file_name = "serde_project_test.arrow";
Review Comment:
@vibhatha and I spoke about this a bit. The round trip tests are simpler,
and we could go in that direction. However, they do not verify much of the
actual Substrait (they only check to see the results are the same after round
tripping).
I am now recommending we keep it this way since it is good to have a few
tests that are doing a more detailed inspection of the created substrait.
Maybe we can revisit this later when we have simpler ways of representing plans
(e.g. text format).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]