lidavidm commented on a change in pull request #11704:
URL: https://github.com/apache/arrow/pull/11704#discussion_r750253049



##########
File path: cpp/src/arrow/dataset/test_util.h
##########
@@ -631,12 +646,92 @@ class FileFormatScanMixin : public FileFormatFixtureMixin<FormatHelper>,
     for (auto maybe_batch : PhysicalBatches(fragment)) {
       ASSERT_OK_AND_ASSIGN(auto batch, maybe_batch);
       row_count += batch->num_rows();
-      AssertSchemaEqual(*batch->schema(), *expected_schema,
-                        /*check_metadata=*/false);
+      ASSERT_THAT(
+          batch->schema()->fields(),
+          ::testing::UnorderedPointwise(PointeesEquals(), 
expected_schema->fields()))
+          << "EXPECTED:\n"
+          << expected_schema->ToString() << "\nACTUAL:\n"
+          << batch->schema()->ToString();
     }
 
     ASSERT_EQ(row_count, expected_rows());
   }
+  void TestScanProjectedNested(bool fine_grained_selection = false) {
+    auto f32 = field("f32", float32());
+    auto f64 = field("f64", float64());
+    auto i32 = field("i32", int32());
+    auto i64 = field("i64", int64());
+    auto struct1 = field("struct1", struct_({f32, i32}));
+    auto struct2 = field("struct2", struct_({f64, i64, struct1}));
+    this->SetSchema({struct1, struct2, f32, f64, i32, i64});
+    this->ProjectNested({".struct1.f32", ".struct2.struct1", 
".struct2.struct1.f32"});
+    this->SetFilter(equal(field_ref(FieldRef("struct2", "i64")), literal(0)));
+
+    std::shared_ptr<Schema> expected_schema;
+    if (fine_grained_selection) {
+      // Some formats, like Parquet, let you pluck only a part of a complex type
+      expected_schema = schema({
+          field("struct1", struct_({f32})),
+          field("struct2", struct_({i64, struct1})),
+      });
+    } else {
+      expected_schema = schema({struct1, struct2});
+    }

Review comment:
       The overall schema will be the same once we pass through projection, 
since the cast is done in the scanner instead of inside every file format. 
However, the tests here read from the fragment directly, so they check the 
physical schema rather than the post-projection schema. I'll make sure both 
cases are covered in tests, though.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to