pitrou commented on code in PR #43995:
URL: https://github.com/apache/arrow/pull/43995#discussion_r1856174932
##########
cpp/src/parquet/arrow/arrow_reader_writer_test.cc:
##########
@@ -4085,6 +4085,81 @@ TEST(TestArrowReaderAdHoc, OldDataPageV2) {
TryReadDataFile(path);
}
+TEST(TestArrowReaderAdHoc, LegacyTwoLevelList) {
+ auto VerifyData = [](std::unique_ptr<ParquetFileReader> file_reader) {
+ // Expected Parquet schema of legacy two-level encoding
+ constexpr std::string_view kExpectedLegacyList =
+ "required group field_id=-1 a (List) {\n"
+ " repeated group field_id=-1 array (List) {\n"
+ " repeated int32 field_id=-1 array;\n"
+ " }\n"
+ "}\n";
+
+ // Expected Arrow schema and data
+ auto arrow_inner_list =
+ field("array", list(field("array", ::arrow::int32(),
/*nullable=*/false)),
+ /*nullable=*/false);
+ auto arrow_outer_list = list(arrow_inner_list);
+ auto arrow_schema =
+ ::arrow::schema({field("a", arrow_outer_list, /*nullable=*/false)});
+ auto expected_table = TableFromJSON(arrow_schema,
{R"([[[[1,2],[3,4]]]])"});
+
+ // Verify Parquet schema
+ auto root_group = file_reader->metadata()->schema()->group_node();
+ ASSERT_EQ(1, root_group->field_count());
+ std::stringstream nodeStr;
+ PrintSchema(root_group->field(0).get(), nodeStr);
+ ASSERT_EQ(kExpectedLegacyList, nodeStr.str());
+
+ // Verify Arrow schema and data
+ std::unique_ptr<FileReader> reader;
+ ASSERT_OK_NO_THROW(
+ FileReader::Make(default_memory_pool(), std::move(file_reader),
&reader));
+ std::shared_ptr<Table> table;
+ ASSERT_OK(reader->ReadTable(&table));
Review Comment:
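It would have been nice to add a validation call here, right after `ReadTable`, so that the test also checks the table it read is internally consistent: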
```c++
ASSERT_OK(table->ValidateFull());
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org