mapleFU commented on code in PR #46678:
URL: https://github.com/apache/arrow/pull/46678#discussion_r2124336537


##########
cpp/src/parquet/arrow/arrow_schema_test.cc:
##########
@@ -360,309 +374,320 @@ TEST_F(TestConvertParquetSchema, ParquetFlatDecimals) {
 }
 
 TEST_F(TestConvertParquetSchema, ParquetMaps) {
-  std::vector<NodePtr> parquet_fields;
-  std::vector<std::shared_ptr<Field>> arrow_fields;
-
   // MAP encoding example taken from parquet-format/LogicalTypes.md
 
-  // Two column map.
-  {
-    auto key = PrimitiveNode::Make("key", Repetition::REQUIRED, 
ParquetType::BYTE_ARRAY,
-                                   ConvertedType::UTF8);
-    auto value = PrimitiveNode::Make("value", Repetition::OPTIONAL,
-                                     ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
+  for (const auto& list_case : kListCases) {
+    std::vector<NodePtr> parquet_fields;
+    std::vector<std::shared_ptr<Field>> arrow_fields;
 
-    auto list = GroupNode::Make("key_value", Repetition::REPEATED, {key, 
value});
-    parquet_fields.push_back(
-        GroupNode::Make("my_map", Repetition::REQUIRED, {list}, 
LogicalType::Map()));
-    auto arrow_key = ::arrow::field("key", UTF8, /*nullable=*/false);
-    auto arrow_value = ::arrow::field("value", UTF8, /*nullable=*/true);
-    auto arrow_map = std::make_shared<::arrow::MapType>(
-        ::arrow::field("my_map", ::arrow::struct_({arrow_key, arrow_value}),
-                       /*nullable=*/false),
-        /*nullable=*/false);
+    // Two column map.
+    {
+      auto key = PrimitiveNode::Make("key", Repetition::REQUIRED, 
ParquetType::BYTE_ARRAY,
+                                     ConvertedType::UTF8);
+      auto value = PrimitiveNode::Make("value", Repetition::OPTIONAL,
+                                       ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
 
-    arrow_fields.push_back(::arrow::field("my_map", arrow_map, 
/*nullable=*/false));
-  }
-  // Single column map (i.e. set) gets converted to list of struct.
-  {
-    auto key = PrimitiveNode::Make("key", Repetition::REQUIRED, 
ParquetType::BYTE_ARRAY,
-                                   ConvertedType::UTF8);
+      auto list = GroupNode::Make("key_value", Repetition::REPEATED, {key, 
value});
+      parquet_fields.push_back(
+          GroupNode::Make("my_map", Repetition::REQUIRED, {list}, 
LogicalType::Map()));
+      auto arrow_key = ::arrow::field("key", UTF8, /*nullable=*/false);
+      auto arrow_value = ::arrow::field("value", UTF8, /*nullable=*/true);
+      auto arrow_map = std::make_shared<::arrow::MapType>(
+          ::arrow::field("my_map", ::arrow::struct_({arrow_key, arrow_value}),
+                         /*nullable=*/false),
+          /*nullable=*/false);
+
+      arrow_fields.push_back(::arrow::field("my_map", arrow_map, 
/*nullable=*/false));
+    }
+    // Single column map (i.e. set) gets converted to list of struct.
+    {
+      auto key = PrimitiveNode::Make("key", Repetition::REQUIRED, 
ParquetType::BYTE_ARRAY,
+                                     ConvertedType::UTF8);
+
+      auto list = GroupNode::Make("key_value", Repetition::REPEATED, {key});
+      parquet_fields.push_back(
+          GroupNode::Make("my_set", Repetition::REQUIRED, {list}, 
LogicalType::Map()));
+      auto arrow_list =
+          list_case.type_factory(::arrow::field("key", UTF8, 
/*nullable=*/false));
+      arrow_fields.push_back(::arrow::field("my_set", arrow_list, false));
+    }
+    // Two column map with non-standard field names.
+    {
+      auto key = PrimitiveNode::Make("int_key", Repetition::REQUIRED, 
ParquetType::INT32,
+                                     ConvertedType::INT_32);
+      auto value = PrimitiveNode::Make("str_value", Repetition::OPTIONAL,
+                                       ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
 
-    auto list = GroupNode::Make("key_value", Repetition::REPEATED, {key});
-    parquet_fields.push_back(
-        GroupNode::Make("my_set", Repetition::REQUIRED, {list}, 
LogicalType::Map()));
-    auto arrow_list = ::arrow::list({::arrow::field("key", UTF8, 
/*nullable=*/false)});
-    arrow_fields.push_back(::arrow::field("my_set", arrow_list, false));
-  }
-  // Two column map with non-standard field names.
-  {
-    auto key = PrimitiveNode::Make("int_key", Repetition::REQUIRED, 
ParquetType::INT32,
-                                   ConvertedType::INT_32);
-    auto value = PrimitiveNode::Make("str_value", Repetition::OPTIONAL,
-                                     ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
+      auto list = GroupNode::Make("items", Repetition::REPEATED, {key, value});
+      parquet_fields.push_back(
+          GroupNode::Make("items", Repetition::REQUIRED, {list}, 
LogicalType::Map()));
+      auto arrow_value = ::arrow::field("str_value", UTF8, /*nullable=*/true);
+      auto arrow_key = ::arrow::field("int_key", INT32, /*nullable=*/false);
+      auto arrow_map = std::make_shared<::arrow::MapType>(
+          ::arrow::field("items", ::arrow::struct_({arrow_key, arrow_value}), 
false),
+          false);
 
-    auto list = GroupNode::Make("items", Repetition::REPEATED, {key, value});
-    parquet_fields.push_back(
-        GroupNode::Make("items", Repetition::REQUIRED, {list}, 
LogicalType::Map()));
-    auto arrow_value = ::arrow::field("str_value", UTF8, /*nullable=*/true);
-    auto arrow_key = ::arrow::field("int_key", INT32, /*nullable=*/false);
-    auto arrow_map = std::make_shared<::arrow::MapType>(
-        ::arrow::field("items", ::arrow::struct_({arrow_key, arrow_value}), 
false),
-        false);
-
-    arrow_fields.push_back(::arrow::field("items", arrow_map, false));
-  }
+      arrow_fields.push_back(::arrow::field("items", arrow_map, false));
+    }
 
-  auto arrow_schema = ::arrow::schema(arrow_fields);
-  ASSERT_OK(ConvertSchema(parquet_fields));
+    auto arrow_schema = ::arrow::schema(arrow_fields);
 
-  ASSERT_NO_FATAL_FAILURE(CheckFlatSchema(arrow_schema));
-  for (int i = 0; i < arrow_schema->num_fields(); ++i) {
-    auto result_field = result_schema_->field(i);
-    auto expected_field = arrow_schema->field(i);
-    if (expected_field->type()->id() == ::arrow::Type::MAP) {
-      EXPECT_TRUE(
-          
expected_field->type()->field(0)->Equals(result_field->type()->field(0)))
-          << "Map's struct in field " << i
-          << "\n result: " << result_field->type()->field(0)->ToString() << " "
-          << "\n expected: " << expected_field->type()->field(0)->ToString() 
<< "\n";
+    ArrowReaderProperties props;
+    props.set_list_type(list_case.type_id);
+    ASSERT_OK(ConvertSchema(parquet_fields, /*key_value_metadata=*/{}, props));
+
+    ASSERT_NO_FATAL_FAILURE(CheckFlatSchema(arrow_schema));
+    for (int i = 0; i < arrow_schema->num_fields(); ++i) {
+      auto result_field = result_schema_->field(i);
+      auto expected_field = arrow_schema->field(i);
+      if (expected_field->type()->id() == ::arrow::Type::MAP) {
+        EXPECT_TRUE(
+            
expected_field->type()->field(0)->Equals(result_field->type()->field(0)))
+            << "Map's struct in field " << i
+            << "\n result: " << result_field->type()->field(0)->ToString() << 
" "
+            << "\n expected: " << expected_field->type()->field(0)->ToString() 
<< "\n";
+      }
     }
   }
 }
 
 TEST_F(TestConvertParquetSchema, ParquetLists) {
-  std::vector<NodePtr> parquet_fields;
-  std::vector<std::shared_ptr<Field>> arrow_fields;
-
   // LIST encoding example taken from parquet-format/LogicalTypes.md
 
-  // // List<String> (list non-null, elements nullable)
-  // required group my_list (LIST) {
-  //   repeated group list {
-  //     optional binary element (UTF8);
-  //   }
-  // }
-  {
-    auto element = PrimitiveNode::Make("string", Repetition::OPTIONAL,
-                                       ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
-    auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
-    parquet_fields.push_back(
-        GroupNode::Make("my_list", Repetition::REQUIRED, {list}, 
ConvertedType::LIST));
-    auto arrow_element = ::arrow::field("string", UTF8, true);
-    auto arrow_list = ::arrow::list(arrow_element);
-    arrow_fields.push_back(::arrow::field("my_list", arrow_list, false));
-  }
+  for (const auto& list_case : kListCases) {
+    std::vector<NodePtr> parquet_fields;
+    std::vector<std::shared_ptr<Field>> arrow_fields;
 
-  // // List<String> (list nullable, elements non-null)
-  // optional group my_list (LIST) {
-  //   repeated group list {
-  //     required binary element (UTF8);
-  //   }
-  // }
-  {
-    auto element = PrimitiveNode::Make("string", Repetition::REQUIRED,
-                                       ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
-    auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
-    parquet_fields.push_back(
-        GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, 
ConvertedType::LIST));
-    auto arrow_element = ::arrow::field("string", UTF8, false);
-    auto arrow_list = ::arrow::list(arrow_element);
-    arrow_fields.push_back(::arrow::field("my_list", arrow_list, true));
-  }
+    // // List<String> (list non-null, elements nullable)
+    // required group my_list (LIST) {
+    //   repeated group list {
+    //     optional binary element (UTF8);
+    //   }
+    // }
+    {
+      auto element = PrimitiveNode::Make("string", Repetition::OPTIONAL,
+                                         ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
+      auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
+      parquet_fields.push_back(
+          GroupNode::Make("my_list", Repetition::REQUIRED, {list}, 
ConvertedType::LIST));
+      auto arrow_element = ::arrow::field("string", UTF8, true);
+      auto arrow_list = list_case.type_factory(arrow_element);
+      arrow_fields.push_back(::arrow::field("my_list", arrow_list, false));
+    }
 
-  // Element types can be nested structures. For example, a list of lists:
-  //
-  // // List<List<Integer>>
-  // optional group array_of_arrays (LIST) {
-  //   repeated group list {
-  //     required group element (LIST) {
-  //       repeated group list {
-  //         required int32 element;
-  //       }
-  //     }
-  //   }
-  // }
-  {
-    auto inner_element =
-        PrimitiveNode::Make("int32", Repetition::REQUIRED, ParquetType::INT32);
-    auto inner_list = GroupNode::Make("list", Repetition::REPEATED, 
{inner_element});
-    auto element = GroupNode::Make("element", Repetition::REQUIRED, 
{inner_list},
-                                   ConvertedType::LIST);
-    auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
-    parquet_fields.push_back(GroupNode::Make("array_of_arrays", 
Repetition::OPTIONAL,
-                                             {list}, ConvertedType::LIST));
-    auto arrow_inner_element = ::arrow::field("int32", INT32, false);
-    auto arrow_inner_list = ::arrow::list(arrow_inner_element);
-    auto arrow_element = ::arrow::field("element", arrow_inner_list, false);
-    auto arrow_list = ::arrow::list(arrow_element);
-    arrow_fields.push_back(::arrow::field("array_of_arrays", arrow_list, 
true));
-  }
+    // // List<String> (list nullable, elements non-null)
+    // optional group my_list (LIST) {
+    //   repeated group list {
+    //     required binary element (UTF8);
+    //   }
+    // }
+    {
+      auto element = PrimitiveNode::Make("string", Repetition::REQUIRED,
+                                         ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
+      auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
+      parquet_fields.push_back(
+          GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, 
ConvertedType::LIST));
+      auto arrow_element = ::arrow::field("string", UTF8, false);
+      auto arrow_list = list_case.type_factory(arrow_element);
+      arrow_fields.push_back(::arrow::field("my_list", arrow_list, true));
+    }
 
-  // // List<String> (list nullable, elements non-null)
-  // optional group my_list (LIST) {
-  //   repeated group element {
-  //     required binary str (UTF8);
-  //   };
-  // }
-  {
-    auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
-                                       ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
-    auto list = GroupNode::Make("element", Repetition::REPEATED, {element});
-    parquet_fields.push_back(
-        GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, 
ConvertedType::LIST));
-    auto arrow_element = ::arrow::field("str", UTF8, false);
-    auto arrow_list = ::arrow::list(arrow_element);
-    arrow_fields.push_back(::arrow::field("my_list", arrow_list, true));
-  }
+    // Element types can be nested structures. For example, a list of lists:
+    //
+    // // List<List<Integer>>
+    // optional group array_of_arrays (LIST) {
+    //   repeated group list {
+    //     required group element (LIST) {
+    //       repeated group list {
+    //         required int32 element;
+    //       }
+    //     }
+    //   }
+    // }
+    {
+      auto inner_element =
+          PrimitiveNode::Make("int32", Repetition::REQUIRED, 
ParquetType::INT32);
+      auto inner_list = GroupNode::Make("list", Repetition::REPEATED, 
{inner_element});
+      auto element = GroupNode::Make("element", Repetition::REQUIRED, 
{inner_list},
+                                     ConvertedType::LIST);
+      auto list = GroupNode::Make("list", Repetition::REPEATED, {element});
+      parquet_fields.push_back(GroupNode::Make("array_of_arrays", 
Repetition::OPTIONAL,
+                                               {list}, ConvertedType::LIST));
+      auto arrow_inner_element = ::arrow::field("int32", INT32, false);
+      auto arrow_inner_list = list_case.type_factory(arrow_inner_element);
+      auto arrow_element = ::arrow::field("element", arrow_inner_list, false);
+      auto arrow_list = list_case.type_factory(arrow_element);
+      arrow_fields.push_back(::arrow::field("array_of_arrays", arrow_list, 
true));
+    }
 
-  // // List<Integer> (nullable list, non-null elements)
-  // optional group my_list (LIST) {
-  //   repeated int32 element;
-  // }
-  {
-    auto element =
-        PrimitiveNode::Make("element", Repetition::REPEATED, 
ParquetType::INT32);
-    parquet_fields.push_back(
-        GroupNode::Make("my_list", Repetition::OPTIONAL, {element}, 
ConvertedType::LIST));
-    auto arrow_element = ::arrow::field("element", INT32, false);
-    auto arrow_list = ::arrow::list(arrow_element);
-    arrow_fields.push_back(::arrow::field("my_list", arrow_list, true));
-  }
+    // // List<String> (list nullable, elements non-null)
+    // optional group my_list (LIST) {
+    //   repeated group element {
+    //     required binary str (UTF8);
+    //   };
+    // }
+    {
+      auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
+                                         ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
+      auto list = GroupNode::Make("element", Repetition::REPEATED, {element});
+      parquet_fields.push_back(
+          GroupNode::Make("my_list", Repetition::OPTIONAL, {list}, 
ConvertedType::LIST));
+      auto arrow_element = ::arrow::field("str", UTF8, false);
+      auto arrow_list = list_case.type_factory(arrow_element);
+      arrow_fields.push_back(::arrow::field("my_list", arrow_list, true));
+    }
 
-  // // List<Tuple<String, Integer>> (nullable list, non-null elements)
-  // optional group my_list (LIST) {
-  //   repeated group element {
-  //     required binary str (UTF8);
-  //     required int32 num;
-  //   };
-  // }
-  {
-    auto str_element = PrimitiveNode::Make("str", Repetition::REQUIRED,
-                                           ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
-    auto num_element =
-        PrimitiveNode::Make("num", Repetition::REQUIRED, ParquetType::INT32);
-    auto element =
-        GroupNode::Make("element", Repetition::REPEATED, {str_element, 
num_element});
-    parquet_fields.push_back(
-        GroupNode::Make("my_list", Repetition::OPTIONAL, {element}, 
ConvertedType::LIST));
-    auto arrow_str = ::arrow::field("str", UTF8, false);
-    auto arrow_num = ::arrow::field("num", INT32, false);
-    std::vector<std::shared_ptr<Field>> fields({arrow_str, arrow_num});
-    auto arrow_struct = ::arrow::struct_(fields);
-    auto arrow_element = ::arrow::field("element", arrow_struct, false);
-    auto arrow_list = ::arrow::list(arrow_element);
-    arrow_fields.push_back(::arrow::field("my_list", arrow_list, true));
-  }
+    // // List<Integer> (nullable list, non-null elements)
+    // optional group my_list (LIST) {
+    //   repeated int32 element;
+    // }
+    {
+      auto element =
+          PrimitiveNode::Make("element", Repetition::REPEATED, 
ParquetType::INT32);
+      parquet_fields.push_back(GroupNode::Make("my_list", 
Repetition::OPTIONAL, {element},
+                                               ConvertedType::LIST));
+      auto arrow_element = ::arrow::field("element", INT32, false);
+      auto arrow_list = list_case.type_factory(arrow_element);
+      arrow_fields.push_back(::arrow::field("my_list", arrow_list, true));
+    }
 
-  // // List<OneTuple<String>> (nullable list, non-null elements)
-  // optional group my_list (LIST) {
-  //   repeated group array {
-  //     required binary str (UTF8);
-  //   };
-  // }
-  // Special case: group is named array
-  {
-    auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
-                                       ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
-    auto array = GroupNode::Make("array", Repetition::REPEATED, {element});
-    parquet_fields.push_back(
-        GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, 
ConvertedType::LIST));
-    auto arrow_str = ::arrow::field("str", UTF8, false);
-    std::vector<std::shared_ptr<Field>> fields({arrow_str});
-    auto arrow_struct = ::arrow::struct_(fields);
-    auto arrow_element = ::arrow::field("array", arrow_struct, false);
-    auto arrow_list = ::arrow::list(arrow_element);
-    arrow_fields.push_back(::arrow::field("my_list", arrow_list, true));
-  }
+    // // List<Tuple<String, Integer>> (nullable list, non-null elements)
+    // optional group my_list (LIST) {
+    //   repeated group element {
+    //     required binary str (UTF8);
+    //     required int32 num;
+    //   };
+    // }
+    {
+      auto str_element = PrimitiveNode::Make(
+          "str", Repetition::REQUIRED, ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
+      auto num_element =
+          PrimitiveNode::Make("num", Repetition::REQUIRED, ParquetType::INT32);
+      auto element =
+          GroupNode::Make("element", Repetition::REPEATED, {str_element, 
num_element});
+      parquet_fields.push_back(GroupNode::Make("my_list", 
Repetition::OPTIONAL, {element},
+                                               ConvertedType::LIST));
+      auto arrow_str = ::arrow::field("str", UTF8, false);
+      auto arrow_num = ::arrow::field("num", INT32, false);
+      std::vector<std::shared_ptr<Field>> fields({arrow_str, arrow_num});
+      auto arrow_struct = ::arrow::struct_(fields);
+      auto arrow_element = ::arrow::field("element", arrow_struct, false);
+      auto arrow_list = list_case.type_factory(arrow_element);
+      arrow_fields.push_back(::arrow::field("my_list", arrow_list, true));
+    }
 
-  // // List<OneTuple<String>> (nullable list, non-null elements)
-  // optional group my_list (LIST) {
-  //   repeated group my_list_tuple {
-  //     required binary str (UTF8);
-  //   };
-  // }
-  // Special case: group named ends in _tuple
-  {
-    auto element = PrimitiveNode::Make("str", Repetition::REQUIRED,
-                                       ParquetType::BYTE_ARRAY, 
ConvertedType::UTF8);
-    auto array = GroupNode::Make("my_list_tuple", Repetition::REPEATED, 
{element});
-    parquet_fields.push_back(
-        GroupNode::Make("my_list", Repetition::OPTIONAL, {array}, 
ConvertedType::LIST));
-    auto arrow_str = ::arrow::field("str", UTF8, false);
-    std::vector<std::shared_ptr<Field>> fields({arrow_str});
-    auto arrow_struct = ::arrow::struct_(fields);
-    auto arrow_element = ::arrow::field("my_list_tuple", arrow_struct, false);
-    auto arrow_list = ::arrow::list(arrow_element);
-    arrow_fields.push_back(::arrow::field("my_list", arrow_list, true));
-  }
+    // // List<OneTuple<String>> (nullable list, non-null elements)
+    // optional group my_list (LIST) {
+    //   repeated group array {
+    //     required binary str (UTF8);
+    //   };
+    // }
+    // Special case: group is named array

Review Comment:
   ohhh, my bad



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to