Copilot commented on code in PR #49125:
URL: https://github.com/apache/arrow/pull/49125#discussion_r2763243086
##########
cpp/src/parquet/arrow/schema.cc:
##########
@@ -694,17 +698,76 @@ Status MapToSchemaField(const GroupNode& group, LevelInfo
current_levels,
return Status::OK();
}
+Status ResolveList(const GroupNode& group, const Node& list_node,
+ LevelInfo current_levels, SchemaTreeContext* ctx,
+ const SchemaField* out, SchemaField* child_field) {
+ auto check_two_level_list_repeated = [](const GroupNode& group) -> Status {
+ // When it is repeated, the LIST-annotated 2-level structure can only
serve as an
+ // element within another LIST-annotated 2-level structure.
+ if (group.is_repeated() && group.parent() != nullptr &&
+ !group.parent()->logical_type()->is_list()) {
+ return Status::Invalid("LIST-annotated groups must not be repeated.");
+ }
+ return {};
+ };
+
+ if (list_node.is_group()) {
+ const auto& list_group = static_cast<const GroupNode&>(list_node);
+ if (list_group.field_count() > 1) {
+ // The inner type of the list should be a struct when there are multiple
fields
+ // in the repeated group
+ RETURN_NOT_OK(check_two_level_list_repeated(group));
+ return GroupToStruct(list_group, current_levels, ctx, out, child_field);
+ }
+ if (list_group.field_count() == 0) {
+ return Status::Invalid("Group must have at least one child.");
+ }
+
+ if (list_group.logical_type()->is_none() && HasListElementName(list_group,
group)) {
+ // Backward-compatibility rule 4
+ return GroupToStruct(list_group, current_levels, ctx, out, child_field);
+ }
Review Comment:
In ResolveList(), the backward-compatibility rule 4 branch (logical_type is
none + HasListElementName) returns GroupToStruct() without applying the
repeated LIST validation. This means a LIST-annotated group that is `REPEATED`
could be accepted even when its parent is not LIST-annotated (or parent is
null), and for such cases the repetition/definition levels would also be
computed incorrectly since ListToSchemaField() no longer increments for
`group.is_repeated()`. Apply the same `check_two_level_list_repeated(group)`
guard in this branch (and consider treating `parent()==nullptr` as invalid,
consistent with MapToSchemaField).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]