Copilot commented on code in PR #49125:
URL: https://github.com/apache/arrow/pull/49125#discussion_r2763489756


##########
cpp/src/parquet/arrow/schema.cc:
##########
@@ -694,17 +698,77 @@ Status MapToSchemaField(const GroupNode& group, LevelInfo 
current_levels,
   return Status::OK();
 }
 
+Status ResolveList(const GroupNode& group, const Node& list_node,
+                   LevelInfo current_levels, SchemaTreeContext* ctx,
+                   const SchemaField* out, SchemaField* child_field) {
+  auto check_two_level_list_repeated = [](const GroupNode& group) -> Status {
+    // When it is repeated, the LIST-annotated 2-level structure can only 
serve as an
+    // element within another LIST-annotated 2-level structure.
+    if (group.is_repeated() &&
+        (group.parent() == nullptr || 
!group.parent()->logical_type()->is_list())) {
+      return Status::Invalid("LIST-annotated groups must not be repeated.");
+    }
+    return {};
+  };
+
+  if (list_node.is_group()) {
+    const auto& list_group = static_cast<const GroupNode&>(list_node);
+    if (list_group.field_count() > 1) {
+      // The inner type of the list should be a struct when there are multiple 
fields
+      // in the repeated group
+      RETURN_NOT_OK(check_two_level_list_repeated(group));
+      return GroupToStruct(list_group, current_levels, ctx, out, child_field);
+    }
+    if (list_group.field_count() == 0) {
+      return Status::Invalid("Group must have at least one child.");
+    }
+
+    if (list_group.logical_type()->is_none() && HasListElementName(list_group, 
group)) {
+      // Backward-compatibility rule 4
+      RETURN_NOT_OK(check_two_level_list_repeated(group));
+      return GroupToStruct(list_group, current_levels, ctx, out, child_field);
+    }
+
+    const auto& repeated_field = list_group.field(0);
+    if (!list_group.logical_type()->is_none() || 
repeated_field->is_repeated()) {
+      RETURN_NOT_OK(check_two_level_list_repeated(group));
+      if (list_group.logical_type()->is_list()) {
+        return ListToSchemaField(list_group, current_levels, ctx, out, 
child_field);
+      } else if (list_group.logical_type()->is_map()) {
+        return MapToSchemaField(list_group, current_levels, ctx, out, 
child_field);
+      } else {
+        current_levels.Increment(list_group);

Review Comment:
   In ResolveList(), `current_levels` has already been incremented for the 
repeated `list_node` (ListToSchemaField() calls 
`current_levels.IncrementRepeated()` before calling ResolveList). Since 
`list_group` is that same repeated node, calling 
`current_levels.Increment(list_group)` here will increment rep/def levels a 
second time and can produce incorrect LevelInfo (and therefore incorrect 
decoding) for LISTs whose repeated group is annotated with a non-(list/map) 
logical type or has a repeated child. Consider removing this increment (or only 
incrementing when there is an additional optional layer to account for) and 
pass the already-adjusted `current_levels` through to GroupToStruct().
   ```suggestion
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to