rdblue commented on code in PR #4685:
URL: https://github.com/apache/iceberg/pull/4685#discussion_r868718919


##########
python/src/iceberg/schema.py:
##########
@@ -498,26 +565,76 @@ def index_name_by_id(schema_or_type) -> Dict[int, str]:
 
 
 class _BuildPositionAccessors(SchemaVisitor[Dict[int, "Accessor"]]):
-    """A schema visitor for generating a field ID to accessor index"""
+    """A schema visitor for generating a field ID to accessor index
+
+    Example:
+        >>> from iceberg.schema import Schema
+        >>> from iceberg.types import *
+        >>> schema = Schema(
+        ...     NestedField(field_id=2, name="id", field_type=IntegerType(), 
is_optional=False),
+        ...     NestedField(field_id=1, name="data", field_type=StringType(), 
is_optional=True),
+        ...     NestedField(
+        ...         field_id=3,
+        ...         name="location",
+        ...         field_type=StructType(
+        ...             NestedField(field_id=5, name="latitude", 
field_type=FloatType(), is_optional=False),
+        ...             NestedField(field_id=6, name="longitude", 
field_type=FloatType(), is_optional=False),
+        ...         ),
+        ...         is_optional=True,
+        ...     ),
+        ...     schema_id=1,
+        ...     identifier_field_ids=[1],
+        ... )
+        >>> result = build_position_accessors(schema)
+        >>> expected = {
+        ...      2: Accessor(position=0, inner=None),
+        ...      1: Accessor(position=1, inner=None),
+        ...      3: Accessor(position=2, inner=None),
+        ...      5: Accessor(position=2, inner=Accessor(position=0, 
inner=None)),
+        ...      6: Accessor(position=2, inner=Accessor(position=1, 
inner=None)),
+        ... }
+        >>> result == expected
+        True
+    """
 
     def __init__(self) -> None:
         self._index: Dict[int, Accessor] = {}
+        self._parents: Dict[int, int] = {}
+        self._pos: Dict[int, int] = defaultdict(lambda: 0)
 
-    def schema(self, schema, result: Dict[int, Accessor]) -> Dict[int, 
Accessor]:
+    def schema(self, schema: Schema) -> Dict[int, Accessor]:
         return self._index
 
-    def struct(self, struct, result: List[Dict[int, Accessor]]) -> Dict[int, 
Accessor]:
-        # TODO: Populate the `self._index` dictionary where the key is the 
field ID and the value is an accessor for that field.
-        #   The equivalent java logic can be found here: 
https://github.com/apache/iceberg/blob/master/api/src/main/java/org/apache/iceberg/Accessors.java#L213-L230
+    def struct(self, struct: StructType) -> Dict[int, Accessor]:
         return self._index
 
-    def field(self, field: NestedField, result: Dict[int, Accessor]) -> 
Dict[int, Accessor]:
+    def field(self, field: NestedField) -> Dict[int, Accessor]:
+        field_type = field.type
+        if isinstance(field_type, StructType):
+            # In the case of a struct, we want to map which one the parent is
+            for inner_field in field_type.fields:
+                self._parents[inner_field.field_id] = field.field_id
+        elif isinstance(field_type, MapType):
+            self._parents[field_type.key.field_id] = field.field_id
+            self._parents[field_type.value.field_id] = field.field_id
+        elif isinstance(field_type, ListType):
+            self._parents[field_type.element.field_id] = field.field_id
+
+        parent = 0
+        if field.field_id in self._parents:
+            parent = self._parents[field.field_id]
+            self._index[field.field_id] = 
self._index[parent].with_inner(Accessor(position=self._pos[parent]))
+        else:
+            self._index[field.field_id] = Accessor(position=self._pos[parent])
+
+        self._pos[parent] = self._pos[parent] + 1

Review Comment:
   I see that this reconstructs each field's position within its parent struct
   by counting the number of calls to `field`. That is a bit hard to follow, at
   least for me. I prefer the more straightforward approach in the Java version,
   where the position is tracked by a local variable, `i`, in `struct`:
   
   ```java
         for (int i = 0; i < fieldResults.size(); i += 1) {
           Types.NestedField field = fields.get(i);
           Map<Integer, Accessor<StructLike>> result = fieldResults.get(i);
           if (result != null) {
             for (Map.Entry<Integer, Accessor<StructLike>> entry : 
result.entrySet()) {
               accessors.put(entry.getKey(), newAccessor(i, field.isOptional(), 
entry.getValue()));
             }
           } else {
             accessors.put(field.fieldId(), newAccessor(i, field.type()));
           }
         }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to