rdblue commented on code in PR #4685:
URL: https://github.com/apache/iceberg/pull/4685#discussion_r871861711


##########
python/src/iceberg/schema.py:
##########
@@ -497,31 +530,78 @@ def index_name_by_id(schema_or_type) -> Dict[int, str]:
     return indexer.by_id()
 
 
-class _BuildPositionAccessors(SchemaVisitor[Dict[int, "Accessor"]]):
-    """A schema visitor for generating a field ID to accessor index"""
+Position = int
 
-    def __init__(self) -> None:
-        self._index: Dict[int, Accessor] = {}
 
-    def schema(self, schema, result: Dict[int, Accessor]) -> Dict[int, 
Accessor]:
-        return self._index
+class _BuildPositionAccessors(SchemaVisitor[Dict[Position, Accessor]]):
+    """A schema visitor for generating a field ID to accessor index
 
-    def struct(self, struct, result: List[Dict[int, Accessor]]) -> Dict[int, 
Accessor]:
-        # TODO: Populate the `self._index` dictionary where the key is the 
field ID and the value is an accessor for that field.
-        #   The equivalent java logic can be found here: 
https://github.com/apache/iceberg/blob/master/api/src/main/java/org/apache/iceberg/Accessors.java#L213-L230
-        return self._index
+    Example:
+        >>> from iceberg.schema import Schema
+        >>> from iceberg.types import *
+        >>> schema = Schema(
+        ...     NestedField(field_id=2, name="id", field_type=IntegerType(), 
is_optional=False),
+        ...     NestedField(field_id=1, name="data", field_type=StringType(), 
is_optional=True),
+        ...     NestedField(
+        ...         field_id=3,
+        ...         name="location",
+        ...         field_type=StructType(
+        ...             NestedField(field_id=5, name="latitude", 
field_type=FloatType(), is_optional=False),
+        ...             NestedField(field_id=6, name="longitude", 
field_type=FloatType(), is_optional=False),
+        ...         ),
+        ...         is_optional=True,
+        ...     ),
+        ...     schema_id=1,
+        ...     identifier_field_ids=[1],
+        ... )
+        >>> result = build_position_accessors(schema)
+        >>> expected = {
+        ...      2: Accessor(position=0, inner=None),
+        ...      1: Accessor(position=1, inner=None),
+        ...      5: Accessor(position=2, inner=Accessor(position=0, 
inner=None)),
+        ...      6: Accessor(position=2, inner=Accessor(position=1, 
inner=None)),
+        ... }
+        >>> result == expected
+        True
+    """
 
-    def field(self, field: NestedField, result: Dict[int, Accessor]) -> 
Dict[int, Accessor]:
-        return self._index
+    @staticmethod
+    def _wrap_leaves(result: Dict[Position, Accessor], position: Position = 0) 
-> Dict[Position, Accessor]:
+        return {field_id: Accessor(position, inner=inner) for field_id, inner 
in result.items()}
 
-    def list(self, list_type: ListType, result: Dict[int, Accessor]) -> 
Dict[int, Accessor]:
-        return self._index
+    def schema(self, schema: Schema, result: Dict[Position, Accessor]) -> 
Dict[Position, Accessor]:
+        return result
 
-    def map(self, map_type: MapType, key_result: Dict[int, Accessor], 
value_result: Dict[int, Accessor]) -> Dict[int, Accessor]:
-        return self._index
+    def struct(self, struct: StructType, field_results: List[Dict[Position, 
Accessor]]) -> Dict[Position, Accessor]:
+        result = {}
 
-    def primitive(self, primitive: PrimitiveType) -> Dict[int, Accessor]:
-        return self._index
+        for position, field in enumerate(struct.fields):
+            if field_results[position]:

Review Comment:
   > So you only want to support structs (including nested structs), but 
discard support for lists and maps?
   
   Yes, because maps and lists aren't really supported. An accessor that is 
produced will be able to load, for example, a map by position, but the inner 
accessor for the key will load position 0. What does it mean to call 
`m.get(0)`? It seems to me that we need a different sort of accessor for 
repeated elements like map keys, map values, and list elements: one that gets 
the collection and then calls any inner accessor on each item in the 
collection. We can do that, but Java doesn't actually use it yet, so there's 
not much point in implementing it over here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to