Fokko commented on code in PR #4685:
URL: https://github.com/apache/iceberg/pull/4685#discussion_r871846400


##########
python/src/iceberg/schema.py:
##########
@@ -497,31 +530,78 @@ def index_name_by_id(schema_or_type) -> Dict[int, str]:
     return indexer.by_id()
 
 
-class _BuildPositionAccessors(SchemaVisitor[Dict[int, "Accessor"]]):
-    """A schema visitor for generating a field ID to accessor index"""
+Position = int
 
-    def __init__(self) -> None:
-        self._index: Dict[int, Accessor] = {}
 
-    def schema(self, schema, result: Dict[int, Accessor]) -> Dict[int, 
Accessor]:
-        return self._index
+class _BuildPositionAccessors(SchemaVisitor[Dict[Position, Accessor]]):
+    """A schema visitor for generating a field ID to accessor index
 
-    def struct(self, struct, result: List[Dict[int, Accessor]]) -> Dict[int, 
Accessor]:
-        # TODO: Populate the `self._index` dictionary where the key is the 
field ID and the value is an accessor for that field.
-        #   The equivalent java logic can be found here: 
https://github.com/apache/iceberg/blob/master/api/src/main/java/org/apache/iceberg/Accessors.java#L213-L230
-        return self._index
+    Example:
+        >>> from iceberg.schema import Schema
+        >>> from iceberg.types import *
+        >>> schema = Schema(
+        ...     NestedField(field_id=2, name="id", field_type=IntegerType(), 
is_optional=False),
+        ...     NestedField(field_id=1, name="data", field_type=StringType(), 
is_optional=True),
+        ...     NestedField(
+        ...         field_id=3,
+        ...         name="location",
+        ...         field_type=StructType(
+        ...             NestedField(field_id=5, name="latitude", 
field_type=FloatType(), is_optional=False),
+        ...             NestedField(field_id=6, name="longitude", 
field_type=FloatType(), is_optional=False),
+        ...         ),
+        ...         is_optional=True,
+        ...     ),
+        ...     schema_id=1,
+        ...     identifier_field_ids=[1],
+        ... )
+        >>> result = build_position_accessors(schema)
+        >>> expected = {
+        ...      2: Accessor(position=0, inner=None),
+        ...      1: Accessor(position=1, inner=None),
+        ...      5: Accessor(position=2, inner=Accessor(position=0, 
inner=None)),
+        ...      6: Accessor(position=2, inner=Accessor(position=1, 
inner=None)),
+        ... }
+        >>> result == expected
+        True
+    """
 
-    def field(self, field: NestedField, result: Dict[int, Accessor]) -> 
Dict[int, Accessor]:
-        return self._index
+    @staticmethod
+    def _wrap_leaves(result: Dict[Position, Accessor], position: Position = 0) 
-> Dict[Position, Accessor]:
+        return {field_id: Accessor(position, inner=inner) for field_id, inner 
in result.items()}
 
-    def list(self, list_type: ListType, result: Dict[int, Accessor]) -> 
Dict[int, Accessor]:
-        return self._index
+    def schema(self, schema: Schema, result: Dict[Position, Accessor]) -> 
Dict[Position, Accessor]:
+        return result
 
-    def map(self, map_type: MapType, key_result: Dict[int, Accessor], 
value_result: Dict[int, Accessor]) -> Dict[int, Accessor]:
-        return self._index
+    def struct(self, struct: StructType, field_results: List[Dict[Position, 
Accessor]]) -> Dict[Position, Accessor]:
+        result = {}
 
-    def primitive(self, primitive: PrimitiveType) -> Dict[int, Accessor]:
-        return self._index
+        for position, field in enumerate(struct.fields):
+            if field_results[position]:

Review Comment:
   This is standard Python behavior: an empty dict is falsy, so `if {}:` takes the `else` branch:
   ```python
   ➜  python3
   Python 3.9.12 (main, Mar 26 2022, 15:44:31) 
   [Clang 13.1.6 (clang-1316.0.21.2)] on darwin
   Type "help", "copyright", "credits" or "license" for more information.
   >>> if {}:
   ...   print('if')
   ... else:
   ...   print('else')
   ... 
   else
   ```
   
   > I think that is correct (if `{}` uses the else branch) but I don't think 
there's a need to return accessors for fields inside maps or lists because we 
don't really know how to handle repeated elements. We could extend this later 
so that a repeated layer returns a tuple of the values, but right now we don't 
really need that.
   
   So you want to support only structs (including nested structs) and drop 
support for lists and maps?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org

Reply via email to