Fokko commented on code in PR #4685:
URL: https://github.com/apache/iceberg/pull/4685#discussion_r871305407


##########
python/tests/test_schema.py:
##########
@@ -347,3 +351,80 @@ def test_schema_find_type(table_schema_simple):
         == table_schema_simple.find_type("BAZ", case_sensitive=False)
         == BooleanType()
     )
+
+
+def test_build_position_accessors(table_schema_nested):
+    accessors = build_position_accessors(table_schema_nested)
+    assert accessors == {
+        1: Accessor(position=0, inner=None),
+        2: Accessor(position=1, inner=None),
+        3: Accessor(position=2, inner=None),
+        5: Accessor(position=3, inner=Accessor(position=0, inner=None)),
+        7: Accessor(position=4, inner=Accessor(position=0, inner=None)),
+        8: Accessor(position=4, inner=Accessor(position=1, inner=None)),
+        9: Accessor(position=4, inner=Accessor(position=1, 
inner=Accessor(position=0, inner=None))),
+        10: Accessor(position=4, inner=Accessor(position=1, 
inner=Accessor(position=1, inner=None))),
+        12: Accessor(position=5, inner=Accessor(position=0, inner=None)),
+        13: Accessor(position=5, inner=Accessor(position=0, 
inner=Accessor(position=0, inner=None))),
+        14: Accessor(position=5, inner=Accessor(position=0, 
inner=Accessor(position=1, inner=None))),
+    }
+
+
+def test_build_position_accessors_list():
+    iceberg_schema = schema.Schema(
+        NestedField(
+            field_id=4,
+            name="qux",
+            field_type=ListType(element_id=5, element_type=StringType(), 
element_is_optional=True),
+            is_optional=True,
+        ),
+        schema_id=1,
+    )
+    accessors = build_position_accessors(iceberg_schema)

Review Comment:
   @rdblue You mentioned that in the Java implementation there is no accessor 
for the field list. I've changed this on the Python side as well to have the 
same behavior.



##########
python/src/iceberg/schema.py:
##########
@@ -279,53 +326,54 @@ def visit(obj, visitor: SchemaVisitor[T]) -> T:
 @visit.register(Schema)
 def _(obj: Schema, visitor: SchemaVisitor[T]) -> T:
     """Visit a Schema with a concrete SchemaVisitor"""
-    return visitor.schema(obj, visit(obj.as_struct(), visitor))
+    return visit(obj.as_struct(), visitor)
 
 
 @visit.register(StructType)
 def _(obj: StructType, visitor: SchemaVisitor[T]) -> T:
     """Visit a StructType with a concrete SchemaVisitor"""
-    results = []
+
+    result = visitor.struct(obj)
+
     for field in obj.fields:
+        visitor.field(field)
         visitor.before_field(field)
-        try:
-            result = visit(field.type, visitor)
-        finally:
-            visitor.after_field(field)
+        visit(field.type, visitor)
+        visitor.after_field(field)
 
-        results.append(visitor.field(field, result))
-
-    return visitor.struct(obj, results)
+    return result
 
 
 @visit.register(ListType)
 def _(obj: ListType, visitor: SchemaVisitor[T]) -> T:
     """Visit a ListType with a concrete SchemaVisitor"""
+    result = visitor.list(obj)
+
+    visitor.field(obj.element)
+
     visitor.before_list_element(obj.element)
-    try:
-        result = visit(obj.element.type, visitor)
-    finally:
-        visitor.after_list_element(obj.element)
+    visit(obj.element.type, visitor)
+    visitor.after_list_element(obj.element)
 
-    return visitor.list(obj, result)
+    return result
 
 
 @visit.register(MapType)
 def _(obj: MapType, visitor: SchemaVisitor[T]) -> T:
     """Visit a MapType with a concrete SchemaVisitor"""
+    result = visitor.map(obj)
+
+    visitor.field(obj.key)

Review Comment:
   I've reverted this 👍🏻 



##########
python/src/iceberg/schema.py:
##########
@@ -262,10 +267,45 @@ def primitive(self, primitive: PrimitiveType) -> T:
         ...  # pragma: no cover
 
 
+@dataclass(init=True, eq=True)
+class Accessor:
+    """An accessor for a specific position in a container that implements the 
StructProtocol"""
+
+    position: int
+    inner: Optional["Accessor"] = None
+
+    def __str__(self):
+        return f"Accessor(position={self.position},inner={self.inner})"
+
+    def __repr__(self):
+        return self.__str__()
+
+    def with_inner(self, accessor: "Accessor") -> "Accessor":
+        parent = copy.deepcopy(self)
+        acc = parent
+        while acc.inner:
+            acc = acc.inner
+        acc.inner = accessor
+        return parent
+
+    def get(self, container: StructProtocol) -> Any:
+        """Returns the value at self.position in `container`
+
+        Args:
+            container(StructProtocol): A container to access at position 
`self.position`
+
+        Returns:
+            Any: The value at position `self.position` in the container
+        """
+        return container.get(self.position)
+
+
 @singledispatch
 def visit(obj, visitor: SchemaVisitor[T]) -> T:
     """A generic function for applying a schema visitor to any point within a 
schema
 
+    The function traverses the schema in pre-order fashion

Review Comment:
   @rdblue I've seen the light, thank you. Just reverted it to post-order 
traversal. It took me a while to wrap my head around it, but I like the fact 
that there is no state in the visitor itself. This makes is much cleaner. 



##########
python/src/iceberg/schema.py:
##########
@@ -262,10 +267,52 @@ def primitive(self, primitive: PrimitiveType) -> T:
         ...  # pragma: no cover
 
 
+@dataclass(init=True, eq=True)
+class Accessor:
+    """An accessor for a specific position in a container that implements the 
StructProtocol"""
+
+    position: int
+    inner: Optional["Accessor"] = None
+
+    def __str__(self):
+        return f"Accessor(position={self.position},inner={self.inner})"
+
+    def __repr__(self):
+        return self.__str__()
+
+    def with_inner(self, accessor: "Accessor") -> "Accessor":

Review Comment:
   We don't need this one anymore :)



##########
python/src/iceberg/files.py:
##########
@@ -46,8 +46,10 @@ class FileFormat(Enum):
 class StructProtocol(Protocol):  # pragma: no cover
     """A generic protocol used by accessors to get and set at positions of an 
object"""
 
+    @abstractmethod

Review Comment:
   Unfortunately not. At least, PyCharm didn't recognize it and wasn't 
suggesting implementing the abstract classes.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to