rdblue commented on code in PR #4685:
URL: https://github.com/apache/iceberg/pull/4685#discussion_r868710344
##########
python/src/iceberg/schema.py:
##########
@@ -498,26 +565,76 @@ def index_name_by_id(schema_or_type) -> Dict[int, str]:
class _BuildPositionAccessors(SchemaVisitor[Dict[int, "Accessor"]]):
- """A schema visitor for generating a field ID to accessor index"""
+ """A schema visitor for generating a field ID to accessor index
+
+ Example:
+ >>> from iceberg.schema import Schema
+ >>> from iceberg.types import *
+ >>> schema = Schema(
+ ... NestedField(field_id=2, name="id", field_type=IntegerType(),
is_optional=False),
+ ... NestedField(field_id=1, name="data", field_type=StringType(),
is_optional=True),
+ ... NestedField(
+ ... field_id=3,
+ ... name="location",
+ ... field_type=StructType(
+ ... NestedField(field_id=5, name="latitude",
field_type=FloatType(), is_optional=False),
+ ... NestedField(field_id=6, name="longitude",
field_type=FloatType(), is_optional=False),
+ ... ),
+ ... is_optional=True,
+ ... ),
+ ... schema_id=1,
+ ... identifier_field_ids=[1],
+ ... )
+ >>> result = build_position_accessors(schema)
+ >>> expected = {
+ ... 2: Accessor(position=0, inner=None),
+ ... 1: Accessor(position=1, inner=None),
+ ... 3: Accessor(position=2, inner=None),
+ ... 5: Accessor(position=2, inner=Accessor(position=0,
inner=None)),
+ ... 6: Accessor(position=2, inner=Accessor(position=1,
inner=None)),
+ ... }
+ >>> result == expected
+ True
+ """
def __init__(self) -> None:
self._index: Dict[int, Accessor] = {}
+ self._parents: Dict[int, int] = {}
+ self._pos: Dict[int, int] = defaultdict(lambda: 0)
- def schema(self, schema, result: Dict[int, Accessor]) -> Dict[int,
Accessor]:
+ def schema(self, schema: Schema) -> Dict[int, Accessor]:
return self._index
- def struct(self, struct, result: List[Dict[int, Accessor]]) -> Dict[int,
Accessor]:
- # TODO: Populate the `self._index` dictionary where the key is the
field ID and the value is an accessor for that field.
- # The equivalent java logic can be found here:
https://github.com/apache/iceberg/blob/master/api/src/main/java/org/apache/iceberg/Accessors.java#L213-L230
+ def struct(self, struct: StructType) -> Dict[int, Accessor]:
return self._index
- def field(self, field: NestedField, result: Dict[int, Accessor]) ->
Dict[int, Accessor]:
+ def field(self, field: NestedField) -> Dict[int, Accessor]:
+ field_type = field.type
+ if isinstance(field_type, StructType):
Review Comment:
I don't think it is necessary to duplicate the traversal logic here. The
traversal can be done in `struct` before `field` is handled. The same applies
to `map` and `list`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]