Fokko commented on code in PR #8393:
URL: https://github.com/apache/iceberg/pull/8393#discussion_r1306486592
##########
python/pyiceberg/schema.py:
##########
@@ -882,6 +929,57 @@ def index_by_id(schema_or_type: Union[Schema,
IcebergType]) -> Dict[int, NestedF
return visit(schema_or_type, _IndexById())
+class _IndexParents(SchemaVisitor[Dict[int, int]]):
+ def __init__(self) -> None:
+ self.id_to_parent: Dict[int, int] = {}
+ self.id_stack: List[int] = []
+
+ def before_field(self, field: NestedField) -> None:
+ self.id_stack.append(field.field_id)
+
+ def after_field(self, field: NestedField) -> None:
+ self.id_stack.pop()
+
+ def schema(self, schema: Schema, struct_result: Dict[int, int]) ->
Dict[int, int]:
+ return self.id_to_parent
+
+ def struct(self, struct: StructType, field_results: List[Dict[int, int]])
-> Dict[int, int]:
+ for field in struct.fields:
+ parent_id = self.id_stack[-1] if self.id_stack else None
+ if parent_id is not None:
+ # fields in the root struct are not added
+ self.id_to_parent[field.field_id] = parent_id
+
+ return self.id_to_parent
+
+ def field(self, field: NestedField, field_result: Dict[int, int]) ->
Dict[int, int]:
+ return self.id_to_parent
+
+ def list(self, list_type: ListType, element_result: Dict[int, int]) ->
Dict[int, int]:
+ self.id_to_parent[list_type.element_id] = self.id_stack[-1]
+ return self.id_to_parent
+
+ def map(self, map_type: MapType, key_result: Dict[int, int], value_result:
Dict[int, int]) -> Dict[int, int]:
+ self.id_to_parent[map_type.key_id] = self.id_stack[-1]
+ self.id_to_parent[map_type.value_id] = self.id_stack[-1]
+ return self.id_to_parent
+
+ def primitive(self, primitive: PrimitiveType) -> Dict[int, int]:
+ return self.id_to_parent
+
+
+def index_parents(schema_or_type: Union[Schema, IcebergType]) -> Dict[int,
int]:
Review Comment:
Ah, I think it makes more sense to access it through the schema, for example
`schema.field_parent_id(1)`, similar to `find_column_name`. A nice thing here
is that we can cache the dict so we don't have to compute it every time.
```python
@cached_property
def _lazy_id_to_name(self) -> Dict[int, str]:
```
This dict will then be created if there are identifier fields; otherwise,
it is created the first time we access it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]