gabeiglio commented on code in PR #2410:
URL: https://github.com/apache/iceberg-python/pull/2410#discussion_r2727278833
##########
pyiceberg/partitioning.py:
##########
@@ -249,6 +250,36 @@ def partition_to_path(self, data: Record, schema: Schema)
-> str:
path = "/".join([field_str + "=" + value_str for field_str, value_str
in zip(field_strs, value_strs, strict=True)])
return path
+ def check_compatible(self, schema: Schema, allow_missing_fields: bool =
False) -> None:
+ # if the underlying field is dropped, we cannot check they are
compatible -- continue
+ schema_fields = schema._lazy_id_to_field
+ parents = _index_parents(schema)
+
+ def validate_parents_are_structs(field_id: int) -> None:
+ parent_id = parents.get(field_id)
+ while parent_id:
+ parent_type = schema.find_type(parent_id)
+ if not parent_type.is_struct:
+ raise ValidationError("Invalid partition field parent:
%s", parent_type)
+ parent_id = parents.get(parent_id)
+
+ for field in self.fields:
+ source_field = schema_fields.get(field.source_id)
+ if allow_missing_fields and source_field:
+ continue
+
+ if not isinstance(field.transform, VoidTransform):
+ if source_field:
+ source_type = source_field.field_type
+ if not source_type.is_primitive:
+ raise ValidationError(f"Cannot partition by
non-primitive source field: {source_type}")
+ if not field.transform.can_transform(source_type):
+ raise ValidationError(f"Invalid source type
{source_type} for transform: {field.transform}")
+ # The only valid parent types for a PartitionField are
StructTypes. This must be checked recursively
+ validate_parents_are_structs(field.source_id)
+ else:
+ raise ValidationError(f"Cannot find source column for
partition field: {field}")
Review Comment:
> compute the id to field logic once instead of indexing for each field.

IIUC, it is a cached property, so it would only be indexed once throughout
its lifetime.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]