Fokko commented on code in PR #4717:
URL: https://github.com/apache/iceberg/pull/4717#discussion_r885357634
##########
python/src/iceberg/table/partitioning.py:
##########
@@ -29,38 +36,79 @@ class PartitionField:
name(str): The name of this partition field
"""
- def __init__(self, source_id: int, field_id: int, transform: Transform,
name: str):
- self._source_id = source_id
- self._field_id = field_id
- self._transform = transform
- self._name = name
+ source_id: int
+ field_id: int
+ transform: Transform
+ name: str
+
+ def __str__(self):
+ return f"{self.field_id}: {self.name}:
{self.transform}({self.source_id})"
- @property
- def source_id(self) -> int:
- return self._source_id
- @property
- def field_id(self) -> int:
- return self._field_id
+@dataclass(eq=False, frozen=True)
+class PartitionSpec:
+ """
+ PartitionSpec capture the transformation from table data to partition
values
- @property
- def name(self) -> str:
- return self._name
+ Attributes:
+ schema(Schema): the schema of data table
+ spec_id(int): any change to PartitionSpec will produce a new specId
+ fields(List[PartitionField): list of partition fields to produce
partition values
+ last_assigned_field_id(int): auto-increment partition field id
starting from PARTITION_DATA_ID_START
+ """
- @property
- def transform(self) -> Transform:
- return self._transform
+ schema: Schema
+ spec_id: int
+ fields: Tuple[PartitionField]
+ last_assigned_field_id: int
+ source_id_to_fields_map: Dict[int, List[PartitionField]] =
field(init=False, repr=False)
+
+ def __post_init__(self):
+ source_id_to_fields_map = dict()
+ for partition_field in self.fields:
+ source_column =
self.schema.find_column_name(partition_field.source_id)
+ if not source_column:
+ raise ValueError(f"Cannot find source column:
{partition_field.source_id}")
+ existing = source_id_to_fields_map.get(partition_field.source_id,
[])
+ existing.append(partition_field)
+ source_id_to_fields_map[partition_field.source_id] = existing
+ object.__setattr__(self, "source_id_to_fields_map",
source_id_to_fields_map)
def __eq__(self, other):
- return (
- self.field_id == other.field_id
- and self.source_id == other.source_id
- and self.name == other.name
- and self.transform == other.transform
- )
+ """
+ Equality check on spec_id and partition fields only
+ """
+ return self.spec_id == other.spec_id and self.fields == other.fields
def __str__(self):
- return f"{self.field_id}: {self.name}:
{self.transform}({self.source_id})"
+ """
+ Produce a human-readable string representation of PartitionSpec
+
+ Note:
+ Only include list of partition fields in the PartitionSpec's
string representation
+ """
+ result_str = "["
+ for partition_field in self.fields:
+ result_str += f"\n {str(partition_field)}"
Review Comment:
Instead of:
```python
>>> from iceberg.types import NestedField, StringType, FixedType
>>>
>>> fields = [
... NestedField(
... field_id=19,
... name='foo',
... field_type=FixedType(22),
... is_optional=False,
... ),
... NestedField(
... field_id=25,
... name='foo',
... field_type=StringType(),
... is_optional=True,
... ),
... ]
>>> result_str = "["
>>> for partition_field in fields:
... result_str += f"\n {str(partition_field)}"
...
>>> if len(fields) > 0:
... result_str += "\n"
...
>>> result_str += "]"
>>>
>>> print(result_str)
[
19: foo: required fixed[22]
25: foo: optional string
]
```
We could also write:
```python
result_str = "["
if fields:
result_str += "\n " + "\n ".join([str(field) for field in fields]) + "\n"
result_str += "]"
print(result_str)
```
Which is equivalent:
```python
>>> result_str = "["
>>> if fields:
... result_str += "\n " + "\n ".join([str(field) for field in fields]) + "\n"
...
>>> result_str += "]"
>>>
>>> print(result_str)
[
19: foo: required fixed[22]
25: foo: optional string
]
```
This is more readable, for me at least :)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]