This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 0ab3262e Allow Partition data to be nullable in ManifestEntry (#509)
0ab3262e is described below
commit 0ab3262e035867beba246368c2a46d1c9387f65c
Author: Sung Yun <[email protected]>
AuthorDate: Mon Mar 11 02:02:50 2024 -0600
Allow Partition data to be nullable in ManifestEntry (#509)
* fix
* use partition field nullability
---
pyiceberg/manifest.py | 1 +
pyiceberg/partitioning.py | 3 ++-
tests/conftest.py | 2 +-
tests/table/test_partitioning.py | 2 +-
4 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
index 0504626d..03dc3199 100644
--- a/pyiceberg/manifest.py
+++ b/pyiceberg/manifest.py
@@ -308,6 +308,7 @@ def data_file_with_partition(partition_type: StructType, format_version: Literal
field_id=field.field_id,
name=field.name,
field_type=partition_field_to_data_file_partition_field(field.field_type),
+ required=field.required,
)
for field in partition_type.fields
])
diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py
index 6fa02862..a6692b32 100644
--- a/pyiceberg/partitioning.py
+++ b/pyiceberg/partitioning.py
@@ -218,7 +218,8 @@ class PartitionSpec(IcebergBaseModel):
for field in self.fields:
source_type = schema.find_type(field.source_id)
result_type = field.transform.result_type(source_type)
- nested_fields.append(NestedField(field.field_id, field.name, result_type, required=False))
+ required = schema.find_field(field.source_id).required
+ nested_fields.append(NestedField(field.field_id, field.name, result_type, required=required))
return StructType(*nested_fields)
def partition_to_path(self, data: Record, schema: Schema) -> str:
diff --git a/tests/conftest.py b/tests/conftest.py
index a005966e..e090e7c0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -892,7 +892,7 @@ manifest_entry_records = [
"data_file": {
"file_path": "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=1/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00002.parquet",
"file_format": "PARQUET",
- "partition": {"VendorID": 1, "tpep_pickup_datetime": 1925},
+ "partition": {"VendorID": 1, "tpep_pickup_datetime": None},
"record_count": 95050,
"file_size_in_bytes": 1265950,
"block_size_in_bytes": 67108864,
diff --git a/tests/table/test_partitioning.py b/tests/table/test_partitioning.py
index cb60c9a8..d7425bc3 100644
--- a/tests/table/test_partitioning.py
+++ b/tests/table/test_partitioning.py
@@ -127,5 +127,5 @@ def test_partition_type(table_schema_simple: Schema) -> None:
assert spec.partition_type(table_schema_simple) == StructType(
NestedField(field_id=1000, name="str_truncate", field_type=StringType(), required=False),
- NestedField(field_id=1001, name="int_bucket", field_type=IntegerType(), required=False),
+ NestedField(field_id=1001, name="int_bucket", field_type=IntegerType(), required=True),
)