This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new de47590c fix: support MonthTransform for partitioning (#1176)
de47590c is described below

commit de47590c6ac4f507cb2337c20504a62c484339f9
Author: Felix Scherz <[email protected]>
AuthorDate: Mon Sep 16 22:27:08 2024 +0200

    fix: support MonthTransform for partitioning (#1176)
    
    * fix: support MonthTransform for partitioning
    
    * test: add tests for generating default names for other transforms
    
    * fix: delete duplicate test case
    
    * chore: run formatting
---
 pyiceberg/partitioning.py                     |  3 ++
 tests/integration/test_partition_evolution.py | 64 +++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py
index da52d5df..5f9178eb 100644
--- a/pyiceberg/partitioning.py
+++ b/pyiceberg/partitioning.py
@@ -46,6 +46,7 @@ from pyiceberg.transforms import (
     DayTransform,
     HourTransform,
     IdentityTransform,
+    MonthTransform,
     Transform,
     TruncateTransform,
     UnknownTransform,
@@ -359,6 +360,8 @@ def _visit_partition_field(schema: Schema, field: 
PartitionField, visitor: Parti
         return visitor.day(field.field_id, source_name, field.source_id)
     elif isinstance(transform, HourTransform):
         return visitor.hour(field.field_id, source_name, field.source_id)
+    elif isinstance(transform, MonthTransform):
+        return visitor.month(field.field_id, source_name, field.source_id)
     elif isinstance(transform, YearTransform):
         return visitor.year(field.field_id, source_name, field.source_id)
     elif isinstance(transform, VoidTransform):
diff --git a/tests/integration/test_partition_evolution.py 
b/tests/integration/test_partition_evolution.py
index 805c0c1f..0e607a46 100644
--- a/tests/integration/test_partition_evolution.py
+++ b/tests/integration/test_partition_evolution.py
@@ -92,6 +92,14 @@ def test_add_year(catalog: Catalog) -> None:
     _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 
1000, YearTransform(), "year_transform"))
 
 
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
+def test_add_year_generates_default_name(catalog: Catalog) -> None:
+    table = _table(catalog)
+    table.update_spec().add_field("event_ts", YearTransform()).commit()
+    _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 
1000, YearTransform(), "event_ts_year"))
+
+
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
 def test_add_month(catalog: Catalog) -> None:
@@ -100,6 +108,14 @@ def test_add_month(catalog: Catalog) -> None:
     _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 
1000, MonthTransform(), "month_transform"))
 
 
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
+def test_add_month_generates_default_name(catalog: Catalog) -> None:
+    table = _table(catalog)
+    table.update_spec().add_field("event_ts", MonthTransform()).commit()
+    _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 
1000, MonthTransform(), "event_ts_month"))
+
+
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
 def test_add_day(catalog: Catalog) -> None:
@@ -108,6 +124,14 @@ def test_add_day(catalog: Catalog) -> None:
     _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 
1000, DayTransform(), "day_transform"))
 
 
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
+def test_add_day_generates_default_name(catalog: Catalog) -> None:
+    table = _table(catalog)
+    table.update_spec().add_field("event_ts", DayTransform()).commit()
+    _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 
1000, DayTransform(), "event_ts_day"))
+
+
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
 def test_add_hour(catalog: Catalog) -> None:
@@ -116,6 +140,14 @@ def test_add_hour(catalog: Catalog) -> None:
     _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 
1000, HourTransform(), "hour_transform"))
 
 
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
+def test_add_hour_generates_default_name(catalog: Catalog) -> None:
+    table = _table(catalog)
+    table.update_spec().add_field("event_ts", HourTransform()).commit()
+    _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2, 
1000, HourTransform(), "event_ts_hour"))
+
+
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
 def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None:
@@ -124,6 +156,14 @@ def test_add_bucket(catalog: Catalog, table_schema_simple: 
Schema) -> None:
     _validate_new_partition_fields(simple_table, 1000, 1, 1000, 
PartitionField(1, 1000, BucketTransform(12), "bucket_transform"))
 
 
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
+def test_add_bucket_generates_default_name(catalog: Catalog, 
table_schema_simple: Schema) -> None:
+    simple_table = _create_table_with_schema(catalog, table_schema_simple, "1")
+    simple_table.update_spec().add_field("foo", BucketTransform(12)).commit()
+    _validate_new_partition_fields(simple_table, 1000, 1, 1000, 
PartitionField(1, 1000, BucketTransform(12), "foo_bucket_12"))
+
+
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
 def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None:
@@ -134,6 +174,14 @@ def test_add_truncate(catalog: Catalog, 
table_schema_simple: Schema) -> None:
     )
 
 
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
+def test_add_truncate_generates_default_name(catalog: Catalog, 
table_schema_simple: Schema) -> None:
+    simple_table = _create_table_with_schema(catalog, table_schema_simple, "1")
+    simple_table.update_spec().add_field("foo", TruncateTransform(1)).commit()
+    _validate_new_partition_fields(simple_table, 1000, 1, 1000, 
PartitionField(1, 1000, TruncateTransform(1), "foo_trunc_1"))
+
+
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
 def test_multiple_adds(catalog: Catalog) -> None:
@@ -152,6 +200,22 @@ def test_multiple_adds(catalog: Catalog) -> None:
     )
 
 
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
+def test_add_void(catalog: Catalog, table_schema_simple: Schema) -> None:
+    simple_table = _create_table_with_schema(catalog, table_schema_simple, "1")
+    simple_table.update_spec().add_field("foo", VoidTransform(), 
"void_transform").commit()
+    _validate_new_partition_fields(simple_table, 1000, 1, 1000, 
PartitionField(1, 1000, VoidTransform(), "void_transform"))
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
+def test_add_void_generates_default_name(catalog: Catalog, 
table_schema_simple: Schema) -> None:
+    simple_table = _create_table_with_schema(catalog, table_schema_simple, "1")
+    simple_table.update_spec().add_field("foo", VoidTransform()).commit()
+    _validate_new_partition_fields(simple_table, 1000, 1, 1000, 
PartitionField(1, 1000, VoidTransform(), "foo_null"))
+
+
 @pytest.mark.integration
 @pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")])
 def test_add_hour_to_day(catalog: Catalog) -> None:

Reply via email to