This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new de47590c fix: support MonthTransform for partitioning (#1176)
de47590c is described below
commit de47590c6ac4f507cb2337c20504a62c484339f9
Author: Felix Scherz <[email protected]>
AuthorDate: Mon Sep 16 22:27:08 2024 +0200
fix: support MonthTransform for partitioning (#1176)
* fix: support MonthTransform for partitioning
* test: add tests for generating default names for other transforms
* fix: delete duplicate test case
* chore: run formatting
---
pyiceberg/partitioning.py | 3 ++
tests/integration/test_partition_evolution.py | 64 +++++++++++++++++++++++++++
2 files changed, 67 insertions(+)
diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py
index da52d5df..5f9178eb 100644
--- a/pyiceberg/partitioning.py
+++ b/pyiceberg/partitioning.py
@@ -46,6 +46,7 @@ from pyiceberg.transforms import (
DayTransform,
HourTransform,
IdentityTransform,
+ MonthTransform,
Transform,
TruncateTransform,
UnknownTransform,
@@ -359,6 +360,8 @@ def _visit_partition_field(schema: Schema, field:
PartitionField, visitor: Parti
return visitor.day(field.field_id, source_name, field.source_id)
elif isinstance(transform, HourTransform):
return visitor.hour(field.field_id, source_name, field.source_id)
+ elif isinstance(transform, MonthTransform):
+ return visitor.month(field.field_id, source_name, field.source_id)
elif isinstance(transform, YearTransform):
return visitor.year(field.field_id, source_name, field.source_id)
elif isinstance(transform, VoidTransform):
diff --git a/tests/integration/test_partition_evolution.py
b/tests/integration/test_partition_evolution.py
index 805c0c1f..0e607a46 100644
--- a/tests/integration/test_partition_evolution.py
+++ b/tests/integration/test_partition_evolution.py
@@ -92,6 +92,14 @@ def test_add_year(catalog: Catalog) -> None:
_validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2,
1000, YearTransform(), "year_transform"))
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
+def test_add_year_generates_default_name(catalog: Catalog) -> None:
+ table = _table(catalog)
+ table.update_spec().add_field("event_ts", YearTransform()).commit()
+ _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2,
1000, YearTransform(), "event_ts_year"))
+
+
@pytest.mark.integration
@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
def test_add_month(catalog: Catalog) -> None:
@@ -100,6 +108,14 @@ def test_add_month(catalog: Catalog) -> None:
_validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2,
1000, MonthTransform(), "month_transform"))
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
+def test_add_month_generates_default_name(catalog: Catalog) -> None:
+ table = _table(catalog)
+ table.update_spec().add_field("event_ts", MonthTransform()).commit()
+ _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2,
1000, MonthTransform(), "event_ts_month"))
+
+
@pytest.mark.integration
@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
def test_add_day(catalog: Catalog) -> None:
@@ -108,6 +124,14 @@ def test_add_day(catalog: Catalog) -> None:
_validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2,
1000, DayTransform(), "day_transform"))
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
+def test_add_day_generates_default_name(catalog: Catalog) -> None:
+ table = _table(catalog)
+ table.update_spec().add_field("event_ts", DayTransform()).commit()
+ _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2,
1000, DayTransform(), "event_ts_day"))
+
+
@pytest.mark.integration
@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
def test_add_hour(catalog: Catalog) -> None:
@@ -116,6 +140,14 @@ def test_add_hour(catalog: Catalog) -> None:
_validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2,
1000, HourTransform(), "hour_transform"))
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
+def test_add_hour_generates_default_name(catalog: Catalog) -> None:
+ table = _table(catalog)
+ table.update_spec().add_field("event_ts", HourTransform()).commit()
+ _validate_new_partition_fields(table, 1000, 1, 1000, PartitionField(2,
1000, HourTransform(), "event_ts_hour"))
+
+
@pytest.mark.integration
@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None:
@@ -124,6 +156,14 @@ def test_add_bucket(catalog: Catalog, table_schema_simple:
Schema) -> None:
_validate_new_partition_fields(simple_table, 1000, 1, 1000,
PartitionField(1, 1000, BucketTransform(12), "bucket_transform"))
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
+def test_add_bucket_generates_default_name(catalog: Catalog,
table_schema_simple: Schema) -> None:
+ simple_table = _create_table_with_schema(catalog, table_schema_simple, "1")
+ simple_table.update_spec().add_field("foo", BucketTransform(12)).commit()
+ _validate_new_partition_fields(simple_table, 1000, 1, 1000,
PartitionField(1, 1000, BucketTransform(12), "foo_bucket_12"))
+
+
@pytest.mark.integration
@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None:
@@ -134,6 +174,14 @@ def test_add_truncate(catalog: Catalog,
table_schema_simple: Schema) -> None:
)
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
+def test_add_truncate_generates_default_name(catalog: Catalog,
table_schema_simple: Schema) -> None:
+ simple_table = _create_table_with_schema(catalog, table_schema_simple, "1")
+ simple_table.update_spec().add_field("foo", TruncateTransform(1)).commit()
+ _validate_new_partition_fields(simple_table, 1000, 1, 1000,
PartitionField(1, 1000, TruncateTransform(1), "foo_trunc_1"))
+
+
@pytest.mark.integration
@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
def test_multiple_adds(catalog: Catalog) -> None:
@@ -152,6 +200,22 @@ def test_multiple_adds(catalog: Catalog) -> None:
)
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
+def test_add_void(catalog: Catalog, table_schema_simple: Schema) -> None:
+ simple_table = _create_table_with_schema(catalog, table_schema_simple, "1")
+ simple_table.update_spec().add_field("foo", VoidTransform(),
"void_transform").commit()
+ _validate_new_partition_fields(simple_table, 1000, 1, 1000,
PartitionField(1, 1000, VoidTransform(), "void_transform"))
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
+def test_add_void_generates_default_name(catalog: Catalog,
table_schema_simple: Schema) -> None:
+ simple_table = _create_table_with_schema(catalog, table_schema_simple, "1")
+ simple_table.update_spec().add_field("foo", VoidTransform()).commit()
+ _validate_new_partition_fields(simple_table, 1000, 1, 1000,
PartitionField(1, 1000, VoidTransform(), "foo_null"))
+
+
@pytest.mark.integration
@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
def test_add_hour_to_day(catalog: Catalog) -> None: