This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new dd8d76d0 Update last-updated-ms for DDL operations (#956)
dd8d76d0 is described below

commit dd8d76d04f2d1c6c8192b83df9e84cee05ad5545
Author: Soumya Ghosh <[email protected]>
AuthorDate: Fri Jul 26 00:07:07 2024 +0530

    Update last-updated-ms for DDL operations (#956)
    
    * Update last-updated-ms for DDL operations
    
    * Update last-updated-ms if there are valid changes
    
    * Update unit tests
---
 pyiceberg/table/__init__.py | 7 +++++++
 tests/catalog/test_glue.py  | 2 ++
 tests/catalog/test_sql.py   | 2 ++
 tests/table/test_init.py    | 4 +++-
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 79af476c..34e9d2c5 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -911,6 +911,9 @@ class _TableMetadataUpdateContext:
             update.sort_order.order_id == sort_order_id for update in 
self._updates if isinstance(update, AddSortOrderUpdate)
         )
 
+    def has_changes(self) -> bool:
+        return len(self._updates) > 0
+
 
 @singledispatch
 def _apply_table_update(update: TableUpdate, base_metadata: TableMetadata, 
context: _TableMetadataUpdateContext) -> TableMetadata:
@@ -1185,6 +1188,10 @@ def update_table_metadata(
     for update in updates:
         new_metadata = _apply_table_update(update, new_metadata, context)
 
+    # Update last_updated_ms if it was not updated by update operations
+    if context.has_changes() and base_metadata.last_updated_ms == 
new_metadata.last_updated_ms:
+        new_metadata = new_metadata.model_copy(update={"last_updated_ms": 
datetime_to_millis(datetime.now().astimezone())})
+
     if enforce_validation:
         return TableMetadataUtil.parse_obj(new_metadata.model_dump())
     else:
diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py
index 10f399b2..e748a71b 100644
--- a/tests/catalog/test_glue.py
+++ b/tests/catalog/test_glue.py
@@ -865,6 +865,7 @@ def test_create_table_transaction(
         partition_spec=PartitionSpec(PartitionField(source_id=1, 
field_id=1000, transform=IdentityTransform(), name="foo")),
         properties={"format-version": format_version},
     ) as txn:
+        last_updated_metadata = txn.table_metadata.last_updated_ms
         with txn.update_schema() as update_schema:
             update_schema.add_column(path="b", field_type=IntegerType())
 
@@ -887,6 +888,7 @@ def test_create_table_transaction(
     assert table.spec().fields_by_source_id(2)[0].name == "bar"
     assert table.spec().fields_by_source_id(2)[0].field_id == 1001
     assert table.spec().fields_by_source_id(2)[0].transform == 
IdentityTransform()
+    assert table.metadata.last_updated_ms > last_updated_metadata
 
 
 @mock_aws
diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py
index 7b48b08a..8046ac7a 100644
--- a/tests/catalog/test_sql.py
+++ b/tests/catalog/test_sql.py
@@ -1270,6 +1270,7 @@ def test_commit_table(catalog: SqlCatalog, 
table_schema_nested: Schema, table_id
     namespace = Catalog.namespace_from(table_identifier_nocatalog)
     catalog.create_namespace(namespace)
     table = catalog.create_table(table_identifier, table_schema_nested)
+    last_updated_ms = table.metadata.last_updated_ms
 
     assert catalog._parse_metadata_version(table.metadata_location) == 0
     assert table.metadata.current_schema_id == 0
@@ -1289,6 +1290,7 @@ def test_commit_table(catalog: SqlCatalog, 
table_schema_nested: Schema, table_id
     assert new_schema
     assert new_schema == update._apply()
     assert new_schema.find_field("b").field_type == IntegerType()
+    assert updated_table_metadata.last_updated_ms > last_updated_ms
 
 
 @pytest.mark.parametrize(
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index 7a5ea86d..b903c2da 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -615,6 +615,7 @@ def test_apply_set_properties_update(table_v2: Table) -> 
None:
         "test_b": "test_b",
         "test_c": "test_c",
     }
+    assert new_metadata_add_only.last_updated_ms > 
base_metadata.last_updated_ms
 
 
 def test_apply_remove_properties_update(table_v2: Table) -> None:
@@ -689,7 +690,7 @@ def test_update_metadata_add_snapshot(table_v2: Table) -> 
None:
         snapshot_id=25,
         parent_snapshot_id=19,
         sequence_number=200,
-        timestamp_ms=1602638573590,
+        timestamp_ms=1602638593590,
         manifest_list="s3:/a/b/c.avro",
         summary=Summary(Operation.APPEND),
         schema_id=3,
@@ -759,6 +760,7 @@ def test_update_metadata_add_update_sort_order(table_v2: 
Table) -> None:
     assert len(new_metadata.sort_orders) == 2
     assert new_metadata.sort_orders[-1] == new_sort_order
     assert new_metadata.default_sort_order_id == new_sort_order.order_id
+    assert new_metadata.last_updated_ms > table_v2.metadata.last_updated_ms
 
 
 def test_update_metadata_update_sort_order_invalid(table_v2: Table) -> None:

Reply via email to