This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 41258189 fix: Add check table UUID to detect table replacement (#2890)
41258189 is described below

commit 41258189173f94c9099fa55f149c7bc11c2033a1
Author: geruh <[email protected]>
AuthorDate: Thu Jan 15 16:12:08 2026 -0800

    fix: Add check table UUID to detect table replacement (#2890)
    
    # Rationale for this change
    
    This PR adds table UUID validation on refresh and commit to detect when
    a table has been replaced. For example, if a table is dropped and
    recreated with the same name, this prevents accidentally operating on a
    different table than expected.
    
    Modeled after the Java implementation:
    https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java#L202-L209
    
    Python was missing this check.
    
    ## Are these changes tested?
    
    Added some tests at the table and catalog level
    
    ## Are there any user-facing changes?
    
    no
    
    ---------
    
    Co-authored-by: Kevin Liu <[email protected]>
---
 pyiceberg/table/__init__.py | 13 +++++++
 tests/catalog/test_rest.py  | 88 +++++++++++++++++++++++++++++++++++++++++++++
 tests/table/test_init.py    | 18 ++++++++++
 3 files changed, 119 insertions(+)

diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index ae5eb400..9fdf5a70 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -1133,6 +1133,7 @@ class Table:
             An updated instance of the same Iceberg table
         """
         fresh = self.catalog.load_table(self._identifier)
+        self._check_uuid(self.metadata, fresh.metadata)
         self.metadata = fresh.metadata
         self.io = fresh.io
         self.metadata_location = fresh.metadata_location
@@ -1513,9 +1514,21 @@ class Table:
         """Return the snapshot references in the table."""
         return self.metadata.refs
 
+    @staticmethod
+    def _check_uuid(current_metadata: TableMetadata, new_metadata: 
TableMetadata) -> None:
+        """Validate that the table UUID matches after refresh."""
+        current = current_metadata.table_uuid
+        refreshed = new_metadata.table_uuid
+
+        if current != refreshed:
+            raise ValueError(f"Table UUID does not match: current={current} != 
refreshed={refreshed}")
+
     def _do_commit(self, updates: tuple[TableUpdate, ...], requirements: 
tuple[TableRequirement, ...]) -> None:
         response = self.catalog.commit_table(self, requirements, updates)
 
+        # Ensure table uuid has not changed
+        self._check_uuid(self.metadata, response.metadata)
+
         # https://github.com/apache/iceberg/blob/f6faa58/core/src/main/java/org/apache/iceberg/CatalogUtil.java#L527
         # delete old metadata if METADATA_DELETE_AFTER_COMMIT_ENABLED is set to true and uses
         # TableProperties.METADATA_PREVIOUS_VERSIONS_MAX to determine how many previous versions to keep -
diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py
index 8b9cbd89..37c373cc 100644
--- a/tests/catalog/test_rest.py
+++ b/tests/catalog/test_rest.py
@@ -57,6 +57,7 @@ from pyiceberg.table.metadata import TableMetadataV1
 from pyiceberg.table.sorting import SortField, SortOrder
 from pyiceberg.transforms import IdentityTransform, TruncateTransform
 from pyiceberg.typedef import RecursiveDict
+from pyiceberg.types import StringType
 from pyiceberg.utils.config import Config
 
 TEST_URI = "https://iceberg-test-catalog/"
@@ -1165,6 +1166,9 @@ def test_create_staged_table_200(
     example_table_metadata_with_no_location: dict[str, Any],
     example_table_metadata_no_snapshot_v1_rest_json: dict[str, Any],
 ) -> None:
+    expected_table_uuid = 
example_table_metadata_with_no_location["metadata"]["table-uuid"]
+    example_table_metadata_no_snapshot_v1_rest_json["metadata"]["table-uuid"] 
= expected_table_uuid
+
     rest_mock.post(
         f"{TEST_URI}v1/namespaces/fokko/tables",
         json=example_table_metadata_with_no_location,
@@ -2226,3 +2230,87 @@ class TestRestCatalogClose:
         # View endpoints should be supported when enabled
         catalog._check_endpoint(Capability.V1_LIST_VIEWS)
         catalog._check_endpoint(Capability.V1_DELETE_VIEW)
+
+
+def test_table_uuid_check_on_commit(rest_mock: Mocker, 
example_table_metadata_v2: dict[str, Any]) -> None:
+    """Test that UUID mismatch is detected on commit response (matches Java RESTTableOperations behavior)."""
+    original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
+    different_uuid = "550e8400-e29b-41d4-a716-446655440000"
+    metadata_location = "s3://warehouse/database/table/metadata.json"
+
+    rest_mock.get(
+        f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
+        json={
+            "metadata-location": metadata_location,
+            "metadata": example_table_metadata_v2,
+            "config": {},
+        },
+        status_code=200,
+        request_headers=TEST_HEADERS,
+    )
+
+    catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
+    table = catalog.load_table(("namespace", "table_name"))
+
+    assert str(table.metadata.table_uuid) == original_uuid
+
+    metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": 
different_uuid}
+
+    rest_mock.post(
+        f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
+        json={
+            "metadata-location": metadata_location,
+            "metadata": metadata_with_different_uuid,
+        },
+        status_code=200,
+        request_headers=TEST_HEADERS,
+    )
+
+    with pytest.raises(ValueError) as exc_info:
+        table.update_schema().add_column("new_col", StringType()).commit()
+
+    assert "Table UUID does not match" in str(exc_info.value)
+    assert f"current={original_uuid}" in str(exc_info.value)
+    assert f"refreshed={different_uuid}" in str(exc_info.value)
+
+
+def test_table_uuid_check_on_refresh(rest_mock: Mocker, 
example_table_metadata_v2: dict[str, Any]) -> None:
+    original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
+    different_uuid = "550e8400-e29b-41d4-a716-446655440000"
+    metadata_location = "s3://warehouse/database/table/metadata.json"
+
+    rest_mock.get(
+        f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
+        json={
+            "metadata-location": metadata_location,
+            "metadata": example_table_metadata_v2,
+            "config": {},
+        },
+        status_code=200,
+        request_headers=TEST_HEADERS,
+    )
+
+    catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
+    table = catalog.load_table(("namespace", "table_name"))
+
+    assert str(table.metadata.table_uuid) == original_uuid
+
+    metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": 
different_uuid}
+
+    rest_mock.get(
+        f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
+        json={
+            "metadata-location": metadata_location,
+            "metadata": metadata_with_different_uuid,
+            "config": {},
+        },
+        status_code=200,
+        request_headers=TEST_HEADERS,
+    )
+
+    with pytest.raises(ValueError) as exc_info:
+        table.refresh()
+
+    assert "Table UUID does not match" in str(exc_info.value)
+    assert f"current={original_uuid}" in str(exc_info.value)
+    assert f"refreshed={different_uuid}" in str(exc_info.value)
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index e40513fe..ff5fbbf3 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -1639,3 +1639,21 @@ def model_roundtrips(model: BaseModel) -> bool:
     if model != type(model).model_validate(model_data):
         pytest.fail(f"model {type(model)} did not roundtrip successfully")
     return True
+
+
+def test_check_uuid_raises_when_mismatch(table_v2: Table, 
example_table_metadata_v2: dict[str, Any]) -> None:
+    different_uuid = "550e8400-e29b-41d4-a716-446655440000"
+    metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": 
different_uuid}
+    new_metadata = TableMetadataV2(**metadata_with_different_uuid)
+
+    with pytest.raises(ValueError) as exc_info:
+        Table._check_uuid(table_v2.metadata, new_metadata)
+
+    assert "Table UUID does not match" in str(exc_info.value)
+    assert different_uuid in str(exc_info.value)
+
+
+def test_check_uuid_passes_when_match(table_v2: Table, 
example_table_metadata_v2: dict[str, Any]) -> None:
+    new_metadata = TableMetadataV2(**example_table_metadata_v2)
+    # Should not raise with same uuid
+    Table._check_uuid(table_v2.metadata, new_metadata)

Reply via email to