This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new c3c34687 Fix deepcopy of primitive types (#857)
c3c34687 is described below

commit c3c346873ac89cabee4048d218bc20ebf9de15f2
Author: Andre Luis Anastacio <[email protected]>
AuthorDate: Thu Jun 27 04:20:28 2024 -0300

    Fix deepcopy of primitive types (#857)
---
 pyiceberg/types.py           |  2 +-
 pyiceberg/utils/singleton.py | 13 ++++++++
 tests/conftest.py            | 74 ++++++++++++++++++++++++++++++++++++++++++++
 tests/table/test_init.py     | 28 +++++++++++++++++
 tests/test_types.py          | 11 +++++++
 5 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/pyiceberg/types.py b/pyiceberg/types.py
index 7e3862b7..e92056e0 100644
--- a/pyiceberg/types.py
+++ b/pyiceberg/types.py
@@ -160,7 +160,7 @@ class IcebergType(IcebergBaseModel):
         return isinstance(self, StructType)
 
 
-class PrimitiveType(IcebergRootModel[str], IcebergType, Singleton):
+class PrimitiveType(Singleton, IcebergRootModel[str], IcebergType):
     """Base class for all Iceberg Primitive Types."""
 
     root: Any = Field()
diff --git a/pyiceberg/utils/singleton.py b/pyiceberg/utils/singleton.py
index 90e909ec..8a4bbf91 100644
--- a/pyiceberg/utils/singleton.py
+++ b/pyiceberg/utils/singleton.py
@@ -47,3 +47,16 @@ class Singleton:
         if key not in cls._instances:
             cls._instances[key] = super().__new__(cls)
         return cls._instances[key]
+
+    def __deepcopy__(self, memo: Dict[int, Any]) -> Any:
+        """
+        Prevent deep copy operations for singletons.
+
+        The IcebergRootModel inherits from Pydantic RootModel,
+        which has its own implementation of deepcopy. When deepcopy
+        runs, it calls the RootModel __deepcopy__ method and ignores
+        that it's a Singleton. To handle this, the order of inheritance
+        is adjusted and a __deepcopy__ method is implemented for
+        singletons that simply returns itself.
+        """
+        return self
diff --git a/tests/conftest.py b/tests/conftest.py
index 2092d93d..d200f3ab 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -865,12 +865,70 @@ EXAMPLE_TABLE_METADATA_V2 = {
     "refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag", 
"max-ref-age-ms": 10000000}},
 }
 
+TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES = {
+    "format-version": 2,
+    "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+    "location": "s3://bucket/test/location",
+    "last-sequence-number": 34,
+    "last-updated-ms": 1602638573590,
+    "last-column-id": 7,
+    "current-schema-id": 1,
+    "schemas": [
+        {
+            "type": "struct",
+            "schema-id": 1,
+            "identifier-field-ids": [1],
+            "fields": [
+                {"id": 1, "name": "x", "required": True, "type": "long"},
+                {"id": 4, "name": "a", "required": True, "type": "decimal(16, 
2)"},
+                {"id": 5, "name": "b", "required": True, "type": "decimal(16, 
8)"},
+                {"id": 6, "name": "c", "required": True, "type": "fixed[16]"},
+                {"id": 7, "name": "d", "required": True, "type": "fixed[18]"},
+            ],
+        }
+    ],
+    "default-spec-id": 0,
+    "partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": 
"identity", "source-id": 1, "field-id": 1000}]}],
+    "last-partition-id": 1000,
+    "properties": {"read.split.target.size": "134217728"},
+    "current-snapshot-id": 3055729675574597004,
+    "snapshots": [
+        {
+            "snapshot-id": 3051729675574597004,
+            "timestamp-ms": 1515100955770,
+            "sequence-number": 0,
+            "summary": {"operation": "append"},
+            "manifest-list": "s3://a/b/1.avro",
+        },
+        {
+            "snapshot-id": 3055729675574597004,
+            "parent-snapshot-id": 3051729675574597004,
+            "timestamp-ms": 1555100955770,
+            "sequence-number": 1,
+            "summary": {"operation": "append"},
+            "manifest-list": "s3://a/b/2.avro",
+            "schema-id": 1,
+        },
+    ],
+    "snapshot-log": [
+        {"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770},
+        {"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770},
+    ],
+    "metadata-log": [{"metadata-file": "s3://bucket/.../v1.json", 
"timestamp-ms": 1515100}],
+    "refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag", 
"max-ref-age-ms": 10000000}},
+}
+
 
 @pytest.fixture
 def example_table_metadata_v2() -> Dict[str, Any]:
     return EXAMPLE_TABLE_METADATA_V2
 
 
[email protected]
+def table_metadata_v2_with_fixed_and_decimal_types() -> Dict[str, Any]:
+    return TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES
+
+
 @pytest.fixture(scope="session")
 def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str:
     from pyiceberg.io.pyarrow import PyArrowFileIO
@@ -2064,6 +2122,22 @@ def table_v2(example_table_metadata_v2: Dict[str, Any]) 
-> Table:
     )
 
 
[email protected]
+def table_v2_with_fixed_and_decimal_types(
+    table_metadata_v2_with_fixed_and_decimal_types: Dict[str, Any],
+) -> Table:
+    table_metadata = TableMetadataV2(
+        **table_metadata_v2_with_fixed_and_decimal_types,
+    )
+    return Table(
+        identifier=("database", "table"),
+        metadata=table_metadata,
+        metadata_location=f"{table_metadata.location}/uuid.metadata.json",
+        io=load_file_io(),
+        catalog=NoopCatalog("NoopCatalog"),
+    )
+
+
 @pytest.fixture
 def 
table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots:
 Dict[str, Any]) -> Table:
     table_metadata = 
TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots)
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index 6f8260fa..d7c4ffee 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -93,7 +93,9 @@ from pyiceberg.types import (
     BinaryType,
     BooleanType,
     DateType,
+    DecimalType,
     DoubleType,
+    FixedType,
     FloatType,
     IntegerType,
     ListType,
@@ -845,6 +847,32 @@ def test_update_metadata_with_multiple_updates(table_v1: 
Table) -> None:
     assert new_metadata.properties == {"owner": "test", "test_a": "test_a1"}
 
 
+def test_update_metadata_schema_immutability(
+    table_v2_with_fixed_and_decimal_types: TableMetadataV2,
+) -> None:
+    update = SetSnapshotRefUpdate(
+        ref_name="main",
+        type="branch",
+        snapshot_id=3051729675574597004,
+        max_ref_age_ms=123123123,
+        max_snapshot_age_ms=12312312312,
+        min_snapshots_to_keep=1,
+    )
+
+    new_metadata = update_table_metadata(
+        table_v2_with_fixed_and_decimal_types.metadata,
+        (update,),
+    )
+
+    assert new_metadata.schemas[0].fields == (
+        NestedField(field_id=1, name="x", field_type=LongType(), 
required=True),
+        NestedField(field_id=4, name="a", field_type=DecimalType(precision=16, 
scale=2), required=True),
+        NestedField(field_id=5, name="b", field_type=DecimalType(precision=16, 
scale=8), required=True),
+        NestedField(field_id=6, name="c", field_type=FixedType(length=16), 
required=True),
+        NestedField(field_id=7, name="d", field_type=FixedType(length=18), 
required=True),
+    )
+
+
 def test_metadata_isolation_from_illegal_updates(table_v1: Table) -> None:
     base_metadata = table_v1.metadata
     base_metadata_backup = base_metadata.model_copy(deep=True)
diff --git a/tests/test_types.py b/tests/test_types.py
index f56f5a0f..0ffb1d07 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -619,3 +619,14 @@ def test_types_singleton() -> None:
     assert id(BooleanType()) == id(BooleanType())
     assert id(FixedType(22)) == id(FixedType(22))
     assert id(FixedType(19)) != id(FixedType(25))
+
+
+def test_deepcopy_of_singleton_fixed_type() -> None:
+    """FixedType is a singleton, so deepcopy should return the same instance"""
+    from copy import deepcopy
+
+    list_of_fixed_types = [FixedType(22), FixedType(19)]
+    copied_list = deepcopy(list_of_fixed_types)
+
+    for lhs, rhs in zip(list_of_fixed_types, copied_list):
+        assert id(lhs) == id(rhs)

Reply via email to