This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new c3c34687 Fix deepcopy of primitive types (#857)
c3c34687 is described below
commit c3c346873ac89cabee4048d218bc20ebf9de15f2
Author: Andre Luis Anastacio <[email protected]>
AuthorDate: Thu Jun 27 04:20:28 2024 -0300
Fix deepcopy of primitive types (#857)
---
pyiceberg/types.py | 2 +-
pyiceberg/utils/singleton.py | 13 ++++++++
tests/conftest.py | 74 ++++++++++++++++++++++++++++++++++++++++++++
tests/table/test_init.py | 28 +++++++++++++++++
tests/test_types.py | 11 +++++++
5 files changed, 127 insertions(+), 1 deletion(-)
diff --git a/pyiceberg/types.py b/pyiceberg/types.py
index 7e3862b7..e92056e0 100644
--- a/pyiceberg/types.py
+++ b/pyiceberg/types.py
@@ -160,7 +160,7 @@ class IcebergType(IcebergBaseModel):
return isinstance(self, StructType)
-class PrimitiveType(IcebergRootModel[str], IcebergType, Singleton):
+class PrimitiveType(Singleton, IcebergRootModel[str], IcebergType):
"""Base class for all Iceberg Primitive Types."""
root: Any = Field()
diff --git a/pyiceberg/utils/singleton.py b/pyiceberg/utils/singleton.py
index 90e909ec..8a4bbf91 100644
--- a/pyiceberg/utils/singleton.py
+++ b/pyiceberg/utils/singleton.py
@@ -47,3 +47,16 @@ class Singleton:
if key not in cls._instances:
cls._instances[key] = super().__new__(cls)
return cls._instances[key]
+
+ def __deepcopy__(self, memo: Dict[int, Any]) -> Any:
+ """
+ Prevent deep copy operations for singletons.
+
+ The IcebergRootModel inherits from Pydantic RootModel,
+ which has its own implementation of deepcopy. When deepcopy
+ runs, it calls the RootModel __deepcopy__ method and ignores
+ that it's a Singleton. To handle this, the order of inheritance
+ is adjusted and a __deepcopy__ method is implemented for
+ singletons that simply returns itself.
+ """
+ return self
diff --git a/tests/conftest.py b/tests/conftest.py
index 2092d93d..d200f3ab 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -865,12 +865,70 @@ EXAMPLE_TABLE_METADATA_V2 = {
"refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag",
"max-ref-age-ms": 10000000}},
}
+TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES = {
+ "format-version": 2,
+ "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
+ "location": "s3://bucket/test/location",
+ "last-sequence-number": 34,
+ "last-updated-ms": 1602638573590,
+ "last-column-id": 7,
+ "current-schema-id": 1,
+ "schemas": [
+ {
+ "type": "struct",
+ "schema-id": 1,
+ "identifier-field-ids": [1],
+ "fields": [
+ {"id": 1, "name": "x", "required": True, "type": "long"},
+ {"id": 4, "name": "a", "required": True, "type": "decimal(16,
2)"},
+ {"id": 5, "name": "b", "required": True, "type": "decimal(16,
8)"},
+ {"id": 6, "name": "c", "required": True, "type": "fixed[16]"},
+ {"id": 7, "name": "d", "required": True, "type": "fixed[18]"},
+ ],
+ }
+ ],
+ "default-spec-id": 0,
+ "partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform":
"identity", "source-id": 1, "field-id": 1000}]}],
+ "last-partition-id": 1000,
+ "properties": {"read.split.target.size": "134217728"},
+ "current-snapshot-id": 3055729675574597004,
+ "snapshots": [
+ {
+ "snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1515100955770,
+ "sequence-number": 0,
+ "summary": {"operation": "append"},
+ "manifest-list": "s3://a/b/1.avro",
+ },
+ {
+ "snapshot-id": 3055729675574597004,
+ "parent-snapshot-id": 3051729675574597004,
+ "timestamp-ms": 1555100955770,
+ "sequence-number": 1,
+ "summary": {"operation": "append"},
+ "manifest-list": "s3://a/b/2.avro",
+ "schema-id": 1,
+ },
+ ],
+ "snapshot-log": [
+ {"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770},
+ {"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770},
+ ],
+ "metadata-log": [{"metadata-file": "s3://bucket/.../v1.json",
"timestamp-ms": 1515100}],
+ "refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag",
"max-ref-age-ms": 10000000}},
+}
+
@pytest.fixture
def example_table_metadata_v2() -> Dict[str, Any]:
return EXAMPLE_TABLE_METADATA_V2
[email protected]
+def table_metadata_v2_with_fixed_and_decimal_types() -> Dict[str, Any]:
+ return TABLE_METADATA_V2_WITH_FIXED_AND_DECIMAL_TYPES
+
+
@pytest.fixture(scope="session")
def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str:
from pyiceberg.io.pyarrow import PyArrowFileIO
@@ -2064,6 +2122,22 @@ def table_v2(example_table_metadata_v2: Dict[str, Any])
-> Table:
)
[email protected]
+def table_v2_with_fixed_and_decimal_types(
+ table_metadata_v2_with_fixed_and_decimal_types: Dict[str, Any],
+) -> Table:
+ table_metadata = TableMetadataV2(
+ **table_metadata_v2_with_fixed_and_decimal_types,
+ )
+ return Table(
+ identifier=("database", "table"),
+ metadata=table_metadata,
+ metadata_location=f"{table_metadata.location}/uuid.metadata.json",
+ io=load_file_io(),
+ catalog=NoopCatalog("NoopCatalog"),
+ )
+
+
@pytest.fixture
def
table_v2_with_extensive_snapshots(example_table_metadata_v2_with_extensive_snapshots:
Dict[str, Any]) -> Table:
table_metadata =
TableMetadataV2(**example_table_metadata_v2_with_extensive_snapshots)
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index 6f8260fa..d7c4ffee 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -93,7 +93,9 @@ from pyiceberg.types import (
BinaryType,
BooleanType,
DateType,
+ DecimalType,
DoubleType,
+ FixedType,
FloatType,
IntegerType,
ListType,
@@ -845,6 +847,32 @@ def test_update_metadata_with_multiple_updates(table_v1:
Table) -> None:
assert new_metadata.properties == {"owner": "test", "test_a": "test_a1"}
+def test_update_metadata_schema_immutability(
+ table_v2_with_fixed_and_decimal_types: TableMetadataV2,
+) -> None:
+ update = SetSnapshotRefUpdate(
+ ref_name="main",
+ type="branch",
+ snapshot_id=3051729675574597004,
+ max_ref_age_ms=123123123,
+ max_snapshot_age_ms=12312312312,
+ min_snapshots_to_keep=1,
+ )
+
+ new_metadata = update_table_metadata(
+ table_v2_with_fixed_and_decimal_types.metadata,
+ (update,),
+ )
+
+ assert new_metadata.schemas[0].fields == (
+ NestedField(field_id=1, name="x", field_type=LongType(),
required=True),
+ NestedField(field_id=4, name="a", field_type=DecimalType(precision=16,
scale=2), required=True),
+ NestedField(field_id=5, name="b", field_type=DecimalType(precision=16,
scale=8), required=True),
+ NestedField(field_id=6, name="c", field_type=FixedType(length=16),
required=True),
+ NestedField(field_id=7, name="d", field_type=FixedType(length=18),
required=True),
+ )
+
+
def test_metadata_isolation_from_illegal_updates(table_v1: Table) -> None:
base_metadata = table_v1.metadata
base_metadata_backup = base_metadata.model_copy(deep=True)
diff --git a/tests/test_types.py b/tests/test_types.py
index f56f5a0f..0ffb1d07 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -619,3 +619,14 @@ def test_types_singleton() -> None:
assert id(BooleanType()) == id(BooleanType())
assert id(FixedType(22)) == id(FixedType(22))
assert id(FixedType(19)) != id(FixedType(25))
+
+
+def test_deepcopy_of_singleton_fixed_type() -> None:
+ """FixedType is a singleton, so deepcopy should return the same instance"""
+ from copy import deepcopy
+
+ list_of_fixed_types = [FixedType(22), FixedType(19)]
+ copied_list = deepcopy(list_of_fixed_types)
+
+ for lhs, rhs in zip(list_of_fixed_types, copied_list):
+ assert id(lhs) == id(rhs)