This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new c3bf16c3 Allow union of `{int,long}`, `{float,double}`, etc (#1283)
c3bf16c3 is described below

commit c3bf16c3d168159f034ce8f4fc079328c27ecb21
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Nov 5 16:54:27 2024 +0100

    Allow union of `{int,long}`, `{float,double}`, etc (#1283)
    
    * Allow union of `{int,long}`, `{float,double}`, etc
    
    * Thanks Kevin!
    
    Co-authored-by: Kevin Liu <[email protected]>
    
    * Thanks Kevin!
    
    Co-authored-by: Kevin Liu <[email protected]>
    
    * MOAR tests
    
    * lint
    
    * Make the tests happy
    
    * Remove redundant test
    
    ---------
    
    Co-authored-by: Kevin Liu <[email protected]>
---
 pyiceberg/table/update/schema.py |  8 +++++++-
 tests/test_schema.py             | 39 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/pyiceberg/table/update/schema.py b/pyiceberg/table/update/schema.py
index 0442a604..0c83628f 100644
--- a/pyiceberg/table/update/schema.py
+++ b/pyiceberg/table/update/schema.py
@@ -770,7 +770,13 @@ class _UnionByNameVisitor(SchemaWithPartnerVisitor[int, bool]):
             self.update_schema.make_column_optional(full_name)
 
         if field.field_type.is_primitive and field.field_type != existing_field.field_type:
-            self.update_schema.update_column(full_name, field_type=field.field_type)
+            try:
+                # If the current type is wider than the new type, then
+                # we perform a noop
+                _ = promote(field.field_type, existing_field.field_type)
+            except ResolveError:
+                # If this is not the case, perform the type evolution
+                self.update_schema.update_column(full_name, field_type=field.field_type)
 
         if field.doc is not None and field.doc != existing_field.doc:
             self.update_schema.update_column(full_name, doc=field.doc)
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 7f2ab906..4d894b0d 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -1189,6 +1189,17 @@ def test_detect_invalid_top_level_maps() -> None:
         _ = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
 
 
+def test_allow_double_to_float() -> None:
+    current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=DoubleType(), required=False))
+    new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=FloatType(), required=False))
+
+    applied = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
+
+    assert applied.as_struct() == current_schema.as_struct()
+    assert len(applied.fields) == 1
+    assert isinstance(applied.fields[0].field_type, DoubleType)
+
+
 def test_promote_float_to_double() -> None:
     current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=FloatType(), required=False))
     new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=DoubleType(), required=False))
@@ -1200,11 +1211,33 @@ def test_promote_float_to_double() -> None:
     assert isinstance(applied.fields[0].field_type, DoubleType)
 
 
-def test_detect_invalid_promotion_double_to_float() -> None:
-    current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=DoubleType(), required=False))
+def test_allow_long_to_int() -> None:
+    current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=LongType(), required=False))
+    new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=IntegerType(), required=False))
+
+    applied = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
+
+    assert applied.as_struct() == current_schema.as_struct()
+    assert len(applied.fields) == 1
+    assert isinstance(applied.fields[0].field_type, LongType)
+
+
+def test_promote_int_to_long() -> None:
+    current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=IntegerType(), required=False))
+    new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=LongType(), required=False))
+
+    applied = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
+
+    assert applied.as_struct() == new_schema.as_struct()
+    assert len(applied.fields) == 1
+    assert isinstance(applied.fields[0].field_type, LongType)
+
+
+def test_detect_invalid_promotion_string_to_float() -> None:
+    current_schema = Schema(NestedField(field_id=1, name="aCol", field_type=StringType(), required=False))
     new_schema = Schema(NestedField(field_id=1, name="aCol", field_type=FloatType(), required=False))
 
-    with pytest.raises(ValidationError, match="Cannot change column type: aCol: double -> float"):
+    with pytest.raises(ValidationError, match="Cannot change column type: aCol: string -> float"):
         _ = UpdateSchema(transaction=None, schema=current_schema).union_by_name(new_schema)._apply()  # type: ignore
 
 

Reply via email to