This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new c3bf16c3 Allow union of `{int,long}`, `{float,double}`, etc (#1283)
c3bf16c3 is described below
commit c3bf16c3d168159f034ce8f4fc079328c27ecb21
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Nov 5 16:54:27 2024 +0100
Allow union of `{int,long}`, `{float,double}`, etc (#1283)
* Allow union of `{int,long}`, `{float,double}`, etc
* Thanks Kevin!
Co-authored-by: Kevin Liu <[email protected]>
* Thanks Kevin!
Co-authored-by: Kevin Liu <[email protected]>
* MOAR tests
* lint
* Make the tests happy
* Remove redundant test
---------
Co-authored-by: Kevin Liu <[email protected]>
---
pyiceberg/table/update/schema.py | 8 +++++++-
tests/test_schema.py | 39 ++++++++++++++++++++++++++++++++++++---
2 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/pyiceberg/table/update/schema.py b/pyiceberg/table/update/schema.py
index 0442a604..0c83628f 100644
--- a/pyiceberg/table/update/schema.py
+++ b/pyiceberg/table/update/schema.py
@@ -770,7 +770,13 @@ class _UnionByNameVisitor(SchemaWithPartnerVisitor[int, bool]):
self.update_schema.make_column_optional(full_name)
if field.field_type.is_primitive and field.field_type != existing_field.field_type:
- self.update_schema.update_column(full_name,
field_type=field.field_type)
+ try:
+ # If the current type is wider than the new type, then
+ # we perform a noop
+ _ = promote(field.field_type, existing_field.field_type)
+ except ResolveError:
+ # If this is not the case, perform the type evolution
+ self.update_schema.update_column(full_name,
field_type=field.field_type)
if field.doc is not None and field.doc != existing_field.doc:
self.update_schema.update_column(full_name, doc=field.doc)
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 7f2ab906..4d894b0d 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -1189,6 +1189,17 @@ def test_detect_invalid_top_level_maps() -> None:
_ = UpdateSchema(transaction=None,
schema=current_schema).union_by_name(new_schema)._apply() # type: ignore
+def test_allow_double_to_float() -> None:
+ current_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=DoubleType(), required=False))
+ new_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=FloatType(), required=False))
+
+ applied = UpdateSchema(transaction=None,
schema=current_schema).union_by_name(new_schema)._apply() # type: ignore
+
+ assert applied.as_struct() == current_schema.as_struct()
+ assert len(applied.fields) == 1
+ assert isinstance(applied.fields[0].field_type, DoubleType)
+
+
def test_promote_float_to_double() -> None:
current_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=FloatType(), required=False))
new_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=DoubleType(), required=False))
@@ -1200,11 +1211,33 @@ def test_promote_float_to_double() -> None:
assert isinstance(applied.fields[0].field_type, DoubleType)
-def test_detect_invalid_promotion_double_to_float() -> None:
- current_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=DoubleType(), required=False))
+def test_allow_long_to_int() -> None:
+ current_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=LongType(), required=False))
+ new_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=IntegerType(), required=False))
+
+ applied = UpdateSchema(transaction=None,
schema=current_schema).union_by_name(new_schema)._apply() # type: ignore
+
+ assert applied.as_struct() == current_schema.as_struct()
+ assert len(applied.fields) == 1
+ assert isinstance(applied.fields[0].field_type, LongType)
+
+
+def test_promote_int_to_long() -> None:
+ current_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=IntegerType(), required=False))
+ new_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=LongType(), required=False))
+
+ applied = UpdateSchema(transaction=None,
schema=current_schema).union_by_name(new_schema)._apply() # type: ignore
+
+ assert applied.as_struct() == new_schema.as_struct()
+ assert len(applied.fields) == 1
+ assert isinstance(applied.fields[0].field_type, LongType)
+
+
+def test_detect_invalid_promotion_string_to_float() -> None:
+ current_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=StringType(), required=False))
new_schema = Schema(NestedField(field_id=1, name="aCol",
field_type=FloatType(), required=False))
- with pytest.raises(ValidationError, match="Cannot change column type: aCol: double -> float"):
+ with pytest.raises(ValidationError, match="Cannot change column type: aCol: string -> float"):
_ = UpdateSchema(transaction=None,
schema=current_schema).union_by_name(new_schema)._apply() # type: ignore