This is an automated email from the ASF dual-hosted git repository.

sungwy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 0b487f27 fix (issue-1079): allow update_column to set doc as '' (#1083)
0b487f27 is described below

commit 0b487f27ca4bb5427f096a1dec738659e0bdd0b7
Author: Tiansu <[email protected]>
AuthorDate: Mon Aug 26 16:38:29 2024 +0200

    fix (issue-1079): allow update_column to set doc as '' (#1083)
    
    * fix (issue-1079): allow update_column to set doc as ''
    
    * fix (issue-1079): add test for required and fix version
    
    * Apply suggestions from code review
    
    Co-authored-by: Sung Yun <[email protected]>
    
    * Apply suggestions from code review
    
    ---------
    
    Co-authored-by: Tiansu Yu <tiansu.yu@icloud>
    Co-authored-by: Sung Yun <[email protected]>
---
 pyiceberg/table/__init__.py |  7 ++++---
 tests/table/test_init.py    | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index 47944831..8ec44c6b 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -2492,12 +2492,13 @@ class UpdateSchema(UpdateTableMetadata["UpdateSchema"]):
                 except ResolveError as e:
                     raise ValidationError(f"Cannot change column type: 
{full_name}: {field.field_type} -> {field_type}") from e
 
+        # if other updates for the same field exist in one transaction:
         if updated := self._updates.get(field.field_id):
             self._updates[field.field_id] = NestedField(
                 field_id=updated.field_id,
                 name=updated.name,
                 field_type=field_type or updated.field_type,
-                doc=doc or updated.doc,
+                doc=doc if doc is not None else updated.doc,
                 required=updated.required,
             )
         else:
@@ -2505,7 +2506,7 @@ class UpdateSchema(UpdateTableMetadata["UpdateSchema"]):
                 field_id=field.field_id,
                 name=field.name,
                 field_type=field_type or field.field_type,
-                doc=doc or field.doc,
+                doc=doc if doc is not None else field.doc,
                 required=field.required,
             )
 
@@ -2878,7 +2879,7 @@ class UnionByNameVisitor(SchemaWithPartnerVisitor[int, 
bool]):
         if field.field_type.is_primitive and field.field_type != 
existing_field.field_type:
             self.update_schema.update_column(full_name, 
field_type=field.field_type)
 
-        if field.doc is not None and not field.doc != existing_field.doc:
+        if field.doc is not None and field.doc != existing_field.doc:
             self.update_schema.update_column(full_name, doc=field.doc)
 
     def _find_field_type(self, field_id: int) -> IcebergType:
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index e5458677..484abc24 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -512,6 +512,41 @@ def test_add_column(table_v2: Table) -> None:
     assert apply_schema.highest_field_id == 4
 
 
+def test_update_column(table_v1: Table, table_v2: Table) -> None:
+    """
+    Table should be able to update existing property `doc`
+    Table should also be able to update property `required`, if the field is 
not an identifier field.
+    """
+    COMMENT2 = "comment2"
+    for table in [table_v1, table_v2]:
+        original_schema = table.schema()
+        # update existing doc to a new doc
+        assert original_schema.find_field("y").doc == "comment"
+        new_schema = table.transaction().update_schema().update_column("y", 
doc=COMMENT2)._apply()
+        assert new_schema.find_field("y").doc == COMMENT2, "failed to update 
existing field doc"
+
+        # update existing doc to an emtpy string
+        assert new_schema.find_field("y").doc == COMMENT2
+        new_schema2 = table.transaction().update_schema().update_column("y", 
doc="")._apply()
+        assert new_schema2.find_field("y").doc == "", "failed to remove 
existing field doc"
+
+        # update required to False
+        assert original_schema.find_field("z").required is True
+        new_schema3 = table.transaction().update_schema().update_column("z", 
required=False)._apply()
+        assert new_schema3.find_field("z").required is False, "failed to 
update existing field required"
+
+        # assert the above two updates also works with union_by_name
+        assert (
+            table.update_schema().union_by_name(new_schema)._apply() == 
new_schema
+        ), "failed to update existing field doc with union_by_name"
+        assert (
+            table.update_schema().union_by_name(new_schema2)._apply() == 
new_schema2
+        ), "failed to remove existing field doc with union_by_name"
+        assert (
+            table.update_schema().union_by_name(new_schema3)._apply() == 
new_schema3
+        ), "failed to update existing field required with union_by_name"
+
+
 def test_add_primitive_type_column(table_v2: Table) -> None:
     primitive_type: Dict[str, PrimitiveType] = {
         "boolean": BooleanType(),

Reply via email to