jcellary commented on code in PR #3121:
URL: https://github.com/apache/iceberg-python/pull/3121#discussion_r2904321908


##########
pyiceberg/catalog/hive.py:
##########
@@ -499,6 +499,109 @@ def _do_wait_for_lock() -> LockResponse:
 
         return _do_wait_for_lock()
 
+    def _do_commit(
+        self,
+        open_client: Client,
+        table_identifier: Identifier,
+        database_name: str,
+        table_name: str,
+        requirements: tuple[TableRequirement, ...],
+        updates: tuple[TableUpdate, ...],
+    ) -> CommitTableResponse:
+        """Perform the actual commit logic (get table, update, write metadata, 
alter/create in HMS).
+
+        This method contains the core commit logic, separated from locking 
concerns.
+        """
+        hive_table: HiveTable | None
+        current_table: Table | None
+        try:
+            hive_table = self._get_hive_table(open_client, database_name, 
table_name)
+            current_table = self._convert_hive_into_iceberg(hive_table)
+        except NoSuchTableError:
+            hive_table = None
+            current_table = None
+
+        updated_staged_table = self._update_and_stage_table(current_table, 
table_identifier, requirements, updates)
+        if current_table and updated_staged_table.metadata == 
current_table.metadata:
+            # no changes, do nothing
+            return CommitTableResponse(metadata=current_table.metadata, 
metadata_location=current_table.metadata_location)
+        self._write_metadata(
+            metadata=updated_staged_table.metadata,
+            io=updated_staged_table.io,
+            metadata_path=updated_staged_table.metadata_location,
+        )
+
+        if hive_table and current_table:
+            # Table exists, update it.
+
+            # Note on table properties:
+            # - Iceberg table properties are stored in both HMS and Iceberg 
metadata JSON.
+            # - Updates are reflected in both locations
+            # - Existing HMS table properties (set by external systems like 
Hive/Spark) are preserved.
+            #
+            # While it is possible to modify HMS table properties through this 
API, it is not recommended:
+            # - Mixing HMS-specific properties in Iceberg metadata can cause 
confusion
+            # - New/updated HMS table properties will also be stored in 
Iceberg metadata (even though it is HMS-specific)
+            # - HMS-native properties (set outside Iceberg) cannot be deleted 
since they are not visible to Iceberg
+            #   (However, if you first SET an HMS property via Iceberg, it 
becomes tracked in Iceberg metadata,
+            #   and can then be deleted via Iceberg - which removes it from 
both Iceberg metadata and HMS)
+            new_iceberg_properties = _construct_parameters(
+                metadata_location=updated_staged_table.metadata_location,
+                previous_metadata_location=current_table.metadata_location,
+                metadata_properties=updated_staged_table.properties,
+            )
+            # Detect properties that were removed from Iceberg metadata
+            deleted_iceberg_properties = current_table.properties.keys() - 
updated_staged_table.properties.keys()
+
+            # Merge: preserve HMS-native properties, remove deleted Iceberg 
properties, apply new Iceberg properties
+            existing_hms_parameters = dict(hive_table.parameters or {})
+            for key in deleted_iceberg_properties:
+                existing_hms_parameters.pop(key, None)
+            existing_hms_parameters.update(new_iceberg_properties)
+            hive_table.parameters = existing_hms_parameters
+
+            # Update hive's schema and properties
+            hive_table.sd = _construct_hive_storage_descriptor(
+                updated_staged_table.schema(),
+                updated_staged_table.location(),
+                property_as_bool(self.properties, HIVE2_COMPATIBLE, 
HIVE2_COMPATIBLE_DEFAULT),
+            )
+            open_client.alter_table_with_environment_context(
+                dbname=database_name,
+                tbl_name=table_name,
+                new_tbl=hive_table,
+                
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: 
DO_NOT_UPDATE_STATS_DEFAULT}),

Review Comment:
   Good catch. I extended my PR. Hopefully I got it right, as the flow is 
pretty complex. I also pushed the code to our fork to confirm that it has not 
affected our production jobs.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to