geruh commented on code in PR #2927:
URL: https://github.com/apache/iceberg-python/pull/2927#discussion_r2706536421
##########
tests/integration/test_reads.py:
##########
@@ -135,6 +135,118 @@ def test_hive_properties(catalog: Catalog) -> None:
assert hive_table.parameters.get("abc") is None
[email protected]
[email protected]("catalog",
[pytest.lazy_fixture("session_catalog_hive")])
+def test_hive_preserves_hms_specific_properties(catalog: Catalog) -> None:
+ """Test that HMS-specific table properties are preserved during table
commits.
+
+ This verifies that HMS-specific properties that are not managed by Iceberg
+ are preserved during commits, rather than being lost.
+
+ Regression test for: https://github.com/apache/iceberg-python/issues/2926
+ """
+ table = create_table(catalog)
+ hive_client: _HiveClient = _HiveClient(catalog.properties["uri"])
+ with hive_client as open_client:
+ hive_table = open_client.get_table(*TABLE_NAME)
+ # Add HMS-specific properties that aren't managed by Iceberg
+ hive_table.parameters["table_category"] = "production"
+ hive_table.parameters["data_owner"] = "data_team"
+ open_client.alter_table(TABLE_NAME[0], TABLE_NAME[1], hive_table)
+
+ with hive_client as open_client:
+ hive_table = open_client.get_table(*TABLE_NAME)
+ assert hive_table.parameters.get("table_category") == "production"
+ assert hive_table.parameters.get("data_owner") == "data_team"
+
+ table.transaction().set_properties({"iceberg_property":
"new_value"}).commit_transaction()
+
+ # Verify that HMS-specific properties are STILL present after commit
+ with hive_client as open_client:
+ hive_table = open_client.get_table(*TABLE_NAME)
+ # HMS-specific properties should be preserved
+ assert hive_table.parameters.get("table_category") == "production", (
+ "HMS property 'table_category' was lost during commit!"
+ )
+ assert hive_table.parameters.get("data_owner") == "data_team", "HMS
property 'data_owner' was lost during commit!"
+ # Iceberg properties should also be present
+ assert hive_table.parameters.get("iceberg_property") == "new_value"
+
+
[email protected]
[email protected]("catalog",
[pytest.lazy_fixture("session_catalog_hive")])
+def test_hive_iceberg_property_takes_precedence(catalog: Catalog) -> None:
Review Comment:
This is a good call, this should be phrased as iceberg metadata is the
source of truth, and HMS parameters are a projection of Iceberg state, plus any
HMS-only properties that external tools may have set.
Looking at the Java implementation, they follow this order to sync
properties:
1. We start our with the existing HMS params
2. Push all Iceberg properties into HMS
3. Remove old props
4. Then set all the iceberg properties
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]