This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 43670985 fix: Pass `snapshot_id` as kwarg to RemoveStatisticsUpdate
(#2694)
43670985 is described below
commit 436709856672c19332df504b44ffb0d5e093d508
Author: Brad Cannon <[email protected]>
AuthorDate: Fri Nov 7 14:57:11 2025 -0500
fix: Pass `snapshot_id` as kwarg to RemoveStatisticsUpdate (#2694)
<!--
Thanks for opening a pull request!
-->
<!-- In the case this PR will resolve an issue, please replace
${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->
Closes #2558
# Rationale for this change
When removing snapshots with statistics, `RemoveStatisticsUpdate` was
being instantiated with a positional argument, which, as suggested by
@vndv, "violates Pydantic's BaseModel requirement that all fields be
passed as keyword arguments". Shout out to @vndv for catching this 🚀
This caused a `TypeError: BaseModel.__init__() takes 1 positional
argument but 2 were given` when calling
`table.maintenance.expire_snapshots().older_than(...).commit()`.
## Are these changes tested?
Yes, and a new test was added.
Existing tests only tested the `RemoveStatisticsUpdate` directly, but
didnt test the code path through `RemoveSnapshotsUpdate` that triggers
the bug.
Added `test_update_remove_snapshots_with_statistics` to
`test_expire_snapshots.py` to extend coverage for the condition.
## Are there any user-facing changes?
No.
<!-- In the case of user-facing changes, please add the changelog label.
-->
---
pyiceberg/table/update/__init__.py | 2 +-
tests/table/test_expire_snapshots.py | 37 ++++++++++++++++++++++++++++++++++++
2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/pyiceberg/table/update/__init__.py
b/pyiceberg/table/update/__init__.py
index 9d6ab38e..6533719b 100644
--- a/pyiceberg/table/update/__init__.py
+++ b/pyiceberg/table/update/__init__.py
@@ -528,7 +528,7 @@ def _(update: RemoveSnapshotsUpdate, base_metadata:
TableMetadata, context: _Tab
if ref.snapshot_id in update.snapshot_ids
)
remove_statistics_updates = (
- RemoveStatisticsUpdate(statistics_file.snapshot_id)
+ RemoveStatisticsUpdate(snapshot_id=statistics_file.snapshot_id)
for statistics_file in base_metadata.statistics
if statistics_file.snapshot_id in update.snapshot_ids
)
diff --git a/tests/table/test_expire_snapshots.py
b/tests/table/test_expire_snapshots.py
index 51f5ba68..d11851f2 100644
--- a/tests/table/test_expire_snapshots.py
+++ b/tests/table/test_expire_snapshots.py
@@ -23,6 +23,7 @@ from uuid import uuid4
import pytest
from pyiceberg.table import CommitTableResponse, Table
+from pyiceberg.table.update import RemoveSnapshotsUpdate, update_table_metadata
from pyiceberg.table.update.snapshot import ExpireSnapshots
@@ -280,3 +281,39 @@ def test_concurrent_operations() -> None:
assert results["expire1_snapshots"] == expected_1, "Worker 1 snapshots
contaminated"
assert results["expire2_snapshots"] == expected_2, "Worker 2 snapshots
contaminated"
+
+
+def test_update_remove_snapshots_with_statistics(table_v2_with_statistics:
Table) -> None:
+ """
+ Test removing snapshots from a table that has statistics.
+
+ This test exercises the code path where RemoveStatisticsUpdate is
instantiated
+ within the RemoveSnapshotsUpdate handler. Before the fix for #2558, this
would
+ fail with: TypeError: BaseModel.__init__() takes 1 positional argument but
2 were given
+ """
+ # The table has 2 snapshots with IDs: 3051729675574597004 and
3055729675574597004
+ # Both snapshots have statistics associated with them
+ REMOVE_SNAPSHOT = 3051729675574597004
+ KEEP_SNAPSHOT = 3055729675574597004
+
+ # Verify fixture assumptions
+ assert len(table_v2_with_statistics.metadata.snapshots) == 2
+ assert len(table_v2_with_statistics.metadata.statistics) == 2
+ assert any(stat.snapshot_id == REMOVE_SNAPSHOT for stat in
table_v2_with_statistics.metadata.statistics), (
+ "Snapshot to remove should have statistics"
+ )
+
+ # This should trigger RemoveStatisticsUpdate instantiation for the removed
snapshot
+ update = RemoveSnapshotsUpdate(snapshot_ids=[REMOVE_SNAPSHOT])
+ new_metadata = update_table_metadata(table_v2_with_statistics.metadata,
(update,))
+
+ # Verify the snapshot was removed
+ assert len(new_metadata.snapshots) == 1
+ assert new_metadata.snapshots[0].snapshot_id == KEEP_SNAPSHOT
+
+ # Verify the statistics for the removed snapshot were also removed
+ assert len(new_metadata.statistics) == 1
+ assert new_metadata.statistics[0].snapshot_id == KEEP_SNAPSHOT
+ assert not any(stat.snapshot_id == REMOVE_SNAPSHOT for stat in
new_metadata.statistics), (
+ "Statistics for removed snapshot should be gone"
+ )