ForeverAngry commented on code in PR #2143: URL: https://github.com/apache/iceberg-python/pull/2143#discussion_r2265521441
########## pyiceberg/table/update/snapshot.py: ########## @@ -920,87 +919,49 @@ class ExpireSnapshots(UpdateTableMetadata["ExpireSnapshots"]): _requirements: Tuple[TableRequirement, ...] = () def _commit(self) -> UpdatesAndRequirements: - """ - Commit the staged updates and requirements. + """Commit the staged updates and requirements. - This will remove the snapshots with the given IDs, but will always skip protected snapshots (branch/tag heads). + This will remove the snapshots with the given IDs. Returns: Tuple of updates and requirements to be committed, as required by the calling parent apply functions. """ - # Remove any protected snapshot IDs from the set to expire, just in case - protected_ids = self._get_protected_snapshot_ids() - self._snapshot_ids_to_expire -= protected_ids update = RemoveSnapshotsUpdate(snapshot_ids=self._snapshot_ids_to_expire) - self._updates += (update,) - return self._updates, self._requirements + return (update,), () def _get_protected_snapshot_ids(self) -> Set[int]: - """ - Get the IDs of protected snapshots. + """Get the IDs of protected snapshots. - These are the HEAD snapshots of all branches and all tagged snapshots. These ids are to be excluded from expiration. + These are the HEAD snapshots of all branches and all tagged snapshots. + These ids are to be excluded from expiration. Returns: Set of protected snapshot IDs to exclude from expiration. """ - protected_ids: Set[int] = set() + protected_ids = set() for ref in self._transaction.table_metadata.refs.values(): if ref.snapshot_ref_type in [SnapshotRefType.TAG, SnapshotRefType.BRANCH]: protected_ids.add(ref.snapshot_id) return protected_ids - def expire_snapshot_by_id(self, snapshot_id: int) -> ExpireSnapshots: - """ - Expire a snapshot by its ID. - - This will mark the snapshot for expiration. + def by_id(self, snapshot_id: int) -> ExpireSnapshots: + """Expire a snapshot by its ID. Args: snapshot_id (int): The ID of the snapshot to expire. + Returns: This for method chaining. """ if self._transaction.table_metadata.snapshot_by_id(snapshot_id) is None: raise ValueError(f"Snapshot with ID {snapshot_id} does not exist.") - if snapshot_id in self._get_protected_snapshot_ids(): - raise ValueError(f"Snapshot with ID {snapshot_id} is protected and cannot be expired.") + protected_ids = self._get_protected_snapshot_ids() + if snapshot_id in protected_ids: + raise ValueError(f"Cannot expire snapshot {snapshot_id} as it is referenced by a branch or tag.") self._snapshot_ids_to_expire.add(snapshot_id) - - return self - - def expire_snapshots_by_ids(self, snapshot_ids: List[int]) -> "ExpireSnapshots": - """ - Expire multiple snapshots by their IDs. - - This will mark the snapshots for expiration. - - Args: - snapshot_ids (List[int]): List of snapshot IDs to expire. - Returns: - This for method chaining. - """ - for snapshot_id in snapshot_ids: - self.expire_snapshot_by_id(snapshot_id) - return self - - def expire_snapshots_older_than(self, timestamp_ms: int) -> "ExpireSnapshots": Review Comment: Well, i removed the `expire_snapshots_older_than` function, because it wasnt a feature of the ExpireSnapshots class that was merged into the main branch, since it sounded like we wanted to just refactor the api, and not add any new features to that actual implementation in this pr. I'm happy to add the `expire_snapshots_older_than` back. Can you verify if you want that back or not. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org