gabeiglio commented on code in PR #3049:
URL: https://github.com/apache/iceberg-python/pull/3049#discussion_r2816374817


##########
pyiceberg/table/update/validate.py:
##########
@@ -216,6 +225,61 @@ def _added_data_files(
                 yield entry
 
 
+def _added_delete_files(
+    table: Table,
+    starting_snapshot: Snapshot,
+    data_filter: BooleanExpression | None,
+    partition_set: dict[int, set[Record]] | None,
+    parent_snapshot: Snapshot | None,
+) -> DeleteFileIndex:
+    """Return matching delete files that have been added to the table since a 
starting snapshot.
+
+    Args:
+        table: Table to get the history from
+        starting_snapshot: Starting snapshot to get the history from
+        data_filter: Optional filter to match data files
+        partition_set: Optional set of partitions to match data files
+        parent_snapshot: Parent snapshot to get the history from
+
+    Returns:
+        DeleteFileIndex
+    """
+    if parent_snapshot is None or table.format_version < 2:
+        return DeleteFileIndex()
+
+    manifests, snapshot_ids = _validation_history(
+        table, parent_snapshot, starting_snapshot, 
VALIDATE_ADDED_DELETE_FILES_OPERATIONS, ManifestContent.DELETES
+    )
+
+    dfi = DeleteFileIndex()
+
+    for manifest in manifests:
+        for entry in manifest.fetch_manifest_entry(table.io, 
discard_deleted=False):
+            if _filter_manifest_entries(
+                entry, snapshot_ids, data_filter, partition_set, 
ManifestEntryStatus.ADDED, table.schema()
+            ):
+                dfi.add_delete_file(entry, entry.data_file.partition)
+
+    return dfi
+
+
+def _starting_sequence_number(table: Table, starting_snapshot: Snapshot | 
None) -> int:
+    """Find the starting sequence number from a snapshot.
+
+    Args:
+        table: Table to find snapshot from
+        starting_snapshot: Snapshot from where to start looking
+
+    Returns
+        Sequence number as int
+    """
+    if starting_snapshot is not None:
+        if snapshot := table.snapshot_by_id(starting_snapshot.snapshot_id):

Review Comment:
   Good catch, we don't. I was following the Java implementation and missed this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to