This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch pyiceberg-0.9.x
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git

commit 4e2515f6a9c8a6f5e06be2d0e22003af9bb9b03b
Author: Fokko Driesprong <fo...@apache.org>
AuthorDate: Tue Mar 18 12:11:00 2025 +0100

    Include DELETE entries when inspecting (#1731)
    
    While doing some checks, I've noticed that these are missing.
---
 pyiceberg/table/inspect.py              | 2 +-
 tests/integration/test_inspect_table.py | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pyiceberg/table/inspect.py b/pyiceberg/table/inspect.py
index b850aa10..878ae71c 100644
--- a/pyiceberg/table/inspect.py
+++ b/pyiceberg/table/inspect.py
@@ -161,7 +161,7 @@ class InspectTable:
         entries = []
         snapshot = self._get_snapshot(snapshot_id)
         for manifest in snapshot.manifests(self.tbl.io):
-            for entry in manifest.fetch_manifest_entry(io=self.tbl.io):
+            for entry in manifest.fetch_manifest_entry(io=self.tbl.io, discard_deleted=False):
                 column_sizes = entry.data_file.column_sizes or {}
                 value_counts = entry.data_file.value_counts or {}
                 null_value_counts = entry.data_file.null_value_counts or {}
diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py
index 75fe92a6..a2a5fe45 100644
--- a/tests/integration/test_inspect_table.py
+++ b/tests/integration/test_inspect_table.py
@@ -164,6 +164,8 @@ def test_inspect_entries(
 
     # Write some data
     tbl.append(arrow_table_with_null)
+    # Generate a DELETE entry
+    tbl.overwrite(arrow_table_with_null)
 
     def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> None:
         assert df.column_names == [
@@ -185,6 +187,8 @@ def test_inspect_entries(
 
         lhs = df.to_pandas()
         rhs = spark_df.toPandas()
+        assert len(lhs) == len(rhs)
+
         for column in df.column_names:
             for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
                 if column == "data_file":

Reply via email to