This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch pyiceberg-0.9.x
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
commit 4e2515f6a9c8a6f5e06be2d0e22003af9bb9b03b
Author: Fokko Driesprong <fo...@apache.org>
AuthorDate: Tue Mar 18 12:11:00 2025 +0100

    Include DELETE entries when inspecting (#1731)

    While doing some checks, I've noticed that these are missing.
---
 pyiceberg/table/inspect.py              | 2 +-
 tests/integration/test_inspect_table.py | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pyiceberg/table/inspect.py b/pyiceberg/table/inspect.py
index b850aa10..878ae71c 100644
--- a/pyiceberg/table/inspect.py
+++ b/pyiceberg/table/inspect.py
@@ -161,7 +161,7 @@ class InspectTable:
         entries = []
         snapshot = self._get_snapshot(snapshot_id)
         for manifest in snapshot.manifests(self.tbl.io):
-            for entry in manifest.fetch_manifest_entry(io=self.tbl.io):
+            for entry in manifest.fetch_manifest_entry(io=self.tbl.io, discard_deleted=False):
                 column_sizes = entry.data_file.column_sizes or {}
                 value_counts = entry.data_file.value_counts or {}
                 null_value_counts = entry.data_file.null_value_counts or {}
diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py
index 75fe92a6..a2a5fe45 100644
--- a/tests/integration/test_inspect_table.py
+++ b/tests/integration/test_inspect_table.py
@@ -164,6 +164,8 @@ def test_inspect_entries(
 
     # Write some data
     tbl.append(arrow_table_with_null)
+    # Generate a DELETE entry
+    tbl.overwrite(arrow_table_with_null)
 
     def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> None:
         assert df.column_names == [
@@ -185,6 +187,8 @@ def test_inspect_entries(
 
         lhs = df.to_pandas()
         rhs = spark_df.toPandas()
+        assert len(lhs) == len(rhs)
+
         for column in df.column_names:
             for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
                 if column == "data_file":