This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new cdc3e54a Disallow writing empty Manifest files (#876)
cdc3e54a is described below

commit cdc3e54aad23638a3411fb4a771a2901e5bf8a93
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Jul 9 08:28:27 2024 +0200

    Disallow writing empty Manifest files (#876)
    
    * Disallow writing empty Avro files/blocks
    
    Raising an exception when doing this might look extreme, but
    there is no real good reason to allow this.
    
    * Relax the constaints a bit
---
 pyiceberg/manifest.py        |  6 ++++++
 tests/utils/test_manifest.py | 19 ++++++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
index bf5749ce..4fd82fec 100644
--- a/pyiceberg/manifest.py
+++ b/pyiceberg/manifest.py
@@ -685,6 +685,10 @@ class ManifestWriter(ABC):
         traceback: Optional[TracebackType],
     ) -> None:
         """Close the writer."""
+        if (self._added_files + self._existing_files + self._deleted_files) == 
0:
+            # This is just a guard to ensure that we don't write empty 
manifest files
+            raise ValueError("An empty manifest file has been written")
+
         self.closed = True
         self._writer.__exit__(exc_type, exc_value, traceback)
 
@@ -757,6 +761,8 @@ class ManifestWriter(ABC):
         elif entry.status == ManifestEntryStatus.DELETED:
             self._deleted_files += 1
             self._deleted_rows += entry.data_file.record_count
+        else:
+            raise ValueError(f"Unknown entry: {entry.status}")
 
         self._partitions.append(entry.data_file.partition)
 
diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py
index a812b384..ecb99e28 100644
--- a/tests/utils/test_manifest.py
+++ b/tests/utils/test_manifest.py
@@ -35,7 +35,7 @@ from pyiceberg.manifest import (
     write_manifest,
     write_manifest_list,
 )
-from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, 
PartitionField, PartitionSpec
 from pyiceberg.schema import Schema
 from pyiceberg.table.snapshots import Operation, Snapshot, Summary
 from pyiceberg.transforms import IdentityTransform
@@ -306,6 +306,23 @@ def test_read_manifest_v2(generated_manifest_file_file_v2: 
str) -> None:
     assert entry.status == ManifestEntryStatus.ADDED
 
 
+def test_write_empty_manifest() -> None:
+    io = load_file_io()
+    test_schema = Schema(NestedField(1, "foo", IntegerType(), False))
+    with TemporaryDirectory() as tmpdir:
+        tmp_avro_file = tmpdir + "/test_write_manifest.avro"
+
+        with pytest.raises(ValueError, match="An empty manifest file has been 
written"):
+            with write_manifest(
+                format_version=1,
+                spec=UNPARTITIONED_PARTITION_SPEC,
+                schema=test_schema,
+                output_file=io.new_output(tmp_avro_file),
+                snapshot_id=8744736658442914487,
+            ) as _:
+                pass
+
+
 @pytest.mark.parametrize("format_version", [1, 2])
 def test_write_manifest(
     generated_manifest_file_file_v1: str, generated_manifest_file_file_v2: 
str, format_version: TableVersion

Reply via email to