Fokko commented on code in PR #8622:
URL: https://github.com/apache/iceberg/pull/8622#discussion_r1335658397


##########
python/tests/utils/test_manifest.py:
##########
@@ -278,3 +299,243 @@ def 
test_read_manifest_v2(generated_manifest_file_file_v2: str) -> None:
     assert entry.file_sequence_number == 3
     assert entry.snapshot_id == 8744736658442914487
     assert entry.status == ManifestEntryStatus.ADDED
+
+
+@pytest.mark.parametrize("format_version", [1, 2])
+def test_write_manifest(generated_manifest_file_file_v1: str, 
generated_manifest_file_file_v2: str, format_version: int) -> None:
+    io = load_file_io()
+    snapshot = Snapshot(
+        snapshot_id=25,
+        parent_snapshot_id=19,
+        timestamp_ms=1602638573590,
+        manifest_list=generated_manifest_file_file_v1 if format_version == 1 
else generated_manifest_file_file_v2,
+        summary=Summary(Operation.APPEND),
+        schema_id=3,
+    )
+    demo_manifest_file = snapshot.manifests(io)[0]
+    manifest_entries = demo_manifest_file.fetch_manifest_entry(io)
+    test_schema = Schema(
+        NestedField(1, "VendorID", IntegerType(), False), NestedField(2, 
"tpep_pickup_datetime", IntegerType(), False)
+    )
+    test_spec = PartitionSpec(
+        PartitionField(source_id=1, field_id=1, transform=IdentityTransform(), 
name="VendorID"),
+        PartitionField(source_id=2, field_id=2, transform=IdentityTransform(), 
name="tpep_pickup_datetime"),
+        spec_id=demo_manifest_file.partition_spec_id,
+    )
+    with TemporaryDirectory() as tmpdir:
+        tmp_avro_file = tmpdir + "/test_write_manifest.avro"
+        output = io.new_output(tmp_avro_file)
+        with write_manifest(
+            format_version=format_version,  # type: ignore
+            spec=test_spec,
+            schema=test_schema,
+            output_file=output,
+            snapshot_id=8744736658442914487,
+        ) as writer:
+            for entry in manifest_entries:
+                writer.add_entry(entry)
+            new_manifest = writer.to_manifest_file()
+            with pytest.raises(RuntimeError):
+                writer.add_entry(manifest_entries[0])
+
+        expected_metadata = {
+            "schema": test_schema.json(),
+            "partition-spec": test_spec.json(),
+            "partition-spec-id": str(test_spec.spec_id),
+            "format-version": str(format_version),
+        }
+        if format_version == 2:
+            expected_metadata["content"] = "data"
+        _verify_metadata_with_fastavro(
+            tmp_avro_file,
+            expected_metadata,
+        )
+        new_manifest_entries = new_manifest.fetch_manifest_entry(io)
+
+        manifest_entry = new_manifest_entries[0]
+
+        assert manifest_entry.status == ManifestEntryStatus.ADDED
+        assert manifest_entry.snapshot_id == 8744736658442914487
+        assert manifest_entry.data_sequence_number == 0 if format_version == 1 
else 3
+        assert isinstance(manifest_entry.data_file, DataFile)
+
+        data_file = manifest_entry.data_file
+
+        assert data_file.content is DataFileContent.DATA
+        assert (
+            data_file.file_path
+            == 
"/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
+        )
+        assert data_file.file_format == FileFormat.PARQUET
+        assert repr(data_file.partition) == "Record[VendorID=1, 
tpep_pickup_datetime=1925]"

Review Comment:
   Can we also test this without the repr? This way we can check the types.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to