rdblue commented on code in PR #7778:
URL: https://github.com/apache/iceberg/pull/7778#discussion_r1219847972


##########
python/pyiceberg/manifest.py:
##########
@@ -283,7 +298,49 @@ def files(input_file: InputFile) -> Iterator[DataFile]:
 
 
 def read_manifest_list(input_file: InputFile) -> Iterator[ManifestFile]:
+    """
+    Reads the manifests from the manifest list
+
+    Args:
+        input_file: The input file where the stream can be read from
+
+    Returns:
+        An iterator of ManifestFiles that are part of the list
+    """
     with AvroFile[ManifestFile](
-        input_file, MANIFEST_FILE_SCHEMA, {-1: ManifestFile, 508: 
PartitionFieldSummary}, {517: ManifestContent}
+        input_file,
+        MANIFEST_FILE_SCHEMA,
+        read_types={-1: ManifestFile, 508: PartitionFieldSummary},
+        read_enums={517: ManifestContent},
     ) as reader:
         yield from reader
+
+
+def _inherit_sequence_number(entry: ManifestEntry, manifest: ManifestFile) -> 
ManifestEntry:
+    """Inherits the sequence numbers
+
+    More information in the spec: 
https://iceberg.apache.org/spec/#sequence-number-inheritance
+
+    Args:
+        entry: The manifest entry that has null sequence numbers
+        manifest: The manifest that has a sequence number
+
+    Returns:
+        The manifest entry with the sequence numbers set
+    """
+    # The snapshot_id is required in V1, inherit with V2 when null
+    if entry.snapshot_id is None:
+        entry.snapshot_id = manifest.added_snapshot_id
+
+    # in v1 tables, the data sequence number is not persisted and can be 
safely defaulted to 0
+    # in v2 tables, the data sequence number should be inherited iff the entry 
status is ADDED
+    if entry.sequence_number is None and (manifest.sequence_number == 0 or 
entry.status == ManifestEntryStatus.ADDED):
+        entry.sequence_number = manifest.sequence_number

Review Comment:
   We may want to rename this field (`entry.sequence_number`) to
`data_sequence_number`, since the same rename was recently done in the JVM
implementation.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to