This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new dfc5851d fix(manifest): allow ManifestListWriter to reference 
lower-version manifests (#1030)
dfc5851d is described below

commit dfc5851d936707925d7ca0d9b33cccad5da93bb4
Author: Ashley Jeffs <[email protected]>
AuthorDate: Wed May 6 20:34:54 2026 +0100

    fix(manifest): allow ManifestListWriter to reference lower-version 
manifests (#1030)
    
    ## Problem
    
    `ManifestListWriter.AddManifests` requires every input manifest file's
    version to match the writer's version exactly, which contradicts the
    Iceberg spec: a v2 manifest list must be able to reference v1 manifest
    files so that v1 tables can be upgraded without rewriting historical
    manifests. Java's
    
[`ManifestListWriter`](https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/ManifestListWriter.java)
    handles this; `iceberg-go` does not.
    
    The practical symptom for downstream users is that any commit to a table
    whose metadata has been upgraded from v1 to v2 — whether via
    `Transaction.UpgradeFormatVersion` or out-of-band by another engine —
    fails with `invalid argument: ManifestListWriter only supports version 2
    manifest files`. The v1 manifest files written before the upgrade are
    never rewritten and continue to surface via
    `snapshotProducer.existingManifests()` on every subsequent commit, so
    the failure is permanent.
    
    ## Fix
    
    `manifest.go:1410-1414`: replace the `file.Version() != m.version`
    exact-match gate with `file.Version() > m.version`. Newer-than-writer
    inputs are still rejected — the v2 entry schema has no place for v3
    fields such as `first_row_id`, and accepting them would silently drop
    data. Lower-than-writer inputs are accepted because the in-memory
    `manifestFile` produced from a v1 input (via `manifestFileV1.toFile()`
    or `NewManifestFile(1, ...)`) already carries the spec's inheritance
    values — `Content = data` and `SeqNumber = MinSeqNumber = 0` — so it can
    be encoded directly against the v2/v3 entry schema. The existing
    `first_row_id` assignment for data manifests with `FirstRowIDValue ==
    nil` covers v1 inputs in v3 lists without further changes.
    
    ## Testing
    
    - `TestV2ManifestListAcceptsV1Manifests` — round-trips a v1 manifest
    through a v2 manifest list and asserts that the decoded entry has
    `content = data` and `sequence_number = min_sequence_number = 0` per
    spec inheritance.
    - `TestV3ManifestListAcceptsV1AndV2Manifests` — writes a v1 data
    manifest and a v2 delete manifest into a v3 list; asserts inheritance
    for the v1 entry, asserts that `first_row_id` is assigned to the v1 data
    manifest and is left unset on the v2 delete manifest (data-only
    assignment per the existing v3 writer rules).
    - `TestV2ManifestListRejectsV3Manifests` — confirms the downgrade
    direction is still blocked.
    - `TestWriteManifestListClosesWriterOnError` — existing test updated to
    drive its `AddManifests` failure path through a v3-in-v2 input (still
    rejected) instead of v1-in-v2.
    
    Confirmed failing on `main` before the fix and passing after. `go test
    ./...` passes.
    
    Fixes #1029
---
 manifest.go      | 14 ++++++++--
 manifest_test.go | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/manifest.go b/manifest.go
index 602586f6..3de9c128 100644
--- a/manifest.go
+++ b/manifest.go
@@ -1488,8 +1488,18 @@ func (m *ManifestListWriter) AddManifests(files 
[]ManifestFile) error {
 
        case 2, 3:
                for _, file := range files {
-                       if file.Version() != m.version {
-                               return fmt.Errorf("%w: ManifestListWriter only 
supports version %d manifest files", ErrInvalidArgument, m.version)
+                       // Per the Iceberg spec a v2 manifest list may 
reference v1 manifest
+                       // files (and a v3 list may reference v1 or v2 
manifests) so that a
+                       // table can be upgraded without rewriting historical 
manifests. The
+                       // in-memory ManifestFile produced for v1 inputs 
already carries the
+                       // inheritance values mandated by the spec — 
Content=data and
+                       // SeqNumber/MinSeqNumber=0 — so it can be encoded 
directly against
+                       // the v2/v3 entry schema. Newer-than-writer inputs are 
rejected
+                       // because the v2 schema cannot represent v3 fields 
such as
+                       // first_row_id.
+                       if file.Version() > m.version {
+                               return fmt.Errorf("%w: manifest list v%d cannot 
reference v%d manifest files",
+                                       ErrInvalidArgument, m.version, 
file.Version())
                        }
 
                        wrapped := *(file.(*manifestFile))
diff --git a/manifest_test.go b/manifest_test.go
index 36966d45..1bf944e8 100644
--- a/manifest_test.go
+++ b/manifest_test.go
@@ -1972,6 +1972,9 @@ func (w *limitedWriter) Write(p []byte) (int, error) {
 }
 
 func (m *ManifestTestSuite) TestWriteManifestListClosesWriterOnError() {
+       // A v2 manifest list cannot reference v3 manifests because the v2 entry
+       // schema has no first_row_id column; this gives us a deterministic
+       // AddManifests failure to assert that Close still flushes.
        seqNum := int64(7)
        var header bytes.Buffer
        writer, err := NewManifestListWriterV2(&header, snapshotID, seqNum, nil)
@@ -1981,13 +1984,88 @@ func (m *ManifestTestSuite) 
TestWriteManifestListClosesWriterOnError() {
        out := &limitedWriter{limit: header.Len(), err: errLimitedWrite}
        err = WriteManifestList(2, out, snapshotID, nil, &seqNum, 0, 
[]ManifestFile{
                manifestFileRecordsV2[0],
-               manifestFileRecordsV1[0],
+               manifestFileRecordsV3[0],
        })
        m.Require().Error(err)
-       m.Require().ErrorContains(err, "ManifestListWriter only supports 
version 2 manifest files")
+       m.Require().ErrorContains(err, "manifest list v2 cannot reference v3 
manifest files")
        m.Require().ErrorIs(err, errLimitedWrite)
 }
 
+// TestV2ManifestListAcceptsV1Manifests verifies the spec-mandated upgrade 
path:
+// a v2 manifest list must be able to reference v1 manifest files written 
before
+// the table was upgraded, with sequence_number and min_sequence_number both
+// inherited as 0 and content inherited as data.
+func (m *ManifestTestSuite) TestV2ManifestListAcceptsV1Manifests() {
+       seqNum := int64(42)
+       var buf bytes.Buffer
+
+       err := WriteManifestList(2, &buf, snapshotID, nil, &seqNum, 0, 
[]ManifestFile{
+               manifestFileRecordsV1[0],
+       })
+       m.Require().NoError(err)
+
+       got, err := ReadManifestList(&buf)
+       m.Require().NoError(err)
+       m.Require().Len(got, 1)
+
+       entry := got[0]
+       m.Equal(manifestFileRecordsV1[0].FilePath(), entry.FilePath())
+       m.Equal(manifestFileRecordsV1[0].Length(), entry.Length())
+       m.Equal(ManifestContentData, entry.ManifestContent(), "v1 inheritance: 
content must be data")
+       m.Equal(int64(0), entry.SequenceNum(), "v1 inheritance: sequence_number 
must be 0")
+       m.Equal(int64(0), entry.MinSequenceNum(), "v1 inheritance: 
min_sequence_number must be 0")
+       m.Equal(manifestFileRecordsV1[0].AddedRows(), entry.AddedRows())
+}
+
+// TestV3ManifestListAcceptsV1AndV2Manifests verifies that a v3 manifest list
+// can reference both v1 and v2 manifest files (e.g. after a v1->v3 upgrade)
+// and that first_row_id is assigned to data manifests during the write.
+func (m *ManifestTestSuite) TestV3ManifestListAcceptsV1AndV2Manifests() {
+       seqNum := int64(7)
+       firstRowID := int64(1000)
+       var buf bytes.Buffer
+
+       err := WriteManifestList(3, &buf, snapshotID, nil, &seqNum, firstRowID, 
[]ManifestFile{
+               manifestFileRecordsV1[0],
+               manifestFileRecordsV2[0],
+       })
+       m.Require().NoError(err)
+
+       got, err := ReadManifestList(&buf)
+       m.Require().NoError(err)
+       m.Require().Len(got, 2)
+
+       v1Entry := got[0]
+       m.Equal(manifestFileRecordsV1[0].FilePath(), v1Entry.FilePath())
+       m.Equal(ManifestContentData, v1Entry.ManifestContent())
+       m.Equal(int64(0), v1Entry.SequenceNum())
+       m.Equal(int64(0), v1Entry.MinSequenceNum())
+       m.Require().NotNil(v1Entry.FirstRowID(), "v3 list must assign 
first_row_id for data manifests")
+       m.Equal(firstRowID, *v1Entry.FirstRowID())
+
+       v2Entry := got[1]
+       m.Equal(manifestFileRecordsV2[0].FilePath(), v2Entry.FilePath())
+       // manifestFileRecordsV2[0] is a delete manifest, so first_row_id is not
+       // assigned (assignment is data-only per the v3 ManifestListWriter 
rules).
+       m.Equal(ManifestContentDeletes, v2Entry.ManifestContent())
+       m.Nil(v2Entry.FirstRowID(), "delete manifests must not be assigned 
first_row_id")
+}
+
+// TestV2ManifestListRejectsV3Manifests confirms that a v2 manifest list still
+// refuses to reference v3 manifest files, since the v2 entry schema has no
+// place to record first_row_id and accepting them would silently drop data.
+func (m *ManifestTestSuite) TestV2ManifestListRejectsV3Manifests() {
+       seqNum := int64(1)
+       var buf bytes.Buffer
+
+       err := WriteManifestList(2, &buf, snapshotID, nil, &seqNum, 0, 
[]ManifestFile{
+               manifestFileRecordsV3[0],
+       })
+       m.Require().Error(err)
+       m.Require().ErrorIs(err, ErrInvalidArgument)
+       m.Require().ErrorContains(err, "manifest list v2 cannot reference v3 
manifest files")
+}
+
 // TestManifestRoundTripSortOrderID verifies that a sort_order_id written onto
 // a data file survives an avro manifest round-trip (write → read). This
 // backs the end-to-end guarantee that callers of WriteTask/WriteFileInfo see

Reply via email to