This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new dfc5851d fix(manifest): allow ManifestListWriter to reference
lower-version manifests (#1030)
dfc5851d is described below
commit dfc5851d936707925d7ca0d9b33cccad5da93bb4
Author: Ashley Jeffs <[email protected]>
AuthorDate: Wed May 6 20:34:54 2026 +0100
fix(manifest): allow ManifestListWriter to reference lower-version
manifests (#1030)
## Problem
`ManifestListWriter.AddManifests` requires every input manifest file's
version to match the writer's version exactly, which contradicts the
Iceberg spec: a v2 manifest list must be able to reference v1 manifest
files so that v1 tables can be upgraded without rewriting historical
manifests. Java's
[`ManifestListWriter`](https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/ManifestListWriter.java)
handles this; `iceberg-go` does not.
The practical symptom for downstream users is that any commit to a table
whose metadata has been upgraded from v1 to v2 — whether via
`Transaction.UpgradeFormatVersion` or out-of-band by another engine —
fails with `invalid argument: ManifestListWriter only supports version 2
manifest files`. The v1 manifest files written before the upgrade are
never rewritten and continue to surface via
`snapshotProducer.existingManifests()` on every subsequent commit, so
the failure is permanent.
## Fix
`manifest.go:1410-1414`: replace the `file.Version() != m.version`
exact-match gate with `file.Version() > m.version`. Newer-than-writer
inputs are still rejected — the v2 entry schema has no place for v3
fields such as `first_row_id`, and accepting them would silently drop
data. Lower-than-writer inputs are accepted because the in-memory
`manifestFile` produced from a v1 input (via `manifestFileV1.toFile()`
or `NewManifestFile(1, ...)`) already carries the spec's inheritance
values — `Content = data` and `SeqNumber = MinSeqNumber = 0` — so it can
be encoded directly against the v2/v3 entry schema. The existing
`first_row_id` assignment for data manifests with `FirstRowIDValue ==
nil` covers v1 inputs in v3 lists without further changes.
## Testing
- `TestV2ManifestListAcceptsV1Manifests` — round-trips a v1 manifest
through a v2 manifest list and asserts that the decoded entry has
`content = data` and `sequence_number = min_sequence_number = 0` per
spec inheritance.
- `TestV3ManifestListAcceptsV1AndV2Manifests` — writes a v1 data
manifest and a v2 delete manifest into a v3 list; asserts inheritance
for the v1 entry, asserts that `first_row_id` is assigned to the v1 data
manifest and is left unset on the v2 delete manifest (data-only
assignment per the existing v3 writer rules).
- `TestV2ManifestListRejectsV3Manifests` — confirms the downgrade
direction is still blocked.
- `TestWriteManifestListClosesWriterOnError` — existing test updated to
drive its `AddManifests` failure path through a v3-in-v2 input (still
rejected) instead of v1-in-v2.
Confirmed failing on `main` before the fix and passing after. `go test
./...` passes.
Fixes #1029
---
manifest.go | 14 ++++++++--
manifest_test.go | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 92 insertions(+), 4 deletions(-)
diff --git a/manifest.go b/manifest.go
index 602586f6..3de9c128 100644
--- a/manifest.go
+++ b/manifest.go
@@ -1488,8 +1488,18 @@ func (m *ManifestListWriter) AddManifests(files
[]ManifestFile) error {
case 2, 3:
for _, file := range files {
- if file.Version() != m.version {
- return fmt.Errorf("%w: ManifestListWriter only
supports version %d manifest files", ErrInvalidArgument, m.version)
+ // Per the Iceberg spec a v2 manifest list may
reference v1 manifest
+ // files (and a v3 list may reference v1 or v2
manifests) so that a
+ // table can be upgraded without rewriting historical
manifests. The
+ // in-memory ManifestFile produced for v1 inputs
already carries the
+ // inheritance values mandated by the spec —
Content=data and
+ // SeqNumber/MinSeqNumber=0 — so it can be encoded
directly against
+ // the v2/v3 entry schema. Newer-than-writer inputs are
rejected
+ // because the v2 schema cannot represent v3 fields
such as
+ // first_row_id.
+ if file.Version() > m.version {
+ return fmt.Errorf("%w: manifest list v%d cannot
reference v%d manifest files",
+ ErrInvalidArgument, m.version,
file.Version())
}
wrapped := *(file.(*manifestFile))
diff --git a/manifest_test.go b/manifest_test.go
index 36966d45..1bf944e8 100644
--- a/manifest_test.go
+++ b/manifest_test.go
@@ -1972,6 +1972,9 @@ func (w *limitedWriter) Write(p []byte) (int, error) {
}
func (m *ManifestTestSuite) TestWriteManifestListClosesWriterOnError() {
+ // A v2 manifest list cannot reference v3 manifests because the v2 entry
+ // schema has no first_row_id column; this gives us a deterministic
+ // AddManifests failure to assert that Close still flushes.
seqNum := int64(7)
var header bytes.Buffer
writer, err := NewManifestListWriterV2(&header, snapshotID, seqNum, nil)
@@ -1981,13 +1984,88 @@ func (m *ManifestTestSuite)
TestWriteManifestListClosesWriterOnError() {
out := &limitedWriter{limit: header.Len(), err: errLimitedWrite}
err = WriteManifestList(2, out, snapshotID, nil, &seqNum, 0,
[]ManifestFile{
manifestFileRecordsV2[0],
- manifestFileRecordsV1[0],
+ manifestFileRecordsV3[0],
})
m.Require().Error(err)
- m.Require().ErrorContains(err, "ManifestListWriter only supports
version 2 manifest files")
+ m.Require().ErrorContains(err, "manifest list v2 cannot reference v3
manifest files")
m.Require().ErrorIs(err, errLimitedWrite)
}
+// TestV2ManifestListAcceptsV1Manifests verifies the spec-mandated upgrade
path:
+// a v2 manifest list must be able to reference v1 manifest files written
before
+// the table was upgraded, with sequence_number and min_sequence_number both
+// inherited as 0 and content inherited as data.
+func (m *ManifestTestSuite) TestV2ManifestListAcceptsV1Manifests() {
+ seqNum := int64(42)
+ var buf bytes.Buffer
+
+ err := WriteManifestList(2, &buf, snapshotID, nil, &seqNum, 0,
[]ManifestFile{
+ manifestFileRecordsV1[0],
+ })
+ m.Require().NoError(err)
+
+ got, err := ReadManifestList(&buf)
+ m.Require().NoError(err)
+ m.Require().Len(got, 1)
+
+ entry := got[0]
+ m.Equal(manifestFileRecordsV1[0].FilePath(), entry.FilePath())
+ m.Equal(manifestFileRecordsV1[0].Length(), entry.Length())
+ m.Equal(ManifestContentData, entry.ManifestContent(), "v1 inheritance:
content must be data")
+ m.Equal(int64(0), entry.SequenceNum(), "v1 inheritance: sequence_number
must be 0")
+ m.Equal(int64(0), entry.MinSequenceNum(), "v1 inheritance:
min_sequence_number must be 0")
+ m.Equal(manifestFileRecordsV1[0].AddedRows(), entry.AddedRows())
+}
+
+// TestV3ManifestListAcceptsV1AndV2Manifests verifies that a v3 manifest list
+// can reference both v1 and v2 manifest files (e.g. after a v1->v3 upgrade)
+// and that first_row_id is assigned to data manifests during the write.
+func (m *ManifestTestSuite) TestV3ManifestListAcceptsV1AndV2Manifests() {
+ seqNum := int64(7)
+ firstRowID := int64(1000)
+ var buf bytes.Buffer
+
+ err := WriteManifestList(3, &buf, snapshotID, nil, &seqNum, firstRowID,
[]ManifestFile{
+ manifestFileRecordsV1[0],
+ manifestFileRecordsV2[0],
+ })
+ m.Require().NoError(err)
+
+ got, err := ReadManifestList(&buf)
+ m.Require().NoError(err)
+ m.Require().Len(got, 2)
+
+ v1Entry := got[0]
+ m.Equal(manifestFileRecordsV1[0].FilePath(), v1Entry.FilePath())
+ m.Equal(ManifestContentData, v1Entry.ManifestContent())
+ m.Equal(int64(0), v1Entry.SequenceNum())
+ m.Equal(int64(0), v1Entry.MinSequenceNum())
+ m.Require().NotNil(v1Entry.FirstRowID(), "v3 list must assign
first_row_id for data manifests")
+ m.Equal(firstRowID, *v1Entry.FirstRowID())
+
+ v2Entry := got[1]
+ m.Equal(manifestFileRecordsV2[0].FilePath(), v2Entry.FilePath())
+ // manifestFileRecordsV2[0] is a delete manifest, so first_row_id is not
+ // assigned (assignment is data-only per the v3 ManifestListWriter
rules).
+ m.Equal(ManifestContentDeletes, v2Entry.ManifestContent())
+ m.Nil(v2Entry.FirstRowID(), "delete manifests must not be assigned
first_row_id")
+}
+
+// TestV2ManifestListRejectsV3Manifests confirms that a v2 manifest list still
+// refuses to reference v3 manifest files, since the v2 entry schema has no
+// place to record first_row_id and accepting them would silently drop data.
+func (m *ManifestTestSuite) TestV2ManifestListRejectsV3Manifests() {
+ seqNum := int64(1)
+ var buf bytes.Buffer
+
+ err := WriteManifestList(2, &buf, snapshotID, nil, &seqNum, 0,
[]ManifestFile{
+ manifestFileRecordsV3[0],
+ })
+ m.Require().Error(err)
+ m.Require().ErrorIs(err, ErrInvalidArgument)
+ m.Require().ErrorContains(err, "manifest list v2 cannot reference v3
manifest files")
+}
+
// TestManifestRoundTripSortOrderID verifies that a sort_order_id written onto
// a data file survives an avro manifest round-trip (write → read). This
// backs the end-to-end guarantee that callers of WriteTask/WriteFileInfo see