This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new dc7a9c38 fix(table): AddFiles() should respect
commit.manifest-merge.enabled (#806)
dc7a9c38 is described below
commit dc7a9c384ad425979543e70cbd5205c5f02e40a3
Author: Rudy <[email protected]>
AuthorDate: Wed Mar 25 13:18:44 2026 -0700
fix(table): AddFiles() should respect commit.manifest-merge.enabled (#806)
## Summary
Closes #805
`AddFiles()` hardcodes `.fastAppend()` at line 780, bypassing
`appendSnapshotProducer()`
which reads `commit.manifest-merge.enabled`. Setting this property has
no effect when
using `AddFiles()` — manifests accumulate one-per-commit indefinitely
regardless of the
property value.
---
## Root Cause
`appendSnapshotProducer()` already exists at line 126 and correctly
reads the property.
It is used by `AddDataFiles()` and `Append()` but was missed in
`AddFiles()`:
```go
// transaction.go line 780 — BEFORE (hardcoded, ignores property):
updater := t.updateSnapshot(fs, snapshotProps, OpAppend).fastAppend()
// AFTER (same as AddDataFiles() and Append()):
updater := t.appendSnapshotProducer(fs, snapshotProps)
```
| Method | Uses `appendSnapshotProducer()` |
|---|---|
| `AddDataFiles()` | ✅ yes |
| `Append()` | ✅ yes |
| `AddFiles()` | ❌ hardcoded `.fastAppend()` — **this bug** |
---
## Java Reference
`BaseTable.newAppend()` always returns `MergeAppend`.
`MANIFEST_MERGE_ENABLED_DEFAULT = true`
in the Java implementation. The Go implementation should behave
consistently with the
reference implementation.
---
## Test Gap
Three existing tests set `commit.manifest-merge.enabled=true` but none
call `AddFiles()`:
| Test | File | Uses `AddFiles()`? |
|---|---|---|
| `TestCommitV3RowLineageDeltaIncludesExistingRows` |
`snapshot_producers_test.go` | ❌ calls `newMergeAppendFilesProducer()`
directly |
| `TestMergeManifests` | `table_test.go` | ❌ uses `AppendTable()` →
`Append()` |
| `TestSetProperties` | `transaction_test.go` | ❌ only sets the
property, never appends |
The new regression test `table/addfiles_merge_regression_test.go`
exercises `AddFiles()`
directly with `commit.manifest-merge.enabled=true` and verifies the
merge fires.
It can be moved into `transaction_test.go` if preferred by reviewers.
---
## Changes
- **`table/transaction.go`** — 1-line fix: use
`appendSnapshotProducer()` instead of hardcoded `.fastAppend()`
- **`table/addfiles_merge_regression_test.go`** — regression test
proving the bug (fails before fix, passes after)
---
## Verification
**Regression test:**
- 3 sequential `AddFiles()` commits with `minCountToMerge=2`
- Before fix: 3 manifests (merge never fires)
- After fix: 1 merged manifest (merge fires on commit 3)
**Production verification** on a table with 1 commit/min:
- Before fix: HEAD snapshot accumulated **1121 manifests** after 18
hours (one per commit, never merged)
- After fix: HEAD snapshot has **16 manifests** after the next commit
(merge fired)
---
## Note on AI Assistance
This PR was developed with AI assistance (Claude). The author has
verified the
implementation, Java reference behavior, test coverage gap, and
production impact end-to-end.
---
table/addfiles_merge_regression_test.go | 162 ++++++++++++++++++++++++++++++++
table/transaction.go | 2 +-
2 files changed, 163 insertions(+), 1 deletion(-)
diff --git a/table/addfiles_merge_regression_test.go
b/table/addfiles_merge_regression_test.go
new file mode 100644
index 00000000..00ab419b
--- /dev/null
+++ b/table/addfiles_merge_regression_test.go
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table_test
+
+// Regression test for: AddFiles() ignoring commit.manifest-merge.enabled.
+//
+// Root cause: transaction.go hardcoded .fastAppend() in AddFiles(), bypassing
+// appendSnapshotProducer() which reads ManifestMergeEnabledKey.
+//
+// Java reference: BaseTable.newAppend() always uses MergeAppend (merge
default=true).
+// AddDataFiles() and Append() in Go both correctly call
appendSnapshotProducer().
+// Only AddFiles() was missing the call.
+//
+// Fix: change line 780 in transaction.go from:
+//
+// updater := t.updateSnapshot(fs, snapshotProps, OpAppend).fastAppend()
+//
+// to:
+//
+// updater := t.appendSnapshotProducer(fs, snapshotProps)
+
+import (
+ "context"
+ "fmt"
+ "testing"
+
+ "github.com/apache/arrow-go/v18/arrow"
+ "github.com/apache/arrow-go/v18/arrow/array"
+ "github.com/apache/arrow-go/v18/arrow/memory"
+ "github.com/apache/arrow-go/v18/parquet/pqarrow"
+ "github.com/apache/iceberg-go"
+ iceio "github.com/apache/iceberg-go/io"
+ "github.com/apache/iceberg-go/table"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// mergeCatalog is a minimal in-memory catalog that supports sequential commits
+// by applying updates to its stored metadata on each CommitTable call.
+type mergeCatalog struct {
+ meta table.Metadata
+}
+
+func (c *mergeCatalog) LoadTable(_ context.Context, _ table.Identifier)
(*table.Table, error) {
+ return nil, nil
+}
+
+func (c *mergeCatalog) CommitTable(_ context.Context, _ table.Identifier, _
[]table.Requirement, updates []table.Update) (table.Metadata, string, error) {
+ meta, err := table.UpdateTableMetadata(c.meta, updates, "")
+ if err != nil {
+ return nil, "", err
+ }
+
+ c.meta = meta
+
+ return meta, "", nil
+}
+
+// TestAddFilesRespectsMergeEnabled verifies that AddFiles() respects the
+// commit.manifest-merge.enabled table property.
+//
+// Before fix: 3 sequential AddFiles() commits produce 3 manifests because
+// fastAppend() is hardcoded and the merge property is ignored.
+//
+// After fix: with minCountToMerge=2, the 3rd commit triggers a merge and
+// produces 1 merged manifest.
+func TestAddFilesRespectsMergeEnabled(t *testing.T) {
+ ctx := context.Background()
+ dir := t.TempDir()
+ fs := iceio.LocalFS{}
+
+ // Minimal Arrow schema matching the Iceberg schema below.
+ arrowSchema := arrow.NewSchema([]arrow.Field{
+ {Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: false},
+ }, nil)
+
+ // writeParquet creates a single-row Parquet file at the given path.
+ writeParquet := func(path string) {
+ bldr := array.NewInt32Builder(memory.DefaultAllocator)
+ defer bldr.Release()
+
+ bldr.AppendValues([]int32{1}, nil)
+ col := bldr.NewArray()
+ defer col.Release()
+
+ rec := array.NewRecordBatch(arrowSchema, []arrow.Array{col}, 1)
+ defer rec.Release()
+
+ arrTbl := array.NewTableFromRecords(arrowSchema,
[]arrow.RecordBatch{rec})
+ defer arrTbl.Release()
+
+ fo, err := fs.Create(path)
+ require.NoError(t, err)
+
+ require.NoError(t, pqarrow.WriteTable(arrTbl, fo,
arrTbl.NumRows(),
+ nil, pqarrow.DefaultWriterProps()))
+ }
+
+ schema := iceberg.NewSchema(1,
+ iceberg.NestedField{ID: 1, Name: "id", Type:
iceberg.PrimitiveTypes.Int32, Required: true},
+ )
+
+ // Create table with manifest merging enabled and minCountToMerge=2.
+ // With minCount=2, a commit that accumulates >2 manifests triggers a
merge.
+ meta, err := table.NewMetadata(schema, iceberg.UnpartitionedSpec,
+ table.UnsortedSortOrder, dir, iceberg.Properties{
+ table.ManifestMergeEnabledKey: "true",
+ table.ManifestMinMergeCountKey: "2",
+ })
+ require.NoError(t, err)
+
+ cat := &mergeCatalog{meta: meta}
+ ident := table.Identifier{"default", "test_addfiles_merge"}
+ tbl := table.New(ident, meta, dir+"/metadata/00000.json",
+ func(_ context.Context) (iceio.IO, error) { return fs, nil },
+ cat,
+ )
+
+ // Perform 3 separate AddFiles commits — simulating the committer's
1-per-batch pattern.
+ // minCountToMerge=2 means the 3rd commit should trigger a merge.
+ const numCommits = 3
+ for i := range numCommits {
+ filePath := fmt.Sprintf("%s/data-%d.parquet", dir, i)
+ writeParquet(filePath)
+
+ txn := tbl.NewTransaction()
+ require.NoError(t, txn.AddFiles(ctx, []string{filePath}, nil,
false),
+ "AddFiles commit %d failed", i+1)
+
+ tbl, err = txn.Commit(ctx)
+ require.NoError(t, err, "Commit %d failed", i+1)
+ }
+
+ snap := tbl.CurrentSnapshot()
+ require.NotNil(t, snap, "table should have a current snapshot after
commits")
+
+ manifests, err := snap.Manifests(fs)
+ require.NoError(t, err, "reading manifests from HEAD snapshot failed")
+
+ // After fix: merge fires on commit 3 (accumulated 3 > minCount 2) → 1
merged manifest.
+ // Before fix: 3 manifests (fastAppend carries all forward without
merging).
+ assert.Equal(t, 1, len(manifests),
+ "expected 1 merged manifest after %d AddFiles commits with
manifest merge enabled "+
+ "(minCountToMerge=2), got %d manifests — "+
+ "AddFiles() is not respecting
commit.manifest-merge.enabled",
+ numCommits, len(manifests))
+}
diff --git a/table/transaction.go b/table/transaction.go
index 655ee748..51776a3e 100644
--- a/table/transaction.go
+++ b/table/transaction.go
@@ -798,7 +798,7 @@ func (t *Transaction) AddFiles(ctx context.Context,
filePaths []string, snapshot
return err
}
- updater := t.updateSnapshot(fs, snapshotProps, OpAppend).fastAppend()
+ updater := t.appendSnapshotProducer(fs, snapshotProps)
dataFiles, err := filesToDataFiles(ctx, fs, t.meta, filePaths,
addFilesOp.concurrency)
if err != nil {