This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new dc7a9c38 fix(table): AddFiles() should respect 
commit.manifest-merge.enabled (#806)
dc7a9c38 is described below

commit dc7a9c384ad425979543e70cbd5205c5f02e40a3
Author: Rudy <[email protected]>
AuthorDate: Wed Mar 25 13:18:44 2026 -0700

    fix(table): AddFiles() should respect commit.manifest-merge.enabled (#806)
    
    ## Summary
    
    Closes #805
    
    `AddFiles()` hardcodes `.fastAppend()` at line 780, bypassing
    `appendSnapshotProducer()`
    which reads `commit.manifest-merge.enabled`. Setting this property has
    no effect when
    using `AddFiles()` — manifests accumulate one-per-commit indefinitely
    regardless of the
    property value.
    
    ---
    
    ## Root Cause
    
    `appendSnapshotProducer()` already exists at line 126 and correctly
    reads the property.
    It is used by `AddDataFiles()` and `Append()` but was missed in
    `AddFiles()`:
    
    ```go
    // transaction.go line 780 — BEFORE (hardcoded, ignores property):
    updater := t.updateSnapshot(fs, snapshotProps, OpAppend).fastAppend()
    
    // AFTER (same as AddDataFiles() and Append()):
    updater := t.appendSnapshotProducer(fs, snapshotProps)
    ```
    
    | Method | Uses `appendSnapshotProducer()` |
    |---|---|
    | `AddDataFiles()` | ✅ yes |
    | `Append()` | ✅ yes |
    | `AddFiles()` | ❌ hardcoded `.fastAppend()` — **this bug** |
    
    ---
    
    ## Java Reference
    
    `BaseTable.newAppend()` always returns `MergeAppend`.
    `MANIFEST_MERGE_ENABLED_DEFAULT = true`
    in the Java implementation. The Go implementation should behave
    consistently with the
    reference implementation.
    
    ---
    
    ## Test Gap
    
    Three existing tests set `commit.manifest-merge.enabled=true` but none
    call `AddFiles()`:
    
    | Test | File | Uses `AddFiles()`? |
    |---|---|---|
    | `TestCommitV3RowLineageDeltaIncludesExistingRows` |
    `snapshot_producers_test.go` | ❌ calls `newMergeAppendFilesProducer()`
    directly |
    | `TestMergeManifests` | `table_test.go` | ❌ uses `AppendTable()` →
    `Append()` |
    | `TestSetProperties` | `transaction_test.go` | ❌ only sets the
    property, never appends |
    
    The new regression test `table/addfiles_merge_regression_test.go`
    exercises `AddFiles()`
    directly with `commit.manifest-merge.enabled=true` and verifies the
    merge fires.
    It can be moved into `transaction_test.go` if preferred by reviewers.
    
    ---
    
    ## Changes
    
    - **`table/transaction.go`** — 1-line fix: use
    `appendSnapshotProducer()` instead of hardcoded `.fastAppend()`
    - **`table/addfiles_merge_regression_test.go`** — regression test
    proving the bug (fails before fix, passes after)
    
    ---
    
    ## Verification
    
    **Regression test:**
    - 3 sequential `AddFiles()` commits with `minCountToMerge=2`
    - Before fix: 3 manifests (merge never fires)
    - After fix: 1 merged manifest (merge fires on commit 3)
    
    **Production verification** on a table with 1 commit/min:
    - Before fix: HEAD snapshot accumulated **1121 manifests** after 18
    hours (one per commit, never merged)
    - After fix: HEAD snapshot has **16 manifests** after the next commit
    (merge fired)
    
    ---
    
    ## Note on AI Assistance
    
    This PR was developed with AI assistance (Claude). The author has
    verified the
    implementation, Java reference behavior, test coverage gap, and
    production impact end-to-end.
---
 table/addfiles_merge_regression_test.go | 162 ++++++++++++++++++++++++++++++++
 table/transaction.go                    |   2 +-
 2 files changed, 163 insertions(+), 1 deletion(-)

diff --git a/table/addfiles_merge_regression_test.go 
b/table/addfiles_merge_regression_test.go
new file mode 100644
index 00000000..00ab419b
--- /dev/null
+++ b/table/addfiles_merge_regression_test.go
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table_test
+
+// Regression test for: AddFiles() ignoring commit.manifest-merge.enabled.
+//
+// Root cause: transaction.go hardcoded .fastAppend() in AddFiles(), bypassing
+// appendSnapshotProducer() which reads ManifestMergeEnabledKey.
+//
+// Java reference: BaseTable.newAppend() always uses MergeAppend (merge 
default=true).
+// AddDataFiles() and Append() in Go both correctly call 
appendSnapshotProducer().
+// Only AddFiles() was missing the call.
+//
+// Fix: change line 780 in transaction.go from:
+//
+//     updater := t.updateSnapshot(fs, snapshotProps, OpAppend).fastAppend()
+//
+// to:
+//
+//     updater := t.appendSnapshotProducer(fs, snapshotProps)
+
+import (
+       "context"
+       "fmt"
+       "testing"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/arrow-go/v18/arrow/array"
+       "github.com/apache/arrow-go/v18/arrow/memory"
+       "github.com/apache/arrow-go/v18/parquet/pqarrow"
+       "github.com/apache/iceberg-go"
+       iceio "github.com/apache/iceberg-go/io"
+       "github.com/apache/iceberg-go/table"
+       "github.com/stretchr/testify/assert"
+       "github.com/stretchr/testify/require"
+)
+
+// mergeCatalog is a minimal in-memory catalog that supports sequential commits
+// by applying updates to its stored metadata on each CommitTable call.
+type mergeCatalog struct {
+       meta table.Metadata
+}
+
+func (c *mergeCatalog) LoadTable(_ context.Context, _ table.Identifier) 
(*table.Table, error) {
+       return nil, nil
+}
+
+func (c *mergeCatalog) CommitTable(_ context.Context, _ table.Identifier, _ 
[]table.Requirement, updates []table.Update) (table.Metadata, string, error) {
+       meta, err := table.UpdateTableMetadata(c.meta, updates, "")
+       if err != nil {
+               return nil, "", err
+       }
+
+       c.meta = meta
+
+       return meta, "", nil
+}
+
+// TestAddFilesRespectsMergeEnabled verifies that AddFiles() respects the
+// commit.manifest-merge.enabled table property.
+//
+// Before fix: 3 sequential AddFiles() commits produce 3 manifests because
+// fastAppend() is hardcoded and the merge property is ignored.
+//
+// After fix: with minCountToMerge=2, the 3rd commit triggers a merge and
+// produces 1 merged manifest.
+func TestAddFilesRespectsMergeEnabled(t *testing.T) {
+       ctx := context.Background()
+       dir := t.TempDir()
+       fs := iceio.LocalFS{}
+
+       // Minimal Arrow schema matching the Iceberg schema below.
+       arrowSchema := arrow.NewSchema([]arrow.Field{
+               {Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: false},
+       }, nil)
+
+       // writeParquet creates a single-row Parquet file at the given path.
+       writeParquet := func(path string) {
+               bldr := array.NewInt32Builder(memory.DefaultAllocator)
+               defer bldr.Release()
+
+               bldr.AppendValues([]int32{1}, nil)
+               col := bldr.NewArray()
+               defer col.Release()
+
+               rec := array.NewRecordBatch(arrowSchema, []arrow.Array{col}, 1)
+               defer rec.Release()
+
+               arrTbl := array.NewTableFromRecords(arrowSchema, 
[]arrow.RecordBatch{rec})
+               defer arrTbl.Release()
+
+               fo, err := fs.Create(path)
+               require.NoError(t, err)
+
+               require.NoError(t, pqarrow.WriteTable(arrTbl, fo, 
arrTbl.NumRows(),
+                       nil, pqarrow.DefaultWriterProps()))
+       }
+
+       schema := iceberg.NewSchema(1,
+               iceberg.NestedField{ID: 1, Name: "id", Type: 
iceberg.PrimitiveTypes.Int32, Required: true},
+       )
+
+       // Create table with manifest merging enabled and minCountToMerge=2.
+       // With minCount=2, a commit that accumulates >2 manifests triggers a 
merge.
+       meta, err := table.NewMetadata(schema, iceberg.UnpartitionedSpec,
+               table.UnsortedSortOrder, dir, iceberg.Properties{
+                       table.ManifestMergeEnabledKey:  "true",
+                       table.ManifestMinMergeCountKey: "2",
+               })
+       require.NoError(t, err)
+
+       cat := &mergeCatalog{meta: meta}
+       ident := table.Identifier{"default", "test_addfiles_merge"}
+       tbl := table.New(ident, meta, dir+"/metadata/00000.json",
+               func(_ context.Context) (iceio.IO, error) { return fs, nil },
+               cat,
+       )
+
+       // Perform 3 separate AddFiles commits — simulating the committer's 
1-per-batch pattern.
+       // minCountToMerge=2 means the 3rd commit should trigger a merge.
+       const numCommits = 3
+       for i := range numCommits {
+               filePath := fmt.Sprintf("%s/data-%d.parquet", dir, i)
+               writeParquet(filePath)
+
+               txn := tbl.NewTransaction()
+               require.NoError(t, txn.AddFiles(ctx, []string{filePath}, nil, 
false),
+                       "AddFiles commit %d failed", i+1)
+
+               tbl, err = txn.Commit(ctx)
+               require.NoError(t, err, "Commit %d failed", i+1)
+       }
+
+       snap := tbl.CurrentSnapshot()
+       require.NotNil(t, snap, "table should have a current snapshot after 
commits")
+
+       manifests, err := snap.Manifests(fs)
+       require.NoError(t, err, "reading manifests from HEAD snapshot failed")
+
+       // After fix: merge fires on commit 3 (accumulated 3 > minCount 2) → 1 
merged manifest.
+       // Before fix: 3 manifests (fastAppend carries all forward without 
merging).
+       assert.Equal(t, 1, len(manifests),
+               "expected 1 merged manifest after %d AddFiles commits with 
manifest merge enabled "+
+                       "(minCountToMerge=2), got %d manifests — "+
+                       "AddFiles() is not respecting 
commit.manifest-merge.enabled",
+               numCommits, len(manifests))
+}
diff --git a/table/transaction.go b/table/transaction.go
index 655ee748..51776a3e 100644
--- a/table/transaction.go
+++ b/table/transaction.go
@@ -798,7 +798,7 @@ func (t *Transaction) AddFiles(ctx context.Context, 
filePaths []string, snapshot
                return err
        }
 
-       updater := t.updateSnapshot(fs, snapshotProps, OpAppend).fastAppend()
+       updater := t.appendSnapshotProducer(fs, snapshotProps)
 
        dataFiles, err := filesToDataFiles(ctx, fs, t.meta, filePaths, 
addFilesOp.concurrency)
        if err != nil {

Reply via email to