This is an automated email from the ASF dual-hosted git repository.
laskoviymishka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 0c4b5b71 feat(table): skip validator registration for no-op producers
(#1022)
0c4b5b71 is described below
commit 0c4b5b7171d6882934ff0f0c8061484c175536c3
Author: Hectar <[email protected]>
AuthorDate: Thu May 7 16:49:01 2026 +0900
feat(table): skip validator registration for no-op producers (#1022)
## Summary
This PR gates validator registration on a new `needsValidation() bool`
method on the `producerImpl` interface:
- `fastAppendFiles` returns `false`: appends are commutative and need no
conflict checks
- `mergeAppendFiles` inherits `false` via embedding
- `overwriteFiles` returns `true`: real conflict validation required
- `snapshotProducer.commit()` skips registration when
`needsValidation()` is false
The no-op `validate()` methods are kept as belt-and-suspenders.
Behavior is locked with two unit tests asserting that fast-append and
merge-append commits leave `txn.validators` empty.
Parent: #830
---------
Signed-off-by: hectar-glitches <[email protected]>
---
table/commit_validation_test.go | 34 ++++++++++++++++++++++++++++++++++
table/producer_validate_test.go | 8 ++++----
table/snapshot_producers.go | 21 ++++++++++++++++-----
table/snapshot_producers_test.go | 2 ++
4 files changed, 56 insertions(+), 9 deletions(-)
diff --git a/table/commit_validation_test.go b/table/commit_validation_test.go
index 232d8ebb..f805c298 100644
--- a/table/commit_validation_test.go
+++ b/table/commit_validation_test.go
@@ -147,3 +147,37 @@ func TestRowDelta_RegistersValidatorOnTransaction(t
*testing.T) {
require.NotEmpty(t, txn.validators,
"RowDelta.Commit must append at least one validator to the
transaction")
}
+
+// TestFastAppend_CommitLeavesValidatorsEmpty asserts that
+// fastAppendFiles.commit() does not register a validator on
+// txn.validators. Fast-appends are commutative and need no
+// conflict validation; accumulating no-op closures is wasteful.
+func TestFastAppend_CommitLeavesValidatorsEmpty(t *testing.T) {
+ trackIO := newTrackingIO()
+ spec := iceberg.NewPartitionSpec()
+ txn := createTestTransaction(t, trackIO, spec)
+
+ sp := newFastAppendFilesProducer(OpAppend, txn, trackIO, nil, nil)
+ sp.appendDataFile(newTestDataFile(t, spec, "file://data.parquet", nil))
+
+ _, _, err := sp.commit(context.Background())
+ require.NoError(t, err)
+ assert.Empty(t, txn.validators,
+ "fastAppendFiles.commit must not register a validator on the
transaction")
+}
+
+// TestMergeAppend_CommitLeavesValidatorsEmpty pins the same behaviour
+// for mergeAppendFiles, which inherits needsValidation() via embedding.
+func TestMergeAppend_CommitLeavesValidatorsEmpty(t *testing.T) {
+ trackIO := newTrackingIO()
+ spec := iceberg.NewPartitionSpec()
+ txn := createTestTransaction(t, trackIO, spec)
+
+ sp := newMergeAppendFilesProducer(OpAppend, txn, trackIO, nil, nil)
+ sp.appendDataFile(newTestDataFile(t, spec, "file://data.parquet", nil))
+
+ _, _, err := sp.commit(context.Background())
+ require.NoError(t, err)
+ assert.Empty(t, txn.validators,
+ "mergeAppendFiles.commit must not register a validator on the
transaction")
+}
diff --git a/table/producer_validate_test.go b/table/producer_validate_test.go
index 65bd6c1a..6ac1840f 100644
--- a/table/producer_validate_test.go
+++ b/table/producer_validate_test.go
@@ -108,13 +108,13 @@ func TestFastAppendFiles_ValidateNoop(t *testing.T) {
require.NoError(t, fa.validate(newEmptyConflictContext(t)))
}
-// mergeAppendFiles embeds fastAppendFiles; the validator promotion
-// makes it satisfy producerImpl without an explicit method. Pin the
-// behavior so a future refactor doesn't silently drop it.
-func TestMergeAppendFiles_InheritsFastAppendValidator(t *testing.T) {
+// mergeAppendFiles has its own explicit needsValidation() and validate()
+// methods. Pin the behavior so a future refactor doesn't silently drop it.
+func TestMergeAppendFiles_ValidateNoop(t *testing.T) {
ma := &mergeAppendFiles{}
require.NoError(t, ma.validate(nil))
require.NoError(t, ma.validate(newEmptyConflictContext(t)))
+ require.False(t, ma.needsValidation())
var _ producerImpl = ma
}
diff --git a/table/snapshot_producers.go b/table/snapshot_producers.go
index 85c15f35..9d0f052b 100644
--- a/table/snapshot_producers.go
+++ b/table/snapshot_producers.go
@@ -55,6 +55,11 @@ type producerImpl interface {
// producers that are safe against concurrent appends (fast-append
// and merge-append).
validate(cc *conflictContext) error
+ // needsValidation reports whether this producer's validate method
+ // performs real conflict checks. Return false only if validate is
+ // unconditionally a no-op; commit() skips validator registration
+ // entirely when this returns false, so validate will never run.
+ needsValidation() bool
}
func newManifestFileName(num int, commit uuid.UUID) string {
@@ -112,6 +117,8 @@ func (fa *fastAppendFiles) validate(_ *conflictContext)
error {
return nil
}
+func (fa *fastAppendFiles) needsValidation() bool { return false }
+
type overwriteFiles struct {
base *snapshotProducer
@@ -324,6 +331,8 @@ func (of *overwriteFiles) deletedEntries(ctx
context.Context) ([]iceberg.Manifes
return finalResult, nil
}
+func (of *overwriteFiles) needsValidation() bool { return true }
+
type manifestMergeManager struct {
targetSizeBytes int
minCountToMerge int
@@ -498,6 +507,8 @@ func (m *mergeAppendFiles) processManifests(manifests
[]iceberg.ManifestFile) ([
return append(result, unmergedDeleteManifests...), nil
}
+func (m *mergeAppendFiles) needsValidation() bool { return false }
+
type snapshotProducer struct {
producerImpl
@@ -893,11 +904,11 @@ func (sp *snapshotProducer) commit(ctx context.Context)
(_ []Update, _ []Require
// transaction. doCommit runs it against the current catalog state
// before cat.CommitTable so conflicts the catalog can't see
// (partition-filter overlap, referenced-file removal) are caught
- // pre-flight. Fast/merge-append producers return nil here and
- // pay only the no-op-closure cost. Nil producerImpl is possible
- // only in unit tests that exercise commit() directly on a bare
- // snapshotProducer; guard to keep those tests green.
- if impl := sp.producerImpl; impl != nil {
+ // pre-flight. Producers that are commutative against concurrent
+ // appends (fast-append, merge-append) opt out via needsValidation()
+ // == false and skip registration entirely. The nil guard remains for
+ // unit tests that drive commit() on a bare snapshotProducer.
+ if impl := sp.producerImpl; impl != nil && impl.needsValidation() {
sp.txn.validators = append(sp.txn.validators, func(cc
*conflictContext) error {
return impl.validate(cc)
})
diff --git a/table/snapshot_producers_test.go b/table/snapshot_producers_test.go
index d9d31bf8..03087f70 100644
--- a/table/snapshot_producers_test.go
+++ b/table/snapshot_producers_test.go
@@ -724,6 +724,8 @@ func (e *errorOnDeletedEntries) validate(_
*conflictContext) error {
return nil
}
+func (e *errorOnDeletedEntries) needsValidation() bool { return true }
+
// blockingTrackingIO extends trackingIO to signal when a writer is created.
type blockingTrackingIO struct {
*trackingIO