This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 95df02cc fix: replace panics with proper error returns (#758)
95df02cc is described below

commit 95df02cc2eafc08e9ea5b18b9c9c60bdfba4e3e5
Author: Shubhendu <[email protected]>
AuthorDate: Fri Feb 27 01:51:19 2026 +0530

    fix: replace panics with proper error returns (#758)
    
    Two related bug classes existed in the schema layer that could surface
    as confusing runtime behaviour:
    
    1. panic() used as an error-signalling mechanism in visitor code.
    Several internal functions (addField,
    pruneColVisitor.Field/List/Map/projectSelectedStruct, validAvroName,
    sanitizeColumnNameVisitor.Field) used panic to signal validation errors.
    This was partially masked by a defer recover() in Visit, PreOrderVisit,
    and VisitSchemaWithPartner, but those recover blocks had two problems of
    their own:
    - String panics (e.g. panic("cannot validate empty name")) were wrapped
    with %s rather than %w, so the recovered error had no ErrInvalidSchema
    in its chain and errors.Is(err, ErrInvalidSchema) returned false.
    - All panics — both string and error — were re-wrapped with the prefix
    "error encountered during schema visitor: ...", leaking the internal
    panic/recover mechanism into caller-visible error messages.
    2. NestedField.UnmarshalJSON performs no validation of required fields.
    Go's encoding/json silently discards unknown keys, so a payload using
    "field_id" instead of "id", or "field_name" instead of "name", is
    accepted without error and leaves ID = 0 / Name = "". These zero values
    then propagate downstream and trigger the panics described above.
    
    ### Changes
    
    schema.go
    - Visit, PreOrderVisit, VisitSchemaWithPartner: recover block fixed —
    string panics wrapped with ErrInvalidSchema, error panics passed through
    directly.
    - validAvroName: panic("cannot validate empty name") → return false.
    - sanitizeName: guard added for empty input.
    - addField: reverts to panic(fmt.Errorf("%w: ...", ErrInvalidSchema,
    ...)) — no err field on indexByName.
    - pruneColVisitor.Field/List/Map: reverts to panic(...) — no err field,
    no if p.err != nil guards.
    - pruneColVisitor.projectSelectedStruct: bare string panic upgraded to
    typed error panic.
    - sanitizeColumnNameVisitor.Field: s.err accumulation →
    panic(fmt.Errorf("%w: ...", ErrInvalidSchema, ...)) — struct reverts to
    stateless struct{}, value receiver.
    - FindFieldByName, FindFieldByNameCaseInsensitive: propagate
    lazyNameToID errors instead of discarding with _.
    - Select: propagates lazyNameToID/lazyNameToIDLower errors instead of
    discarding with _.
    
    types.go
    - NestedField.UnmarshalJSON: validates ID != 0 and Name != "" after
    decoding; returns ErrInvalidSchema on failure.
    
    schema_test.go — two new tests:
    - TestIndexByNameDuplicateFieldNames: asserts IndexByName returns
    errors.Is(err, ErrInvalidSchema) with message containing "multiple
    fields for name".
    - TestSanitizeColumnNamesEmptyFieldName: asserts SanitizeColumnNames
    returns errors.Is(err, ErrInvalidSchema) with message containing "field
    name cannot be empty".
    
    types_test.go — three new tests:
    - TestNestedFieldUnmarshalMissingID: absent "id" key → ErrInvalidSchema.
    - TestNestedFieldUnmarshalWrongIDKey: "field_id" instead of "id" →
    ErrInvalidSchema.
    - TestNestedFieldUnmarshalMissingName: absent "name" key →
    ErrInvalidSchema.
    
    table/metadata_schema_compatibility_test.go
    - TestNewMetadata_DuplicateValues: replaced assert.Contains(t,
    err.Error(), "invalid schema: error encountered during schema visitor")
    with assert.ErrorIs(t, err,
    iceberg.ErrInvalidSchema) — the old assertion tested for the
    panic/recover wrapper that no longer appears in error messages.
    
    table/table_test.go
    - TestAddFilesFailsSchemaMismatch: updated expected error prefix from
    "error encountered during schema visitor:" to "invalid schema:" — the
    string panic in arrow_utils.go is now
    wrapped with ErrInvalidSchema by the fixed recover.
    
    ### Behaviour after this change
    
    Scenario: json.Unmarshal with "field_id" instead of "id"
    Before: Silent ID=0, no error
    After: ErrInvalidSchema
    ────────────────────────────────────────
    Scenario: json.Unmarshal with "field_name" instead of "name"
    Before: Silent Name="", no error
    After: ErrInvalidSchema
    ────────────────────────────────────────
    Scenario: SanitizeColumnNames with empty field name
    Before: Opaque error, errors.Is returns false
    After: ErrInvalidSchema, errors.Is returns true
    ────────────────────────────────────────
    Scenario: IndexByName with duplicate field names
    Before: errors.Is works but message contained "error encountered during
    schema visitor:"
    After: Clean message, errors.Is works
    ────────────────────────────────────────
    Scenario: All visitor errors
    Before: May contain "error encountered during schema visitor:" prefix
    After: No prefix; all errors carry ErrInvalidSchema
    
    Fixes: https://github.com/apache/iceberg-go/issues/757
    
    ---------
    
    Signed-off-by: Shubhendu Ram Tripathi <[email protected]>
    Co-authored-by: Matt Topol <[email protected]>
---
 schema.go                                   | 41 ++++++++++++++++++++++-------
 schema_test.go                              | 21 +++++++++++++++
 table/metadata_schema_compatibility_test.go |  2 +-
 table/table_test.go                         |  2 +-
 types.go                                    |  8 ++++++
 types_test.go                               | 30 +++++++++++++++++++++
 6 files changed, 93 insertions(+), 11 deletions(-)

diff --git a/schema.go b/schema.go
index f3bd9ce0..99f6efee 100644
--- a/schema.go
+++ b/schema.go
@@ -264,7 +264,10 @@ func (s *Schema) FindColumnName(fieldID int) (string, 
bool) {
 // Note: This search is done in a case sensitive manner. To perform
 // a case insensitive search, use [*Schema.FindFieldByNameCaseInsensitive].
 func (s *Schema) FindFieldByName(name string) (NestedField, bool) {
-       idx, _ := s.lazyNameToID()
+       idx, err := s.lazyNameToID()
+       if err != nil {
+               return NestedField{}, false
+       }
 
        id, ok := idx[name]
        if !ok {
@@ -277,7 +280,10 @@ func (s *Schema) FindFieldByName(name string) 
(NestedField, bool) {
 // FindFieldByNameCaseInsensitive is like [*Schema.FindFieldByName],
 // but performs a case insensitive search.
 func (s *Schema) FindFieldByNameCaseInsensitive(name string) (NestedField, 
bool) {
-       idx, _ := s.lazyNameToIDLower()
+       idx, err := s.lazyNameToIDLower()
+       if err != nil {
+               return NestedField{}, false
+       }
 
        id, ok := idx[strings.ToLower(name)]
        if !ok {
@@ -384,7 +390,11 @@ var void = Void{}
 func (s *Schema) Select(caseSensitive bool, names ...string) (*Schema, error) {
        ids := make(map[int]Void)
        if caseSensitive {
-               nameMap, _ := s.lazyNameToID()
+               nameMap, err := s.lazyNameToID()
+               if err != nil {
+                       return nil, err
+               }
+
                for _, n := range names {
                        id, ok := nameMap[n]
                        if !ok {
@@ -393,7 +403,11 @@ func (s *Schema) Select(caseSensitive bool, names 
...string) (*Schema, error) {
                        ids[id] = void
                }
        } else {
-               nameMap, _ := s.lazyNameToIDLower()
+               nameMap, err := s.lazyNameToIDLower()
+               if err != nil {
+                       return nil, err
+               }
+
                for _, n := range names {
                        id, ok := nameMap[strings.ToLower(n)]
                        if !ok {
@@ -508,7 +522,7 @@ func Visit[T any](sc *Schema, visitor SchemaVisitor[T]) 
(res T, err error) {
                if r := recover(); r != nil {
                        switch e := r.(type) {
                        case string:
-                               err = fmt.Errorf("error encountered during 
schema visitor: %s", e)
+                               err = fmt.Errorf("%w: %s", ErrInvalidSchema, e)
                        case error:
                                err = fmt.Errorf("error encountered during 
schema visitor: %w", e)
                        }
@@ -667,7 +681,7 @@ func PreOrderVisit[T any](sc *Schema, visitor 
PreOrderSchemaVisitor[T]) (res T,
                if r := recover(); r != nil {
                        switch e := r.(type) {
                        case string:
-                               err = fmt.Errorf("error encountered during 
schema visitor: %s", e)
+                               err = fmt.Errorf("%w: %s", ErrInvalidSchema, e)
                        case error:
                                err = fmt.Errorf("error encountered during 
schema visitor: %w", e)
                        }
@@ -1068,7 +1082,7 @@ func (*pruneColVisitor) projectSelectedStruct(projected 
Type) *StructType {
                return ty
        }
 
-       panic("expected a struct")
+       panic(fmt.Errorf("%w: expected a struct type, got %T", 
ErrInvalidSchema, projected))
 }
 
 func (*pruneColVisitor) projectList(listType *ListType, elementResult Type) 
*ListType {
@@ -1351,7 +1365,7 @@ func VisitSchemaWithPartner[T, P any](sc *Schema, partner 
P, visitor SchemaWithP
                if r := recover(); r != nil {
                        switch e := r.(type) {
                        case string:
-                               err = fmt.Errorf("error encountered during 
schema visitor: %s", e)
+                               err = fmt.Errorf("%w: %s", ErrInvalidSchema, e)
                        case error:
                                err = fmt.Errorf("error encountered during 
schema visitor: %w", e)
                        }
@@ -1476,7 +1490,7 @@ func makeCompatibleName(n string) string {
 
 func validAvroName(n string) bool {
        if len(n) == 0 {
-               panic("cannot validate empty name")
+               return false
        }
 
        if !unicode.IsLetter(rune(n[0])) && n[0] != '_' {
@@ -1501,6 +1515,10 @@ func sanitize(r rune) string {
 }
 
 func sanitizeName(n string) string {
+       if len(n) == 0 {
+               return ""
+       }
+
        var b strings.Builder
        b.Grow(len(n))
 
@@ -1540,6 +1558,11 @@ func (sanitizeColumnNameVisitor) Schema(_ *Schema, 
structResult NestedField) Nes
 
 func (sanitizeColumnNameVisitor) Field(field NestedField, fieldResult 
NestedField) NestedField {
        field.Type = fieldResult.Type
+
+       if field.Name == "" {
+               panic(fmt.Errorf("%w: field name cannot be empty", 
ErrInvalidSchema))
+       }
+
        field.Name = makeCompatibleName(field.Name)
 
        return field
diff --git a/schema_test.go b/schema_test.go
index c4ea21a6..82daa0c8 100644
--- a/schema_test.go
+++ b/schema_test.go
@@ -922,3 +922,24 @@ func TestHighestFieldIDListType(t *testing.T) {
        )
        assert.Equal(t, 2, tableSchema.HighestFieldID())
 }
+
+func TestIndexByNameDuplicateFieldNames(t *testing.T) {
+       sc := iceberg.NewSchema(1,
+               iceberg.NestedField{ID: 1, Name: "col", Type: 
iceberg.PrimitiveTypes.String, Required: false},
+               iceberg.NestedField{ID: 2, Name: "col", Type: 
iceberg.PrimitiveTypes.Int32, Required: false},
+       )
+
+       _, err := iceberg.IndexByName(sc)
+       assert.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+       assert.ErrorContains(t, err, "multiple fields for name col")
+}
+
+func TestSanitizeColumnNamesEmptyFieldName(t *testing.T) {
+       sc := iceberg.NewSchema(1,
+               iceberg.NestedField{ID: 1, Name: "", Type: 
iceberg.PrimitiveTypes.String, Required: false},
+       )
+
+       _, err := iceberg.SanitizeColumnNames(sc)
+       assert.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+       assert.ErrorContains(t, err, "field name cannot be empty")
+}
diff --git a/table/metadata_schema_compatibility_test.go 
b/table/metadata_schema_compatibility_test.go
index eb67fe54..2fc5aca7 100644
--- a/table/metadata_schema_compatibility_test.go
+++ b/table/metadata_schema_compatibility_test.go
@@ -47,6 +47,6 @@ func TestNewMetadata_DuplicateValues(t *testing.T) {
        )
 
        require.Error(t, err)
-       assert.Contains(t, err.Error(), "invalid schema: error encountered 
during schema visitor")
+       assert.ErrorIs(t, err, iceberg.ErrInvalidSchema)
        assert.Contains(t, err.Error(), "multiple fields for name foo")
 }
diff --git a/table/table_test.go b/table/table_test.go
index 6b47bcb0..02e34482 100644
--- a/table/table_test.go
+++ b/table/table_test.go
@@ -507,7 +507,7 @@ func (t *TableWritingTestSuite) 
TestAddFilesFailsSchemaMismatch() {
        tx := tbl.NewTransaction()
        err = tx.AddFiles(t.ctx, files, nil, false)
        t.Error(err)
-       t.EqualError(err, `error encountered during schema visitor: mismatch in 
fields:
+       t.EqualError(err, `invalid schema: mismatch in fields:
    | Table Field              | Requested Field         
 ✅ | 1: foo: optional boolean | 1: foo: optional boolean
 ✅ | 2: bar: optional string  | 2: bar: optional string 
diff --git a/types.go b/types.go
index 7ec0e08c..84083adf 100644
--- a/types.go
+++ b/types.go
@@ -244,6 +244,14 @@ func (n *NestedField) UnmarshalJSON(b []byte) error {
 
        n.Type = aux.Type.Type
 
+       if n.ID == 0 {
+               return fmt.Errorf("%w: field is missing required 'id' key in 
JSON", ErrInvalidSchema)
+       }
+
+       if n.Name == "" {
+               return fmt.Errorf("%w: field is missing required 'name' key in 
JSON", ErrInvalidSchema)
+       }
+
        return nil
 }
 
diff --git a/types_test.go b/types_test.go
index 8649c259..b888736e 100644
--- a/types_test.go
+++ b/types_test.go
@@ -393,3 +393,33 @@ func TestUnknownTypeEquality(t *testing.T) {
        assert.Equal(t, "unknown", unknown1.String())
        assert.Equal(t, "unknown", unknown2.String())
 }
+
+func TestNestedFieldUnmarshalMissingID(t *testing.T) {
+       // "id" key is absent — json.Unmarshal should error instead of silently 
leaving the ID zero
+       data := []byte(`{"name":"col","type":"string","required":false}`)
+
+       var f iceberg.NestedField
+       err := json.Unmarshal(data, &f)
+       assert.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+       assert.ErrorContains(t, err, "missing required 'id'")
+}
+
+func TestNestedFieldUnmarshalWrongIDKey(t *testing.T) {
+       // "field_id" instead of "id" — json.Unmarshal should error instead of 
silently ignore
+       data := 
[]byte(`{"field_id":1,"name":"col","type":"string","required":false}`)
+
+       var f iceberg.NestedField
+       err := json.Unmarshal(data, &f)
+       assert.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+       assert.ErrorContains(t, err, "missing required 'id'")
+}
+
+func TestNestedFieldUnmarshalMissingName(t *testing.T) {
+       // "name" key is absent — json.Unmarshal should error instead of 
silently leave as ""
+       data := []byte(`{"id":1,"type":"string","required":false}`)
+
+       var f iceberg.NestedField
+       err := json.Unmarshal(data, &f)
+       assert.ErrorIs(t, err, iceberg.ErrInvalidSchema)
+       assert.ErrorContains(t, err, "missing required 'name'")
+}

Reply via email to