This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new a6f29f2f fix: field id:0 incorrectly rejected as missing by 
NestedField.UnmarshalJSON (#761)
a6f29f2f is described below

commit a6f29f2f6c23872cd452f4cf01175591410daff4
Author: Shubhendu <[email protected]>
AuthorDate: Fri Feb 27 22:22:22 2026 +0530

    fix: field id:0 incorrectly rejected as missing by 
NestedField.UnmarshalJSON (#761)
    
    ### Problem
    
    Commit 95df02c introduced validation in NestedField.UnmarshalJSON to
    catch payloads that omit the required "id" key (e.g. using "field_id" by
    mistake). The check used was:
    ```
    if n.ID == 0 {
        return fmt.Errorf("%w: field is missing required 'id' key in JSON", 
ErrInvalidSchema)
    }
    ```
    This conflates two distinct situations:
    
    1. The "id" key is absent from JSON → n.ID stays at its zero value 0
    (invalid, should error).
    2. The "id" key is present with value 0 → n.ID is also 0 (valid, should
    not error).
    
    Go's encoding/json leaves fields at their zero value when a key is
    absent, so both cases are indistinguishable using n.ID == 0. As a
    result, any schema payload with a field using id:0 — which Spark sends
    for freshly created tables — is incorrectly rejected with
    ErrInvalidSchema.
    
    ### Example failing payload:
    ```
    {
      "name": "my_table1",
      "schema": {
        "type": "struct",
        "schema-id": 0,
        "fields": [
          {"id": 0, "name": "id",      "required": false, "type": "long"},
          {"id": 1, "name": "strings", "required": false, "type": "string"},
          {"id": 2, "name": "floats",  "required": false, "type": "double"}
        ]
      },
      "stage-create": true,
      ...
    }
    ```
    
    ### Fix
    
    Use *int for the id field in the unmarshaling aux struct. A nil pointer
    unambiguously means the key was absent from JSON; a non-nil pointer
    (including &0) means the key was present. The outer ID *int field
    shadows the embedded Alias.ID int during JSON decoding, and n.ID is then
    set explicitly from the dereferenced pointer.
    
    ```
    aux := struct {
        ID   *int      `json:"id"`   // nil = key absent, &0 = key present with 
value 0
        Type typeIFace `json:"type"`
        *Alias
    }{Alias: (*Alias)(n)}
    ```
    
    ### Changes
    
    types.go
    - NestedField.UnmarshalJSON: introduce ID *int in the aux struct; check
    aux.ID == nil instead of n.ID == 0; set n.ID = *aux.ID after the nil
    guard.
    
    types_test.go
    - TestNestedFieldUnmarshalZeroIDIsValid: new test asserting that
    {"id":0,...} unmarshals without error and produces ID == 0.
    
    ### Behavior after this change
    
    | Scenario | Before | After |
    
    
|----------------------------|-------------------------------|------------------|
    | "id" key absent from JSON | ErrInvalidSchema | ErrInvalidSchema |
    | "field_id" instead of "id" | ErrInvalidSchema | ErrInvalidSchema |
    | "id":0 present in JSON | ErrInvalidSchema (regression) | OK, ID == 0 |
    | "id":N (N > 0) present | OK | OK |
    
    Signed-off-by: Shubhendu Ram Tripathi <[email protected]>
---
 types.go      |  4 +++-
 types_test.go | 11 +++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/types.go b/types.go
index 84083adf..dc8beb2f 100644
--- a/types.go
+++ b/types.go
@@ -232,6 +232,7 @@ func (n NestedField) MarshalJSON() ([]byte, error) {
 func (n *NestedField) UnmarshalJSON(b []byte) error {
        type Alias NestedField
        aux := struct {
+               ID   *int      `json:"id"`
                Type typeIFace `json:"type"`
                *Alias
        }{
@@ -244,9 +245,10 @@ func (n *NestedField) UnmarshalJSON(b []byte) error {
 
        n.Type = aux.Type.Type
 
-       if n.ID == 0 {
+       if aux.ID == nil {
                return fmt.Errorf("%w: field is missing required 'id' key in 
JSON", ErrInvalidSchema)
        }
+       n.ID = *aux.ID
 
        if n.Name == "" {
                return fmt.Errorf("%w: field is missing required 'name' key in 
JSON", ErrInvalidSchema)
diff --git a/types_test.go b/types_test.go
index b888736e..cc43f61b 100644
--- a/types_test.go
+++ b/types_test.go
@@ -423,3 +423,14 @@ func TestNestedFieldUnmarshalMissingName(t *testing.T) {
        assert.ErrorIs(t, err, iceberg.ErrInvalidSchema)
        assert.ErrorContains(t, err, "missing required 'name'")
 }
+
+func TestNestedFieldUnmarshalZeroIDIsValid(t *testing.T) {
+       // id:0 is a valid field ID — should NOT be treated as a missing key
+       data := []byte(`{"id":0,"name":"col","type":"string","required":false}`)
+
+       var f iceberg.NestedField
+       err := json.Unmarshal(data, &f)
+       assert.NoError(t, err)
+       assert.Equal(t, 0, f.ID)
+       assert.Equal(t, "col", f.Name)
+}

Reply via email to