This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 9efe04b  fix(arrow/array): optional struct array with required field 
(#359)
9efe04b is described below

commit 9efe04b210beda9145716b9f315db9cc6348f62b
Author: Matt Topol <[email protected]>
AuthorDate: Fri Apr 25 08:39:30 2025 -0700

    fix(arrow/array): optional struct array with required field (#359)
    
    ### Rationale for this change
    https://github.com/apache/iceberg-go/issues/398 discovered that the
    current `NewStructArrayWithFields` fails if any child is marked as
    non-nullable but has nulls (as would be the case in an optional struct
    array full of nulls with a required field). So we need to allow
    constructing such an array.
    
    ### What changes are included in this PR?
    A new function, `NewStructArrayWithFieldsAndNulls`, is added. It takes
    the columns, the list of fields, the top-level struct null bitmap, the
    number of nulls, and an offset.
    
    ### Are these changes tested?
    Yes, a unit test was created for it.
    
    ### Are there any user-facing changes?
    The case described above, which previously returned an error, now succeeds.
    
    Co-authored-by: Matt Topol <[email protected]>
---
 arrow/array/struct.go      | 20 +++++++++++++++-----
 arrow/array/struct_test.go | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/arrow/array/struct.go b/arrow/array/struct.go
index 564ae09..957947b 100644
--- a/arrow/array/struct.go
+++ b/arrow/array/struct.go
@@ -46,6 +46,13 @@ func NewStructArray(cols []arrow.Array, names []string) 
(*Struct, error) {
 // and provided fields. As opposed to NewStructArray, this allows you to 
provide
 // the full fields to utilize for the struct column instead of just the names.
 func NewStructArrayWithFields(cols []arrow.Array, fields []arrow.Field) 
(*Struct, error) {
+       return NewStructArrayWithFieldsAndNulls(cols, fields, nil, 0, 0)
+}
+
+// NewStructArrayWithFieldsAndNulls is like NewStructArrayWithFields as a 
convenience function,
+// but also takes in a null bitmap, the number of nulls, and an optional offset
+// to use for creating the Struct Array.
+func NewStructArrayWithFieldsAndNulls(cols []arrow.Array, fields 
[]arrow.Field, nullBitmap *memory.Buffer, nullCount int, offset int) (*Struct, 
error) {
        if len(cols) != len(fields) {
                return nil, fmt.Errorf("%w: mismatching number of fields and 
child arrays", arrow.ErrInvalid)
        }
@@ -63,15 +70,18 @@ func NewStructArrayWithFields(cols []arrow.Array, fields 
[]arrow.Field) (*Struct
                        return nil, fmt.Errorf("%w: mismatching data type for 
child #%d, field says '%s', got '%s'",
                                arrow.ErrInvalid, i, fields[i].Type, 
c.DataType())
                }
-               if !fields[i].Nullable && c.NullN() > 0 {
-                       return nil, fmt.Errorf("%w: field says not-nullable, 
child #%d has nulls",
-                               arrow.ErrInvalid, i)
-               }
 
                children[i] = c.Data()
        }
 
-       data := NewData(arrow.StructOf(fields...), length, 
[]*memory.Buffer{nil}, children, 0, 0)
+       if nullBitmap == nil {
+               if nullCount > 0 {
+                       return nil, fmt.Errorf("%w: null count is greater than 
0 but null bitmap is nil", arrow.ErrInvalid)
+               }
+               nullCount = 0
+       }
+
+       data := NewData(arrow.StructOf(fields...), length-offset, 
[]*memory.Buffer{nullBitmap}, children, nullCount, offset)
        defer data.Release()
        return NewStructData(data), nil
 }
diff --git a/arrow/array/struct_test.go b/arrow/array/struct_test.go
index a06ba83..24f522e 100644
--- a/arrow/array/struct_test.go
+++ b/arrow/array/struct_test.go
@@ -24,6 +24,7 @@ import (
        "github.com/apache/arrow-go/v18/arrow/array"
        "github.com/apache/arrow-go/v18/arrow/memory"
        "github.com/stretchr/testify/assert"
+       "github.com/stretchr/testify/require"
 )
 
 func TestStructArray(t *testing.T) {
@@ -530,3 +531,34 @@ func TestStructArrayUnmarshalJSONMissingFields(t 
*testing.T) {
                )
        }
 }
+
+func TestCreateStructWithNulls(t *testing.T) {
+       pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       defer pool.AssertSize(t, 0)
+
+       var (
+               fields = []arrow.Field{
+                       {Name: "f1", Type: arrow.PrimitiveTypes.Float64, 
Nullable: true},
+                       {Name: "f2", Type: arrow.PrimitiveTypes.Int32, 
Nullable: false},
+               }
+               dtype = arrow.StructOf(fields...)
+       )
+
+       sb := array.NewStructBuilder(pool, dtype)
+       defer sb.Release()
+
+       sb.AppendNulls(100)
+
+       arr := sb.NewArray().(*array.Struct)
+       defer arr.Release()
+
+       assert.EqualValues(t, 100, arr.Len())
+       assert.EqualValues(t, 100, arr.NullN())
+
+       arr2, err := array.NewStructArrayWithFieldsAndNulls(
+               []arrow.Array{arr.Field(0), arr.Field(1)}, fields, 
arr.Data().Buffers()[0], arr.NullN(), 0)
+       require.NoError(t, err)
+       defer arr2.Release()
+
+       assert.True(t, array.Equal(arr, arr2))
+}

Reply via email to