This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 9efe04b fix(arrow/array): optional struct array with required field
(#359)
9efe04b is described below
commit 9efe04b210beda9145716b9f315db9cc6348f62b
Author: Matt Topol <[email protected]>
AuthorDate: Fri Apr 25 08:39:30 2025 -0700
fix(arrow/array): optional struct array with required field (#359)
### Rationale for this change
The report in https://github.com/apache/iceberg-go/issues/398 revealed
that the current `NewStructArrayWithFields` fails if any child is marked
as non-nullable but has nulls (as would be the case in an optional struct
array full of nulls with a required field), so we need to allow
constructing such an array.
### What changes are included in this PR?
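A new function, `NewStructArrayWithFieldsAndNulls`, is added. It takes
the child columns, the list of fields, the top-level struct null bitmap,
the number of nulls, and an offset. `NewStructArrayWithFields` now
delegates to it, and the check that rejected a non-nullable field whose
child array has nulls is removed.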
### Are these changes tested?
Yes, a unit test was created for it.
### Are there any user-facing changes?
The case described above, which previously returned an error, no longer returns one.
Co-authored-by: Matt Topol <[email protected]>
---
arrow/array/struct.go | 20 +++++++++++++++-----
arrow/array/struct_test.go | 32 ++++++++++++++++++++++++++++++++
2 files changed, 47 insertions(+), 5 deletions(-)
diff --git a/arrow/array/struct.go b/arrow/array/struct.go
index 564ae09..957947b 100644
--- a/arrow/array/struct.go
+++ b/arrow/array/struct.go
@@ -46,6 +46,13 @@ func NewStructArray(cols []arrow.Array, names []string)
(*Struct, error) {
// and provided fields. As opposed to NewStructArray, this allows you to
provide
// the full fields to utilize for the struct column instead of just the names.
func NewStructArrayWithFields(cols []arrow.Array, fields []arrow.Field)
(*Struct, error) {
+ return NewStructArrayWithFieldsAndNulls(cols, fields, nil, 0, 0)
+}
+
+// NewStructArrayWithFieldsAndNulls is like NewStructArrayWithFields as a
convenience function,
+// but also takes in a null bitmap, the number of nulls, and an optional offset
+// to use for creating the Struct Array.
+func NewStructArrayWithFieldsAndNulls(cols []arrow.Array, fields
[]arrow.Field, nullBitmap *memory.Buffer, nullCount int, offset int) (*Struct,
error) {
if len(cols) != len(fields) {
return nil, fmt.Errorf("%w: mismatching number of fields and
child arrays", arrow.ErrInvalid)
}
@@ -63,15 +70,18 @@ func NewStructArrayWithFields(cols []arrow.Array, fields
[]arrow.Field) (*Struct
return nil, fmt.Errorf("%w: mismatching data type for
child #%d, field says '%s', got '%s'",
arrow.ErrInvalid, i, fields[i].Type,
c.DataType())
}
- if !fields[i].Nullable && c.NullN() > 0 {
- return nil, fmt.Errorf("%w: field says not-nullable,
child #%d has nulls",
- arrow.ErrInvalid, i)
- }
children[i] = c.Data()
}
- data := NewData(arrow.StructOf(fields...), length,
[]*memory.Buffer{nil}, children, 0, 0)
+ if nullBitmap == nil {
+ if nullCount > 0 {
+ return nil, fmt.Errorf("%w: null count is greater than
0 but null bitmap is nil", arrow.ErrInvalid)
+ }
+ nullCount = 0
+ }
+
+ data := NewData(arrow.StructOf(fields...), length-offset,
[]*memory.Buffer{nullBitmap}, children, nullCount, offset)
defer data.Release()
return NewStructData(data), nil
}
diff --git a/arrow/array/struct_test.go b/arrow/array/struct_test.go
index a06ba83..24f522e 100644
--- a/arrow/array/struct_test.go
+++ b/arrow/array/struct_test.go
@@ -24,6 +24,7 @@ import (
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/memory"
"github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
)
func TestStructArray(t *testing.T) {
@@ -530,3 +531,34 @@ func TestStructArrayUnmarshalJSONMissingFields(t
*testing.T) {
)
}
}
+
+func TestCreateStructWithNulls(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ var (
+ fields = []arrow.Field{
+ {Name: "f1", Type: arrow.PrimitiveTypes.Float64,
Nullable: true},
+ {Name: "f2", Type: arrow.PrimitiveTypes.Int32,
Nullable: false},
+ }
+ dtype = arrow.StructOf(fields...)
+ )
+
+ sb := array.NewStructBuilder(pool, dtype)
+ defer sb.Release()
+
+ sb.AppendNulls(100)
+
+ arr := sb.NewArray().(*array.Struct)
+ defer arr.Release()
+
+ assert.EqualValues(t, 100, arr.Len())
+ assert.EqualValues(t, 100, arr.NullN())
+
+ arr2, err := array.NewStructArrayWithFieldsAndNulls(
+ []arrow.Array{arr.Field(0), arr.Field(1)}, fields,
arr.Data().Buffers()[0], arr.NullN(), 0)
+ require.NoError(t, err)
+ defer arr2.Release()
+
+ assert.True(t, array.Equal(arr, arr2))
+}