This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d27ddda2d8 GH-43012: [Go] Fixes record builder support for not-nullable
fixed-size lists (#43013)
d27ddda2d8 is described below
commit d27ddda2d860a3bb49944dbe7bd321572fd72b52
Author: Wyatt Alt <[email protected]>
AuthorDate: Tue Jun 25 11:47:19 2024 -0700
GH-43012: [Go] Fixes record builder support for not-nullable fixed-size
lists (#43013)
Prior to this commit, the builder constructor for fixed-size lists
would lose information as to whether the list elements were marked
not-nullable, which meant the data type available from the builder would
always reflect "nullable". When NewRecord was invoked, this type would
get checked against the type present in the original schema. If the
schema requested a fixed-size list with not-nullable elements, this check
would always fail and cause a panic.
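This commit introduces an alternative builder constructor
"NewFixedSizeListBuilderWithField" for fixed-size lists that takes the
entire element field (and therefore its nullability), similar to what
already exists for lists and large lists. NewBuilder now uses this
constructor when it is given a fixed-size list type.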
**This PR contains a "Critical Fix".**
* GitHub Issue: #43012
---
go/arrow/array/builder.go | 2 +-
go/arrow/array/fixed_size_list.go | 37 +++++++++++++++++++++++++------------
go/arrow/array/record_test.go | 39 +++++++++++++++++++++++++++++++++++++++
3 files changed, 65 insertions(+), 13 deletions(-)
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index 444ab29787..88c0ac4798 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -357,7 +357,7 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType)
Builder {
return bldr
case arrow.FIXED_SIZE_LIST:
typ := dtype.(*arrow.FixedSizeListType)
- return NewFixedSizeListBuilder(mem, typ.Len(), typ.Elem())
+ return NewFixedSizeListBuilderWithField(mem, typ.Len(),
typ.ElemField())
case arrow.DURATION:
typ := dtype.(*arrow.DurationType)
return NewDurationBuilder(mem, typ)
diff --git a/go/arrow/array/fixed_size_list.go
b/go/arrow/array/fixed_size_list.go
index ebc517d3b0..830361b518 100644
--- a/go/arrow/array/fixed_size_list.go
+++ b/go/arrow/array/fixed_size_list.go
@@ -162,25 +162,38 @@ func (a *FixedSizeList) MarshalJSON() ([]byte, error) {
}
type FixedSizeListBuilder struct {
- builder
-
- etype arrow.DataType // data type of the list's elements.
- n int32 // number of elements in the fixed-size list.
- values Builder // value builder for the list's elements.
+ baseListBuilder
+ n int32 // number of elements in the fixed-size list.
}
// NewFixedSizeListBuilder returns a builder, using the provided memory
allocator.
// The created list builder will create a list whose elements will be of type
etype.
func NewFixedSizeListBuilder(mem memory.Allocator, n int32, etype
arrow.DataType) *FixedSizeListBuilder {
return &FixedSizeListBuilder{
- builder: builder{refCount: 1, mem: mem},
- etype: etype,
- n: n,
- values: NewBuilder(mem, etype),
+ baseListBuilder{
+ builder: builder{refCount: 1, mem: mem},
+ values: NewBuilder(mem, etype),
+ dt: arrow.FixedSizeListOf(n, etype),
+ },
+ n,
+ }
+}
+
+// NewFixedSizeListBuilderWithField returns a builder similarly to
+// NewFixedSizeListBuilder, but it accepts a child rather than just a datatype
+// to ensure nullability context is preserved.
+func NewFixedSizeListBuilderWithField(mem memory.Allocator, n int32, field
arrow.Field) *FixedSizeListBuilder {
+ return &FixedSizeListBuilder{
+ baseListBuilder{
+ builder: builder{refCount: 1, mem: mem},
+ values: NewBuilder(mem, field.Type),
+ dt: arrow.FixedSizeListOfField(n, field),
+ },
+ n,
}
}
-func (b *FixedSizeListBuilder) Type() arrow.DataType { return
arrow.FixedSizeListOf(b.n, b.etype) }
+func (b *FixedSizeListBuilder) Type() arrow.DataType { return b.dt }
// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
@@ -296,7 +309,7 @@ func (b *FixedSizeListBuilder) newData() (data *Data) {
defer values.Release()
data = NewData(
- arrow.FixedSizeListOf(b.n, b.etype), b.length,
+ b.dt, b.length,
[]*memory.Buffer{b.nullBitmap},
[]arrow.ArrayData{values.Data()},
b.nulls,
@@ -336,7 +349,7 @@ func (b *FixedSizeListBuilder) UnmarshalOne(dec
*json.Decoder) error {
default:
return &json.UnmarshalTypeError{
Value: fmt.Sprint(t),
- Struct: arrow.FixedSizeListOf(b.n, b.etype).String(),
+ Struct: b.dt.String(),
}
}
diff --git a/go/arrow/array/record_test.go b/go/arrow/array/record_test.go
index be6a26eb1a..6712a1c908 100644
--- a/go/arrow/array/record_test.go
+++ b/go/arrow/array/record_test.go
@@ -372,6 +372,45 @@ func TestRecordReader(t *testing.T) {
}
}
+func TestRecordBuilderRespectsFixedSizeArrayNullability(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ cases := []struct {
+ assertion string
+ fields []arrow.Field
+ }{
+ {
+ "nullable",
+ []arrow.Field{{Name: "data", Type:
arrow.FixedSizeListOf(1, arrow.PrimitiveTypes.Int32)}},
+ },
+ {
+ "not nullable",
+ []arrow.Field{{Name: "data", Type:
arrow.FixedSizeListOfNonNullable(1, arrow.PrimitiveTypes.Int32)}},
+ },
+ }
+ for _, c := range cases {
+ t.Run(c.assertion, func(t *testing.T) {
+ schema := arrow.NewSchema(c.fields, nil)
+ b := array.NewRecordBuilder(mem, schema)
+ defer b.Release()
+
+ lb := b.Field(0).(*array.FixedSizeListBuilder)
+ lb.Append(true)
+
+ vb := lb.ValueBuilder().(*array.Int32Builder)
+ vb.Append(10)
+
+ rec := b.NewRecord()
+ defer rec.Release()
+
+ if got, want := rec.Column(0).String(), "[[10]]"; got
!= want {
+ t.Fatalf("invalid record: got=%q, want=%q",
got, want)
+ }
+ })
+ }
+}
+
func TestRecordBuilder(t *testing.T) {
mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
defer mem.AssertSize(t, 0)