pitrou commented on code in PR #13768:
URL: https://github.com/apache/arrow/pull/13768#discussion_r936630672


##########
go/arrow/bitutil/bitmaps.go:
##########
@@ -422,3 +423,81 @@ func CopyBitmap(src []byte, srcOffset, length int, dst 
[]byte, dstOffset int) {
        dst[nbytes-1] &= ^trailMask
        dst[nbytes-1] |= lastData & trailMask
 }
+
+type bitOp struct {
+       opWord func(uint64, uint64) uint64
+       opByte func(byte, byte) byte
+}
+
+var (
+       bitAndOp = bitOp{
+               opWord: func(l, r uint64) uint64 { return l & r },
+               opByte: func(l, r byte) byte { return l & r },
+       }
+       bitOrOp = bitOp{
+               opWord: func(l, r uint64) uint64 { return l | r },
+               opByte: func(l, r byte) byte { return l | r },
+       }
+)
+
+func alignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out 
[]byte, outOffset int64, length int64) {
+       debug.Assert(lOffset%8 == rOffset%8, "aligned bitmap op called with 
unaligned offsets")
+       debug.Assert(lOffset%8 == outOffset%8, "aligned bitmap op called with 
unaligned output offset")
+
+       nbytes := BytesForBits(length + lOffset%8)
+       left = left[lOffset/8:]
+       right = right[rOffset/8:]
+       out = out[outOffset/8:]
+       for i := int64(0); i < nbytes; i++ {
+               out[i] = op.opByte(left[i], right[i])
+       }
+}
+
+func unalignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, 
out []byte, outOffset int64, length int64) {
+       leftRdr := NewBitmapWordReader(left, int(lOffset), int(length))
+       rightRdr := NewBitmapWordReader(right, int(rOffset), int(length))
+       writer := NewBitmapWordWriter(out, int(outOffset), int(length))
+
+       for nwords := leftRdr.Words(); nwords > 0; nwords-- {
+               writer.PutNextWord(op.opWord(leftRdr.NextWord(), 
rightRdr.NextWord()))

Review Comment:
   If this works word-wise, might it actually be faster than `alignedBitmapOp` 
which only works byte-wise?



##########
go/arrow/array/union.go:
##########
@@ -0,0 +1,1118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array
+
+import (
+       "bytes"
+       "errors"
+       "fmt"
+       "math"
+       "reflect"
+       "strings"
+       "sync/atomic"
+
+       "github.com/apache/arrow/go/v9/arrow"
+       "github.com/apache/arrow/go/v9/arrow/bitutil"
+       "github.com/apache/arrow/go/v9/arrow/internal/debug"
+       "github.com/apache/arrow/go/v9/arrow/memory"
+       "github.com/apache/arrow/go/v9/internal/bitutils"
+       "github.com/goccy/go-json"
+)
+
+type Union interface {
+       arrow.Array
+       Validate() error
+       ValidateFull() error
+       TypeCodes() *memory.Buffer
+       RawTypeCodes() []arrow.UnionTypeCode
+       TypeCode(i int) arrow.UnionTypeCode
+       ChildID(i int) int
+       UnionType() arrow.UnionType
+       Mode() arrow.UnionMode
+       Field(pos int) arrow.Array
+}
+
+const kMaxElems = math.MaxInt32
+
+type union struct {
+       array
+
+       unionType arrow.UnionType
+       typecodes []arrow.UnionTypeCode
+
+       children []arrow.Array
+}
+
+func (a *union) Retain() {
+       a.array.Retain()
+       for _, c := range a.children {
+               c.Retain()
+       }
+}
+
+func (a *union) Release() {
+       a.array.Release()
+       for _, c := range a.children {
+               c.Release()
+       }
+}
+
+func (a *union) NumFields() int { return len(a.unionType.Fields()) }
+
+func (a *union) Mode() arrow.UnionMode { return a.unionType.Mode() }
+
+func (a *union) UnionType() arrow.UnionType { return a.unionType }
+
+func (a *union) TypeCodes() *memory.Buffer {
+       return a.data.buffers[1]
+}
+
+func (a *union) RawTypeCodes() []arrow.UnionTypeCode {
+       if a.data.length > 0 {
+               return a.typecodes[a.data.offset:]
+       }
+       return []arrow.UnionTypeCode{}
+}
+
+func (a *union) TypeCode(i int) arrow.UnionTypeCode {
+       return a.typecodes[i+a.data.offset]
+}
+
+func (a *union) ChildID(i int) int {
+       return a.unionType.ChildIDs()[a.typecodes[i+a.data.offset]]
+}
+
+func (a *union) setData(data *Data) {
+       a.unionType = data.dtype.(arrow.UnionType)
+       debug.Assert(len(data.buffers) >= 2, "arrow/array: invalid number of 
union array buffers")
+
+       if data.length > 0 {
+               a.typecodes = 
arrow.Int8Traits.CastFromBytes(data.buffers[1].Bytes())
+       } else {
+               a.typecodes = []int8{}
+       }
+       a.children = make([]arrow.Array, len(data.childData))
+       for i, child := range data.childData {
+               if a.unionType.Mode() == arrow.SparseMode && (data.offset != 0 
|| child.Len() != data.length) {
+                       child = NewSliceData(child, int64(data.offset), 
int64(data.offset+data.length))
+                       defer child.Release()
+               }
+               a.children[i] = MakeFromData(child)
+       }
+       a.array.setData(data)
+}
+
+func (a *union) Field(pos int) (result arrow.Array) {
+       if pos < 0 || pos >= len(a.children) {
+               return nil
+       }
+
+       return a.children[pos]
+}
+
+func (a *union) Validate() error {
+       fields := a.unionType.Fields()
+       for i, f := range fields {
+               fieldData := a.data.childData[i]
+               if a.unionType.Mode() == arrow.SparseMode && fieldData.Len() < 
a.data.length+a.data.offset {
+                       return fmt.Errorf("arrow/array: sparse union child 
array #%d has length smaller than expected for union array (%d < %d)",
+                               i, fieldData.Len(), a.data.length+a.data.offset)
+               }
+
+               if !arrow.TypeEqual(f.Type, fieldData.DataType()) {
+                       return fmt.Errorf("arrow/array: union child array #%d 
does not match type field %s vs %s",
+                               i, fieldData.DataType(), f.Type)
+               }
+       }
+       return nil
+}
+
+func (a *union) ValidateFull() error {
+       if err := a.Validate(); err != nil {
+               return err
+       }
+
+       childIDs := a.unionType.ChildIDs()
+       codesMap := a.unionType.TypeCodes()
+       codes := a.RawTypeCodes()
+
+       for i := 0; i < a.data.length; i++ {
+               code := codes[i]
+               if code < 0 || childIDs[code] == arrow.InvalidUnionChildID {
+                       return fmt.Errorf("arrow/array: union value at position 
%d has invalid type id %d", i, code)
+               }
+       }
+
+       if a.unionType.Mode() == arrow.DenseMode {
+               // validate offsets
+
+               // map logical typeid to child length
+               var childLengths [256]int64
+               for i := range a.unionType.Fields() {
+                       childLengths[codesMap[i]] = 
int64(a.data.childData[i].Len())
+               }
+
+               // check offsets are in bounds
+               var lastOffsets [256]int64
+               offsets := 
arrow.Int32Traits.CastFromBytes(a.data.buffers[2].Bytes())[a.data.offset:]
+               for i := int64(0); i < int64(a.data.length); i++ {
+                       code := codes[i]
+                       offset := offsets[i]
+                       switch {
+                       case offset < 0:
+                               return fmt.Errorf("arrow/array: union value at 
position %d has negative offset %d", i, offset)
+                       case offset >= int32(childLengths[code]):
+                               return fmt.Errorf("arrow/array: union value at 
position %d has offset larger than child length (%d >= %d)",
+                                       i, offset, childLengths[code])
+                       case offset < int32(lastOffsets[code]):
+                               return fmt.Errorf("arrow/array: union value at 
position %d has non-monotonic offset %d", i, offset)
+                       }
+                       lastOffsets[code] = int64(offset)
+               }
+       }
+
+       return nil
+}
+
+type SparseUnion struct {
+       union
+}
+
+func NewSparseUnion(dt *arrow.SparseUnionType, length int, children 
[]arrow.Array, typeIDs *memory.Buffer, offset int) *SparseUnion {
+       childData := make([]arrow.ArrayData, len(children))
+       for i, c := range children {
+               childData[i] = c.Data()
+       }
+       data := NewData(dt, length, []*memory.Buffer{nil, typeIDs}, childData, 
0, offset)
+       defer data.Release()
+       return NewSparseUnionData(data)
+}
+
+func NewSparseUnionData(data arrow.ArrayData) *SparseUnion {
+       a := &SparseUnion{}
+       a.refCount = 1
+       a.setData(data.(*Data))
+       return a
+}
+
+func NewSparseUnionFromArrays(typeIDs arrow.Array, children []arrow.Array, 
codes ...arrow.UnionTypeCode) (*SparseUnion, error) {
+       return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, 
[]string{}, codes)
+}
+
+func NewSparseUnionFromArraysWithFields(typeIDs arrow.Array, children 
[]arrow.Array, fields []string) (*SparseUnion, error) {
+       return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, 
fields, []arrow.UnionTypeCode{})
+}
+
+func NewSparseUnionFromArraysWithFieldCodes(typeIDs arrow.Array, children 
[]arrow.Array, fields []string, codes []arrow.UnionTypeCode) (*SparseUnion, 
error) {
+       switch {
+       case typeIDs.DataType().ID() != arrow.INT8:
+               return nil, errors.New("arrow/array: union array type ids must 
be signed int8")
+       case typeIDs.NullN() != 0:
+               return nil, errors.New("arrow/array: union type ids may not 
have nulls")
+       case len(fields) > 0 && len(fields) != len(children):
+               return nil, errors.New("arrow/array: field names must have the 
same length as children")
+       case len(codes) > 0 && len(codes) != len(children):
+               return nil, errors.New("arrow/array: type codes must have same 
length as children")
+       }
+
+       buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1]}
+       ty := arrow.SparseUnionFromArrays(children, fields, codes)
+
+       childData := make([]arrow.ArrayData, len(children))
+       for i, c := range children {
+               childData[i] = c.Data()
+               if c.Len() != typeIDs.Len() {
+                       return nil, errors.New("arrow/array: sparse union array 
must have len(child) == len(typeids) for all children")
+               }
+       }
+
+       data := NewData(ty, typeIDs.Len(), buffers, childData, 0, 
typeIDs.Data().Offset())
+       defer data.Release()
+       return NewSparseUnionData(data), nil
+}
+
+func (a *SparseUnion) setData(data *Data) {
+       a.union.setData(data)
+       debug.Assert(a.data.dtype.ID() == arrow.SPARSE_UNION, "arrow/array: 
invalid data type for SparseUnion")
+       debug.Assert(len(a.data.buffers) == 2, "arrow/array: sparse unions 
should have exactly 2 buffers")
+       debug.Assert(a.data.buffers[0] == nil, "arrow/array: validity bitmap 
for sparse unions should be nil")
+}
+
+func (a *SparseUnion) getOneForMarshal(i int) interface{} {
+       childID := a.ChildID(i)
+       field := a.unionType.Fields()[childID]
+       data := a.Field(childID)
+
+       if data.IsNull(i) {
+               return nil
+       }
+
+       return map[string]interface{}{field.Name: 
data.(arraymarshal).getOneForMarshal(i)}
+}
+
+func (a *SparseUnion) MarshalJSON() ([]byte, error) {
+       var buf bytes.Buffer
+       enc := json.NewEncoder(&buf)
+
+       buf.WriteByte('[')
+       for i := 0; i < a.Len(); i++ {
+               if i != 0 {
+                       buf.WriteByte(',')
+               }
+               if err := enc.Encode(a.getOneForMarshal(i)); err != nil {
+                       return nil, err
+               }
+       }
+       buf.WriteByte(']')
+       return buf.Bytes(), nil
+}
+
+func (a *SparseUnion) String() string {
+       var b strings.Builder
+       b.WriteByte('[')
+
+       fieldList := a.unionType.Fields()
+       for i := 0; i < a.Len(); i++ {
+               if i > 0 {
+                       b.WriteString(" ")
+               }
+
+               field := fieldList[a.ChildID(i)]
+               f := a.Field(a.ChildID(i))
+               fmt.Fprintf(&b, "{%s=%v}", field.Name, 
f.(arraymarshal).getOneForMarshal(i))
+       }
+       b.WriteByte(']')
+       return b.String()
+}
+
+func (a *SparseUnion) GetFlattenedField(mem memory.Allocator, index int) 
(arrow.Array, error) {
+       if index < 0 || index >= a.NumFields() {
+               return nil, fmt.Errorf("arrow/array: index out of range: %d", 
index)
+       }
+
+       childData := a.data.childData[index]
+       if a.data.offset != 0 || a.data.length != childData.Len() {
+               childData = NewSliceData(childData, int64(a.data.offset), 
int64(a.data.offset+a.data.length))
+               // NewSliceData doesn't break the slice reference for buffers
+               // since we're going to replace the null bitmap buffer we need 
to break the
+               // slice reference so that we don't affect a.children's 
references
+               newBufs := make([]*memory.Buffer, len(childData.Buffers()))
+               copy(newBufs, childData.(*Data).buffers)
+               childData.(*Data).buffers = newBufs
+       } else {
+               childData = childData.(*Data).Copy()
+       }
+       defer childData.Release()
+
+       // synthesize a null bitmap based on the union discriminant
+       // make sure hte bitmap has extra bits corresponding to the child's 
offset

Review Comment:
   ```suggestion
        // make sure the bitmap has extra bits corresponding to the child's 
offset
   ```



##########
go/arrow/internal/arrdata/arrdata.go:
##########
@@ -49,6 +49,7 @@ func init() {
        Records["decimal128"] = makeDecimal128sRecords()
        Records["maps"] = makeMapsRecords()
        Records["extension"] = makeExtensionRecords()
+       // Records["union"] = makeUnionRecords()

Review Comment:
   Is this supposed to be uncommented at some point?



##########
go/arrow/array/union.go:
##########
@@ -0,0 +1,1118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array
+
+import (
+       "bytes"
+       "errors"
+       "fmt"
+       "math"
+       "reflect"
+       "strings"
+       "sync/atomic"
+
+       "github.com/apache/arrow/go/v9/arrow"
+       "github.com/apache/arrow/go/v9/arrow/bitutil"
+       "github.com/apache/arrow/go/v9/arrow/internal/debug"
+       "github.com/apache/arrow/go/v9/arrow/memory"
+       "github.com/apache/arrow/go/v9/internal/bitutils"
+       "github.com/goccy/go-json"
+)
+
+type Union interface {
+       arrow.Array
+       Validate() error
+       ValidateFull() error
+       TypeCodes() *memory.Buffer
+       RawTypeCodes() []arrow.UnionTypeCode
+       TypeCode(i int) arrow.UnionTypeCode
+       ChildID(i int) int
+       UnionType() arrow.UnionType
+       Mode() arrow.UnionMode
+       Field(pos int) arrow.Array
+}
+
+const kMaxElems = math.MaxInt32
+
+type union struct {
+       array
+
+       unionType arrow.UnionType
+       typecodes []arrow.UnionTypeCode
+
+       children []arrow.Array
+}
+
+func (a *union) Retain() {
+       a.array.Retain()
+       for _, c := range a.children {
+               c.Retain()
+       }
+}
+
+func (a *union) Release() {
+       a.array.Release()
+       for _, c := range a.children {
+               c.Release()
+       }
+}
+
+func (a *union) NumFields() int { return len(a.unionType.Fields()) }
+
+func (a *union) Mode() arrow.UnionMode { return a.unionType.Mode() }
+
+func (a *union) UnionType() arrow.UnionType { return a.unionType }
+
+func (a *union) TypeCodes() *memory.Buffer {
+       return a.data.buffers[1]
+}
+
+func (a *union) RawTypeCodes() []arrow.UnionTypeCode {
+       if a.data.length > 0 {
+               return a.typecodes[a.data.offset:]
+       }
+       return []arrow.UnionTypeCode{}
+}
+
+func (a *union) TypeCode(i int) arrow.UnionTypeCode {
+       return a.typecodes[i+a.data.offset]
+}
+
+func (a *union) ChildID(i int) int {
+       return a.unionType.ChildIDs()[a.typecodes[i+a.data.offset]]
+}
+
+func (a *union) setData(data *Data) {
+       a.unionType = data.dtype.(arrow.UnionType)
+       debug.Assert(len(data.buffers) >= 2, "arrow/array: invalid number of 
union array buffers")
+
+       if data.length > 0 {
+               a.typecodes = 
arrow.Int8Traits.CastFromBytes(data.buffers[1].Bytes())
+       } else {
+               a.typecodes = []int8{}
+       }
+       a.children = make([]arrow.Array, len(data.childData))
+       for i, child := range data.childData {
+               if a.unionType.Mode() == arrow.SparseMode && (data.offset != 0 
|| child.Len() != data.length) {
+                       child = NewSliceData(child, int64(data.offset), 
int64(data.offset+data.length))
+                       defer child.Release()
+               }
+               a.children[i] = MakeFromData(child)
+       }
+       a.array.setData(data)
+}
+
+func (a *union) Field(pos int) (result arrow.Array) {
+       if pos < 0 || pos >= len(a.children) {
+               return nil
+       }
+
+       return a.children[pos]
+}
+
+func (a *union) Validate() error {
+       fields := a.unionType.Fields()
+       for i, f := range fields {
+               fieldData := a.data.childData[i]
+               if a.unionType.Mode() == arrow.SparseMode && fieldData.Len() < 
a.data.length+a.data.offset {
+                       return fmt.Errorf("arrow/array: sparse union child 
array #%d has length smaller than expected for union array (%d < %d)",
+                               i, fieldData.Len(), a.data.length+a.data.offset)
+               }
+
+               if !arrow.TypeEqual(f.Type, fieldData.DataType()) {
+                       return fmt.Errorf("arrow/array: union child array #%d 
does not match type field %s vs %s",
+                               i, fieldData.DataType(), f.Type)
+               }
+       }
+       return nil
+}
+
+func (a *union) ValidateFull() error {
+       if err := a.Validate(); err != nil {
+               return err
+       }
+
+       childIDs := a.unionType.ChildIDs()
+       codesMap := a.unionType.TypeCodes()
+       codes := a.RawTypeCodes()
+
+       for i := 0; i < a.data.length; i++ {
+               code := codes[i]
+               if code < 0 || childIDs[code] == arrow.InvalidUnionChildID {
+                       return fmt.Errorf("arrow/array: union value at position 
%d has invalid type id %d", i, code)
+               }
+       }
+
+       if a.unionType.Mode() == arrow.DenseMode {
+               // validate offsets
+
+               // map logical typeid to child length
+               var childLengths [256]int64
+               for i := range a.unionType.Fields() {
+                       childLengths[codesMap[i]] = 
int64(a.data.childData[i].Len())
+               }
+
+               // check offsets are in bounds
+               var lastOffsets [256]int64
+               offsets := 
arrow.Int32Traits.CastFromBytes(a.data.buffers[2].Bytes())[a.data.offset:]
+               for i := int64(0); i < int64(a.data.length); i++ {
+                       code := codes[i]
+                       offset := offsets[i]
+                       switch {
+                       case offset < 0:
+                               return fmt.Errorf("arrow/array: union value at 
position %d has negative offset %d", i, offset)
+                       case offset >= int32(childLengths[code]):
+                               return fmt.Errorf("arrow/array: union value at 
position %d has offset larger than child length (%d >= %d)",
+                                       i, offset, childLengths[code])
+                       case offset < int32(lastOffsets[code]):
+                               return fmt.Errorf("arrow/array: union value at 
position %d has non-monotonic offset %d", i, offset)
+                       }
+                       lastOffsets[code] = int64(offset)
+               }
+       }
+
+       return nil
+}
+
+type SparseUnion struct {
+       union
+}
+
+func NewSparseUnion(dt *arrow.SparseUnionType, length int, children 
[]arrow.Array, typeIDs *memory.Buffer, offset int) *SparseUnion {
+       childData := make([]arrow.ArrayData, len(children))
+       for i, c := range children {
+               childData[i] = c.Data()
+       }
+       data := NewData(dt, length, []*memory.Buffer{nil, typeIDs}, childData, 
0, offset)
+       defer data.Release()
+       return NewSparseUnionData(data)
+}
+
+func NewSparseUnionData(data arrow.ArrayData) *SparseUnion {
+       a := &SparseUnion{}
+       a.refCount = 1
+       a.setData(data.(*Data))
+       return a
+}
+
+func NewSparseUnionFromArrays(typeIDs arrow.Array, children []arrow.Array, 
codes ...arrow.UnionTypeCode) (*SparseUnion, error) {
+       return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, 
[]string{}, codes)
+}
+
+func NewSparseUnionFromArraysWithFields(typeIDs arrow.Array, children 
[]arrow.Array, fields []string) (*SparseUnion, error) {
+       return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, 
fields, []arrow.UnionTypeCode{})
+}
+
+func NewSparseUnionFromArraysWithFieldCodes(typeIDs arrow.Array, children 
[]arrow.Array, fields []string, codes []arrow.UnionTypeCode) (*SparseUnion, 
error) {
+       switch {
+       case typeIDs.DataType().ID() != arrow.INT8:
+               return nil, errors.New("arrow/array: union array type ids must 
be signed int8")
+       case typeIDs.NullN() != 0:
+               return nil, errors.New("arrow/array: union type ids may not 
have nulls")
+       case len(fields) > 0 && len(fields) != len(children):
+               return nil, errors.New("arrow/array: field names must have the 
same length as children")
+       case len(codes) > 0 && len(codes) != len(children):
+               return nil, errors.New("arrow/array: type codes must have same 
length as children")
+       }
+
+       buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1]}
+       ty := arrow.SparseUnionFromArrays(children, fields, codes)
+
+       childData := make([]arrow.ArrayData, len(children))
+       for i, c := range children {
+               childData[i] = c.Data()
+               if c.Len() != typeIDs.Len() {
+                       return nil, errors.New("arrow/array: sparse union array 
must have len(child) == len(typeids) for all children")
+               }
+       }
+
+       data := NewData(ty, typeIDs.Len(), buffers, childData, 0, 
typeIDs.Data().Offset())
+       defer data.Release()
+       return NewSparseUnionData(data), nil
+}
+
+func (a *SparseUnion) setData(data *Data) {
+       a.union.setData(data)
+       debug.Assert(a.data.dtype.ID() == arrow.SPARSE_UNION, "arrow/array: 
invalid data type for SparseUnion")
+       debug.Assert(len(a.data.buffers) == 2, "arrow/array: sparse unions 
should have exactly 2 buffers")
+       debug.Assert(a.data.buffers[0] == nil, "arrow/array: validity bitmap 
for sparse unions should be nil")
+}
+
+func (a *SparseUnion) getOneForMarshal(i int) interface{} {
+       childID := a.ChildID(i)
+       field := a.unionType.Fields()[childID]
+       data := a.Field(childID)
+
+       if data.IsNull(i) {
+               return nil
+       }
+
+       return map[string]interface{}{field.Name: 
data.(arraymarshal).getOneForMarshal(i)}
+}
+
+func (a *SparseUnion) MarshalJSON() ([]byte, error) {
+       var buf bytes.Buffer
+       enc := json.NewEncoder(&buf)
+
+       buf.WriteByte('[')
+       for i := 0; i < a.Len(); i++ {
+               if i != 0 {
+                       buf.WriteByte(',')
+               }
+               if err := enc.Encode(a.getOneForMarshal(i)); err != nil {
+                       return nil, err
+               }
+       }
+       buf.WriteByte(']')
+       return buf.Bytes(), nil
+}
+
+func (a *SparseUnion) String() string {
+       var b strings.Builder
+       b.WriteByte('[')
+
+       fieldList := a.unionType.Fields()
+       for i := 0; i < a.Len(); i++ {
+               if i > 0 {
+                       b.WriteString(" ")
+               }
+
+               field := fieldList[a.ChildID(i)]
+               f := a.Field(a.ChildID(i))
+               fmt.Fprintf(&b, "{%s=%v}", field.Name, 
f.(arraymarshal).getOneForMarshal(i))
+       }
+       b.WriteByte(']')
+       return b.String()
+}
+
+func (a *SparseUnion) GetFlattenedField(mem memory.Allocator, index int) 
(arrow.Array, error) {
+       if index < 0 || index >= a.NumFields() {
+               return nil, fmt.Errorf("arrow/array: index out of range: %d", 
index)
+       }
+
+       childData := a.data.childData[index]
+       if a.data.offset != 0 || a.data.length != childData.Len() {
+               childData = NewSliceData(childData, int64(a.data.offset), 
int64(a.data.offset+a.data.length))
+               // NewSliceData doesn't break the slice reference for buffers
+               // since we're going to replace the null bitmap buffer we need 
to break the
+               // slice reference so that we don't affect a.children's 
references
+               newBufs := make([]*memory.Buffer, len(childData.Buffers()))
+               copy(newBufs, childData.(*Data).buffers)
+               childData.(*Data).buffers = newBufs
+       } else {
+               childData = childData.(*Data).Copy()
+       }
+       defer childData.Release()
+
+       // synthesize a null bitmap based on the union discriminant
+       // make sure hte bitmap has extra bits corresponding to the child's 
offset
+       flattenedNullBitmap := memory.NewResizableBuffer(mem)
+       flattenedNullBitmap.Resize(childData.Len() + childData.Offset())
+
+       var (
+               childNullBitmap       = childData.Buffers()[0]
+               childOffset           = childData.Offset()
+               typeCode              = a.unionType.TypeCodes()[index]
+               codes                 = a.RawTypeCodes()
+               offset          int64 = 0
+       )
+       bitutils.GenerateBitsUnrolled(flattenedNullBitmap.Bytes(), 
int64(childOffset), int64(a.data.length),
+               func() bool {
+                       b := codes[offset] == typeCode
+                       offset++
+                       return b
+               })
+
+       if childNullBitmap != nil {
+               defer childNullBitmap.Release()
+               bitutil.BitmapAnd(flattenedNullBitmap.Bytes(), 
childNullBitmap.Bytes(),
+                       int64(childOffset), int64(childOffset), 
flattenedNullBitmap.Bytes(),
+                       int64(childOffset), int64(childData.Len()))
+       }
+       childData.(*Data).buffers[0] = flattenedNullBitmap
+       childData.(*Data).nulls = childData.Len() - 
bitutil.CountSetBits(flattenedNullBitmap.Bytes(), childOffset, childData.Len())
+       return MakeFromData(childData), nil
+}
+
+func arraySparseUnionEqual(l, r *SparseUnion) bool {
+       childIDs := l.unionType.ChildIDs()
+       leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes()
+
+       for i := 0; i < l.data.length; i++ {
+               typeID := leftCodes[i]
+               if typeID != rightCodes[i] {
+                       return false
+               }
+
+               childNum := childIDs[typeID]
+               eq := SliceEqual(l.children[childNum], int64(i), int64(i+1),
+                       r.children[childNum], int64(i), int64(i+1))
+               if !eq {
+                       return false
+               }
+       }
+       return true
+}
+
+func arraySparseUnionApproxEqual(l, r *SparseUnion, opt equalOption) bool {
+       childIDs := l.unionType.ChildIDs()
+       leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes()
+
+       for i := 0; i < l.data.length; i++ {
+               typeID := leftCodes[i]
+               if typeID != rightCodes[i] {
+                       return false
+               }
+
+               childNum := childIDs[typeID]
+               eq := sliceApproxEqual(l.children[childNum], 
int64(i+l.data.offset), int64(i+l.data.offset+1),
+                       r.children[childNum], int64(i+r.data.offset), 
int64(i+r.data.offset+1), opt)
+               if !eq {
+                       return false
+               }
+       }
+       return true
+}
+
+type DenseUnion struct {
+       union
+       offsets []int32
+}
+
+func NewDenseUnion(dt *arrow.DenseUnionType, length int, children 
[]arrow.Array, typeIDs, valueOffsets *memory.Buffer, offset int) *DenseUnion {
+       childData := make([]arrow.ArrayData, len(children))
+       for i, c := range children {
+               childData[i] = c.Data()
+       }
+
+       data := NewData(dt, length, []*memory.Buffer{nil, typeIDs, 
valueOffsets}, childData, 0, offset)
+       defer data.Release()
+       return NewDenseUnionData(data)
+}
+
+func NewDenseUnionData(data arrow.ArrayData) *DenseUnion {
+       a := &DenseUnion{}
+       a.refCount = 1
+       a.setData(data.(*Data))
+       return a
+}
+
+func NewDenseUnionFromArrays(typeIDs, offsets arrow.Array, children 
[]arrow.Array, codes ...arrow.UnionTypeCode) (*DenseUnion, error) {
+       return NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets, 
children, []string{}, codes)
+}
+
+func NewDenseUnionFromArraysWithFields(typeIDs, offsets arrow.Array, children 
[]arrow.Array, fields []string) (*DenseUnion, error) {
+       return NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets, 
children, fields, []arrow.UnionTypeCode{})
+}
+
+func NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets arrow.Array, 
children []arrow.Array, fields []string, codes []arrow.UnionTypeCode) 
(*DenseUnion, error) {
+       switch {
+       case offsets.DataType().ID() != arrow.INT32:
+               return nil, errors.New("arrow/array: union offsets must be 
signed int32")
+       case typeIDs.DataType().ID() != arrow.INT8:
+               return nil, errors.New("arrow/array: union type_ids must be 
signed int8")
+       case typeIDs.NullN() != 0:
+               return nil, errors.New("arrow/array: union typeIDs may not have 
nulls")
+       case offsets.NullN() != 0:
+               return nil, errors.New("arrow/array: nulls are not allowed in 
offsets for NewDenseUnionFromArrays*")
+       case len(fields) > 0 && len(fields) != len(children):
+               return nil, errors.New("arrow/array: fields must be the same 
length as children")
+       case len(codes) > 0 && len(codes) != len(children):
+               return nil, errors.New("arrow/array: typecodes must have the 
same length as children")
+       }
+
+       ty := arrow.DenseUnionFromArrays(children, fields, codes)
+       buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1], 
offsets.Data().Buffers()[1]}
+
+       childData := make([]arrow.ArrayData, len(children))
+       for i, c := range children {
+               childData[i] = c.Data()
+       }
+
+       data := NewData(ty, typeIDs.Len(), buffers, childData, 0, 
typeIDs.Data().Offset())
+       defer data.Release()
+       return NewDenseUnionData(data), nil
+}
+
+func (a *DenseUnion) ValueOffsets() *memory.Buffer { return a.data.buffers[2] }
+
+func (a *DenseUnion) ValueOffset(i int) int32 { return 
a.offsets[i+a.data.offset] }
+
+func (a *DenseUnion) RawValueOffsets() []int32 { return 
a.offsets[a.data.offset:] }
+
+func (a *DenseUnion) setData(data *Data) {
+       a.union.setData(data)
+       debug.Assert(a.data.dtype.ID() == arrow.DENSE_UNION, "arrow/array: 
invalid data type for DenseUnion")
+       debug.Assert(len(a.data.buffers) == 3, "arrow/array: sparse unions 
should have exactly 3 buffers")

Review Comment:
   ```suggestion
        debug.Assert(len(a.data.buffers) == 3, "arrow/array: dense unions 
should have exactly 3 buffers")
   ```



##########
go/arrow/array/fixed_size_list.go:
##########
@@ -196,6 +198,10 @@ func (b *FixedSizeListBuilder) AppendNull() {
        b.unsafeAppendBoolToBitmap(false)
 }
 
+func (b *FixedSizeListBuilder) AppendEmptyValue() {
+       b.Append(true)

Review Comment:
   Hmmm... I don't see where it appends to the child array. Where does that 
happen?



##########
go/arrow/array/fixedsize_binarybuilder.go:
##########
@@ -81,6 +83,12 @@ func (b *FixedSizeBinaryBuilder) AppendNull() {
        b.UnsafeAppendBoolToBitmap(false)
 }
 
+func (b *FixedSizeBinaryBuilder) AppendEmptyValue() {
+       b.Reserve(1)
+       b.values.Advance(b.dtype.ByteWidth)

Review Comment:
   Valgrind helps spot undefined data, btw, though it needs to be specifically 
exercised in the test suite.



##########
go/arrow/array/fixedsize_binarybuilder.go:
##########
@@ -81,6 +83,12 @@ func (b *FixedSizeBinaryBuilder) AppendNull() {
        b.UnsafeAppendBoolToBitmap(false)
 }
 
+func (b *FixedSizeBinaryBuilder) AppendEmptyValue() {
+       b.Reserve(1)
+       b.values.Advance(b.dtype.ByteWidth)

Review Comment:
   Does it mean the values are undetermined? I don't know how you feel about 
this for the Go implementation, but in C++ we try to be careful not to reveal 
past memory contents through Arrow buffers (because buffers can be sent as-is 
over IPC).



##########
go/arrow/datatype_nested.go:
##########
@@ -329,6 +333,208 @@ func (t *MapType) Layout() DataTypeLayout {
        return t.value.Layout()
 }
 
+type (
+       UnionTypeCode = int8
+       UnionMode     int8
+)
+
+const (
+       MaxUnionTypeCode    UnionTypeCode = 127
+       InvalidUnionChildID int           = -1
+
+       SparseMode UnionMode = iota
+       DenseMode
+)
+
+type UnionType interface {
+       NestedType
+       Mode() UnionMode
+       ChildIDs() []int
+       TypeCodes() []UnionTypeCode
+       MaxTypeCode() UnionTypeCode
+}
+
+func UnionOf(mode UnionMode, fields []Field, typeCodes []UnionTypeCode) 
UnionType {
+       switch mode {
+       case SparseMode:
+               return SparseUnionOf(fields, typeCodes)
+       case DenseMode:
+               return DenseUnionOf(fields, typeCodes)
+       default:
+               panic("arrow: invalid union mode")
+       }
+}
+
+type unionType struct {
+       children  []Field
+       typeCodes []UnionTypeCode
+       childIDs  [int(MaxUnionTypeCode) + 1]int
+}
+
+func (t *unionType) init(fields []Field, typeCodes []UnionTypeCode) {
+       // initialize all child IDs to -1
+       t.childIDs[0] = InvalidUnionChildID
+       for i := 1; i < len(t.childIDs); i *= 2 {
+               copy(t.childIDs[i:], t.childIDs[:i])
+       }
+
+       t.children = fields
+       t.typeCodes = typeCodes
+
+       for i, tc := range t.typeCodes {
+               t.childIDs[tc] = i
+       }
+}
+
+func (t unionType) Fields() []Field            { return t.children }
+func (t unionType) TypeCodes() []UnionTypeCode { return t.typeCodes }
+func (t unionType) ChildIDs() []int            { return t.childIDs[:] }
+
+func (t *unionType) validate(fields []Field, typeCodes []UnionTypeCode, _ 
UnionMode) error {
+       if len(fields) != len(typeCodes) {
+               return errors.New("arrow: union types should have the same 
number of fields as type codes")
+       }
+
+       for _, c := range typeCodes {
+               if c < 0 || c > MaxUnionTypeCode {
+                       return errors.New("arrow: union type code out of 
bounds")
+               }
+       }
+       return nil
+}
+
+func (t *unionType) MaxTypeCode() (max UnionTypeCode) {
+       if len(t.typeCodes) == 0 {
+               return
+       }
+
+       max = t.typeCodes[0]
+       for _, c := range t.typeCodes[1:] {
+               if c > max {
+                       max = c
+               }
+       }
+       return
+}
+
+func (t *unionType) String() string {
+       var b strings.Builder
+       b.WriteByte('<')
+       for i := range t.typeCodes {
+               if i != 0 {
+                       b.WriteString(", ")
+               }
+               fmt.Fprintf(&b, "%s=%d", t.children[i], t.typeCodes[i])
+       }
+       b.WriteByte('>')
+       return b.String()
+}
+
+func (t *unionType) fingerprint() string {
+       var b strings.Builder
+       for _, c := range t.typeCodes {
+               fmt.Fprintf(&b, ":%d", c)
+       }
+       b.WriteString("]{")
+       for _, c := range t.children {
+               fingerprint := c.Fingerprint()
+               if len(fingerprint) == 0 {
+                       return ""
+               }
+               b.WriteString(fingerprint)
+               b.WriteByte(';')
+       }
+       b.WriteByte('}')
+       return b.String()
+}
+
+func fieldsFromArrays(arrays []Array, names ...string) (ret []Field) {
+       ret = make([]Field, len(arrays))
+       if len(names) == 0 {
+               for i, c := range arrays {
+                       ret[i] = Field{Name: strconv.Itoa(i), Type: 
c.DataType(), Nullable: true}
+               }
+       } else {
+               debug.Assert(len(names) == len(arrays), "mismatch of arrays and 
names")
+               for i, c := range arrays {
+                       ret[i] = Field{Name: names[i], Type: c.DataType(), 
Nullable: true}
+               }
+       }
+       return
+}
+
+type SparseUnionType struct {
+       unionType
+}
+
+func SparseUnionFromArrays(children []Array, fields []string, codes 
[]UnionTypeCode) *SparseUnionType {
+       if len(codes) == 0 {
+               codes = make([]UnionTypeCode, len(children))
+               for i := range children {
+                       codes[i] = UnionTypeCode(i)
+               }
+       }
+       return SparseUnionOf(fieldsFromArrays(children, fields...), codes)
+}
+
+func SparseUnionOf(fields []Field, typeCodes []UnionTypeCode) *SparseUnionType 
{
+       ret := &SparseUnionType{}
+       if err := ret.validate(fields, typeCodes, ret.Mode()); err != nil {
+               panic(err)
+       }
+       ret.init(fields, typeCodes)
+       return ret
+}
+
+func (SparseUnionType) ID() Type        { return SPARSE_UNION }
+func (SparseUnionType) Name() string    { return "sparse_union" }
+func (SparseUnionType) Mode() UnionMode { return SparseMode }
+func (t *SparseUnionType) Fingerprint() string {
+       return typeFingerprint(t) + "[s" + t.fingerprint()
+}
+func (SparseUnionType) Layout() DataTypeLayout {
+       return DataTypeLayout{Buffers: []BufferSpec{SpecAlwaysNull(), 
SpecFixedWidth(Uint8SizeBytes)}}

Review Comment:
   So, this is the same implementation choice as C++. This means you'll have to 
be careful when implementing the IPC and C data interface sides (I assume 
you're already aware of this).



##########
go/arrow/bitutil/bitmaps_test.go:
##########
@@ -356,3 +358,119 @@ func BenchmarkBitmapReader(b *testing.B) {
                }
        })
 }
+
+type bitmapOp struct {
+       noAlloc func(left, right []byte, lOffset, rOffset int64, out []byte, 
outOffset, length int64)
+       alloc   func(mem memory.Allocator, left, right []byte, lOffset, rOffset 
int64, length, outOffset int64) *memory.Buffer
+}
+
+type BitmapOpSuite struct {
+       suite.Suite
+}
+
+func (s *BitmapOpSuite) testAligned(op bitmapOp, leftBits, rightBits []int, 
resultBits []bool) {
+       var (
+               left, right []byte
+               out         *memory.Buffer
+               length      int64
+       )
+       for _, lOffset := range []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 
65536} {
+               s.Run(fmt.Sprintf("left offset %d", lOffset), func() {
+                       left = bitmapFromSlice(leftBits, int(lOffset))
+                       length = int64(len(leftBits))
+                       for _, rOffset := range []int64{lOffset, lOffset + 8, 
lOffset + 40} {
+                               s.Run(fmt.Sprintf("right offset %d", rOffset), 
func() {
+                                       right = bitmapFromSlice(rightBits, 
int(rOffset))
+                                       for _, outOffset := range 
[]int64{lOffset, lOffset + 16, lOffset + 24} {
+                                               s.Run(fmt.Sprintf("out offset 
%d", outOffset), func() {
+                                                       out = 
op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, 
outOffset)
+                                                       rdr := 
bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
+                                                       assertReaderVals(s.T(), 
rdr, resultBits)
+
+                                                       memory.Set(out.Bytes(), 
0x00)
+                                                       op.noAlloc(left, right, 
lOffset, rOffset, out.Bytes(), outOffset, length)
+                                                       rdr = 
bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
+                                                       assertReaderVals(s.T(), 
rdr, resultBits)
+                                               })
+                                       }
+                               })
+                       }
+               })
+       }
+}
+
+func (s *BitmapOpSuite) testUnaligned(op bitmapOp, leftBits, rightBits []int, 
resultBits []bool) {
+       var (
+               left, right []byte
+               out         *memory.Buffer
+               length      int64
+               offsets     = []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 
65536}
+       )
+
+       for _, lOffset := range offsets {
+               s.Run(fmt.Sprintf("left offset %d", lOffset), func() {
+                       left = bitmapFromSlice(leftBits, int(lOffset))
+                       length = int64(len(leftBits))
+                       for _, rOffset := range offsets {
+                               s.Run(fmt.Sprintf("right offset %d", rOffset), 
func() {
+                                       right = bitmapFromSlice(rightBits, 
int(rOffset))
+                                       for _, outOffset := range offsets {
+                                               s.Run(fmt.Sprintf("out offset 
%d", outOffset), func() {
+                                                       s.Run("alloc", func() {
+                                                               out = 
op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, 
outOffset)
+                                                               rdr := 
bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
+                                                               
assertReaderVals(s.T(), rdr, resultBits)
+                                                       })
+                                                       s.Run("noalloc", func() 
{
+                                                               
memory.Set(out.Bytes(), 0x00)
+                                                               
op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length)
+                                                               rdr := 
bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length))
+                                                               
assertReaderVals(s.T(), rdr, resultBits)
+                                                       })
+                                               })
+                                       }
+                               })
+                       }
+               })
+       }
+}
+
+func (s *BitmapOpSuite) TestBitmapAnd() {
+       op := bitmapOp{
+               noAlloc: bitutil.BitmapAnd,
+               alloc:   bitutil.BitmapAndAlloc,
+       }
+
+       leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}
+       rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0}
+       resultBits := []bool{false, false, true, false, false, false, false, 
false, false, true, false, false, false, false}

Review Comment:
   I would suggest adding tests with zero-length inputs as well.



##########
go/arrow/array/union_test.go:
##########
@@ -0,0 +1,952 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+       "strings"
+       "testing"
+
+       "github.com/apache/arrow/go/v9/arrow"
+       "github.com/apache/arrow/go/v9/arrow/array"
+       "github.com/apache/arrow/go/v9/arrow/memory"
+       "github.com/stretchr/testify/assert"
+       "github.com/stretchr/testify/suite"
+)
+
+func uint8ArrFromSlice(ids ...uint8) arrow.Array {
+       data := array.NewData(arrow.PrimitiveTypes.Uint8, len(ids),
+               []*memory.Buffer{nil, 
memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes(ids))}, nil, 0, 0)
+       defer data.Release()
+       return array.MakeFromData(data)
+}
+
+func int32ArrFromSlice(offsets ...int32) arrow.Array {
+       data := array.NewData(arrow.PrimitiveTypes.Int32, len(offsets),
+               []*memory.Buffer{nil, 
memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))}, nil, 0, 0)
+       defer data.Release()
+       return array.MakeFromData(data)
+}
+
+func TestUnionSliceEquals(t *testing.T) {
+       unionFields := []arrow.Field{
+               {Name: "u0", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
+               {Name: "u1", Type: arrow.PrimitiveTypes.Uint8, Nullable: true},
+       }
+
+       typeCodes := []arrow.UnionTypeCode{5, 10}
+       sparseType := arrow.SparseUnionOf(unionFields, typeCodes)
+       denseType := arrow.DenseUnionOf(unionFields, typeCodes)
+
+       schema := arrow.NewSchema([]arrow.Field{
+               {Name: "sparse", Type: sparseType, Nullable: true},
+               {Name: "dense", Type: denseType, Nullable: true},
+       }, nil)
+
+       sparseChildren := make([]arrow.Array, 2)
+       denseChildren := make([]arrow.Array, 2)
+
+       const length = 7
+
+       typeIDsBuffer := 
memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes([]uint8{5, 10, 5, 5, 10, 
10, 5}))
+       sparseChildren[0] = int32ArrFromSlice(0, 1, 2, 3, 4, 5, 6)
+       defer sparseChildren[0].Release()
+       sparseChildren[1] = uint8ArrFromSlice(10, 11, 12, 13, 14, 15, 16)
+       defer sparseChildren[1].Release()
+
+       denseChildren[0] = int32ArrFromSlice(0, 2, 3, 7)
+       defer denseChildren[0].Release()
+       denseChildren[1] = uint8ArrFromSlice(11, 14, 15)
+       defer denseChildren[1].Release()
+
+       offsetsBuffer := 
memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, 0, 1, 2, 1, 2, 
3}))
+       sparse := array.NewSparseUnion(sparseType, length, sparseChildren, 
typeIDsBuffer, 0)
+       dense := array.NewDenseUnion(denseType, length, denseChildren, 
typeIDsBuffer, offsetsBuffer, 0)
+
+       defer sparse.Release()
+       defer dense.Release()
+
+       batch := array.NewRecord(schema, []arrow.Array{sparse, dense}, -1)
+       defer batch.Release()
+
+       checkUnion := func(arr arrow.Array) {
+               size := arr.Len()
+               slice := array.NewSlice(arr, 2, int64(size))
+               defer slice.Release()
+               assert.EqualValues(t, size-2, slice.Len())
+
+               slice2 := array.NewSlice(arr, 2, int64(arr.Len()))
+               defer slice2.Release()
+               assert.EqualValues(t, size-2, slice2.Len())
+
+               assert.True(t, array.Equal(slice, slice2))

Review Comment:
   Do you also sometimes check for inequality between arrays?



##########
go/arrow/datatype_nested.go:
##########
@@ -329,6 +333,208 @@ func (t *MapType) Layout() DataTypeLayout {
        return t.value.Layout()
 }
 
+type (
+       UnionTypeCode = int8
+       UnionMode     int8
+)
+
+const (
+       MaxUnionTypeCode    UnionTypeCode = 127
+       InvalidUnionChildID int           = -1
+
+       SparseMode UnionMode = iota
+       DenseMode
+)
+
+type UnionType interface {
+       NestedType
+       Mode() UnionMode
+       ChildIDs() []int
+       TypeCodes() []UnionTypeCode

Review Comment:
   You may want to add comments to describe these APIs, since unions can be 
confusing for users at first.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to