This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 47f7e13  ARROW-13967: [Go] Implement Concatenate function for array.Interface
47f7e13 is described below

commit 47f7e139d4d6727189047be4c56a0f060a5b7eaa
Author: Matthew Topol <[email protected]>
AuthorDate: Tue Sep 21 21:26:10 2021 -0400

    ARROW-13967: [Go] Implement Concatenate function for array.Interface
    
    This is needed for adding nested types to the `MakeArrayFromScalar` function in ARROW-13789 (#11024)
    
    @emkornfield @sbinet @fsaintjacques
    
    Closes #11128 from zeroshade/concat_arrays
    
    Lead-authored-by: Matthew Topol <[email protected]>
    Co-authored-by: Matt Topol <[email protected]>
    Signed-off-by: Matthew Topol <[email protected]>
---
 go/arrow/array/builder.go                         |   3 +
 go/arrow/array/concat.go                          | 367 ++++++++++++++++++++++
 go/arrow/array/concat_test.go                     | 301 ++++++++++++++++++
 go/arrow/go.mod                                   |   2 +
 go/arrow/go.sum                                   |  54 ++++
 go/arrow/internal/testing/gen/random_array_gen.go | 307 ++++++++++++++++++
 6 files changed, 1034 insertions(+)

diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index 378c608..dbaad11 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -133,6 +133,9 @@ func (b *builder) resize(newBits int, init func(int)) {
 }
 
 func (b *builder) reserve(elements int, resize func(int)) {
+       if b.nullBitmap == nil {
+               b.nullBitmap = memory.NewResizableBuffer(b.mem)
+       }
        if b.length+elements > b.capacity {
                newCap := bitutil.NextPowerOf2(b.length + elements)
                resize(newCap)
diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go
new file mode 100644
index 0000000..de859e5
--- /dev/null
+++ b/go/arrow/array/concat.go
@@ -0,0 +1,367 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array
+
+import (
+       "math"
+       "math/bits"
+
+       "github.com/apache/arrow/go/arrow"
+       "github.com/apache/arrow/go/arrow/bitutil"
+       "github.com/apache/arrow/go/arrow/internal/debug"
+       "github.com/apache/arrow/go/arrow/memory"
+       "golang.org/x/xerrors"
+)
+
+// Concatenate joins the given arrays, in order, into a single new
+// array.Interface. Every input must have the same data type as the first one;
+// an empty input slice or a type mismatch yields a nil array and an error.
+//
+// Ownership of the inputs is unchanged: the caller must still release each of
+// them, as this function never does.
+func Concatenate(arrs []Interface, mem memory.Allocator) (Interface, error) {
+       if len(arrs) == 0 {
+               return nil, xerrors.New("array/concat: must pass at least one array")
+       }
+
+       // collect the underlying Data of every input, validating types as we go
+       want := arrs[0].DataType()
+       inputs := make([]*Data, 0, len(arrs))
+       for _, a := range arrs {
+               if got := a.DataType(); !arrow.TypeEqual(got, want) {
+                       return nil, xerrors.Errorf("arrays to be concatenated must be identically typed, but %s and %s were encountered",
+                               want, got)
+               }
+               inputs = append(inputs, a.Data())
+       }
+
+       joined, err := concat(inputs, mem)
+       if err != nil {
+               return nil, err
+       }
+       // the local reference to the concatenated Data is dropped once the
+       // returned array has been built from it
+       defer joined.Release()
+
+       return MakeFromData(joined), nil
+}
+
+// rng describes a contiguous range as a starting offset plus a length.
+type rng struct {
+       offset int
+       len    int
+}
+
+// bitmap references a specific slice of a bitmap: data holds the underlying
+// bytes, while rng gives the offset and length of the slice, both measured in
+// bits.
+type bitmap struct {
+       data []byte
+       rng  rng
+}
+
+// gatherBitmaps builds one bitmap descriptor per data object from the buffer at
+// index idx. The bit range always mirrors the data's offset and length; the byte
+// slice is left nil when that buffer is absent.
+func gatherBitmaps(data []*Data, idx int) []bitmap {
+       out := make([]bitmap, 0, len(data))
+       for _, d := range data {
+               var bytes []byte
+               if buf := d.buffers[idx]; buf != nil {
+                       bytes = buf.Bytes()
+               }
+               out = append(out, bitmap{
+                       data: bytes,
+                       rng:  rng{offset: d.offset, len: d.length},
+               })
+       }
+       return out
+}
+
+// gatherFixedBuffers collects, for every data object, the portion of buffer idx
+// that belongs to that (possibly sliced) array, assuming byteWidth bytes per
+// element. Each returned buffer is created with memory.NewBufferBytes over that
+// sub-slice; data objects whose buffer is nil contribute nothing to the result.
+func gatherFixedBuffers(data []*Data, idx, byteWidth int) []*memory.Buffer {
+       out := make([]*memory.Buffer, 0, len(data))
+       for _, d := range data {
+               src := d.buffers[idx]
+               if src == nil {
+                       continue
+               }
+
+               start := d.offset * byteWidth
+               end := (d.offset + d.length) * byteWidth
+               out = append(out, memory.NewBufferBytes(src.Bytes()[start:end]))
+       }
+       return out
+}
+
+// gatherBuffersFixedWidthType behaves like gatherFixedBuffers, except that the
+// per-element byte width is derived from the fixed-width data type (its bit
+// width divided by 8) instead of being passed in directly.
+func gatherBuffersFixedWidthType(data []*Data, idx int, fixed arrow.FixedWidthDataType) []*memory.Buffer {
+       byteWidth := fixed.BitWidth() / 8
+       return gatherFixedBuffers(data, idx, byteWidth)
+}
+
+// gatherBufferRanges returns, for each data object, the byte range ranges[i] of
+// its buffer at index idx. It requires len(ranges) == len(data). A nil buffer is
+// skipped, and is only expected when its range is empty.
+func gatherBufferRanges(data []*Data, idx int, ranges []rng) []*memory.Buffer {
+       out := make([]*memory.Buffer, 0, len(data))
+       for i := range data {
+               src := data[i].buffers[idx]
+               span := ranges[i]
+               if src == nil {
+                       debug.Assert(span.len == 0, "misaligned buffer value ranges")
+                       continue
+               }
+
+               out = append(out, memory.NewBufferBytes(src.Bytes()[span.offset:span.offset+span.len]))
+       }
+       return out
+}
+
+// gatherChildren slices child idx of every data object down to the parent's own
+// offset and length, returning one sliced child per data object.
+func gatherChildren(data []*Data, idx int) []*Data {
+       // each parent element corresponds to exactly one child element
+       const multiplier = 1
+       return gatherChildrenMultiplier(data, idx, multiplier)
+}
+
+// gatherChildrenMultiplier slices child idx of every data object to the span of
+// child values covered by the parent, where each parent element owns multiplier
+// consecutive child elements (such as the values of a fixed size list). The
+// child slice spans [offset*multiplier, (offset+length)*multiplier).
+//
+// Both bounds are widened to int64 before multiplying so the product cannot
+// wrap around on platforms where int is 32 bits.
+func gatherChildrenMultiplier(data []*Data, idx, multiplier int) []*Data {
+       out := make([]*Data, len(data))
+       mult := int64(multiplier)
+       for i, d := range data {
+               start := int64(d.offset) * mult
+               end := int64(d.offset+d.length) * mult
+               out[i] = NewSliceData(d.childData[idx], start, end)
+       }
+       return out
+}
+
+// gatherChildrenRanges slices child idx of each data object to the matching
+// entry of ranges, so that element i of the result covers
+// [ranges[i].offset, ranges[i].offset+ranges[i].len) of that child.
+func gatherChildrenRanges(data []*Data, idx int, ranges []rng) []*Data {
+       debug.Assert(len(data) == len(ranges), "mismatched children ranges for concat")
+       out := make([]*Data, 0, len(data))
+       for i := range data {
+               child := data[i].childData[idx]
+               begin, end := ranges[i].offset, ranges[i].offset+ranges[i].len
+               out = append(out, NewSliceData(child, int64(begin), int64(end)))
+       }
+       return out
+}
+
+// concatBuffers allocates one buffer from mem, sized to the combined length of
+// bufs, and copies the contents of each buffer into it back to back.
+func concatBuffers(bufs []*memory.Buffer, mem memory.Allocator) *memory.Buffer {
+       total := 0
+       for _, b := range bufs {
+               total += b.Len()
+       }
+
+       out := memory.NewResizableBuffer(mem)
+       out.Resize(total)
+
+       // pos is the write position of the next buffer inside the output
+       dst, pos := out.Bytes(), 0
+       for _, b := range bufs {
+               copy(dst[pos:], b.Bytes())
+               pos += b.Len()
+       }
+       return out
+}
+
+// concatOffsets creates a single int32 offsets buffer which represents the
+// concatenation of all of the given offsets buffers, shifting every offset so
+// that each input's values start where the previous input's values ended.
+//
+// It also returns, per input buffer, the range that needs to be fetched from the
+// corresponding values buffer to construct the final concatenated values.
+//
+// An error is returned if the combined offsets would overflow an int32; in that
+// case the partially filled output buffer is released before returning.
+func concatOffsets(buffers []*memory.Buffer, mem memory.Allocator) (*memory.Buffer, []rng, error) {
+       outLen := 0
+       for _, b := range buffers {
+               outLen += b.Len() / arrow.Int32SizeBytes
+       }
+
+       out := memory.NewResizableBuffer(mem)
+       out.Resize(arrow.Int32Traits.BytesRequired(outLen + 1))
+
+       dst := arrow.Int32Traits.CastFromBytes(out.Bytes())
+       valuesRanges := make([]rng, len(buffers))
+       nextOffset := int32(0)
+       nextElem := int(0)
+       for i, b := range buffers {
+               if b.Len() == 0 {
+                       valuesRanges[i].offset = 0
+                       valuesRanges[i].len = 0
+                       continue
+               }
+
+               // when we gather our buffers, we sliced off the last offset from the buffer
+               // so that we could count the lengths accurately
+               src := arrow.Int32Traits.CastFromBytes(b.Bytes())
+               valuesRanges[i].offset = int(src[0])
+               // expand our slice to see that final offset
+               expand := src[:len(src)+1]
+               // compute the length of this range by taking the final offset and subtracting where we started.
+               valuesRanges[i].len = int(expand[len(src)]) - valuesRanges[i].offset
+
+               if nextOffset > math.MaxInt32-int32(valuesRanges[i].len) {
+                       // the output buffer is abandoned here, so give its memory back
+                       out.Release()
+                       return nil, nil, xerrors.New("offset overflow while concatenating arrays")
+               }
+
+               // adjust each offset by the difference between our last ending point and our starting point
+               adj := nextOffset - src[0]
+               for j, o := range src {
+                       dst[nextElem+j] = adj + o
+               }
+
+               // the next index for an element in the output buffer
+               nextElem += b.Len() / arrow.Int32SizeBytes
+               // update our offset counter to be the total current length of our output
+               nextOffset += int32(valuesRanges[i].len)
+       }
+
+       // final offset should point to the end of the data
+       dst[outLen] = nextOffset
+       return out, valuesRanges, nil
+}
+
+// concat is the implementation for actually performing the concatenation of the
+// *array.Data objects, kept separate so that it can be called recursively for
+// the children of nested types. data must be non-empty; the result takes the
+// type of data[0].
+//
+// If any step fails, every buffer and child already attached to the partially
+// built result is released before the error is returned, so nothing allocated
+// from mem is leaked on the error paths.
+func concat(data []*Data, mem memory.Allocator) (_ *Data, err error) {
+       out := &Data{refCount: 1, dtype: data[0].dtype, nulls: 0}
+       defer func() {
+               if err == nil {
+                       return
+               }
+               // out is abandoned on error: free whatever was already attached to it.
+               // Entries may still be nil if the failure happened before they were set.
+               for _, b := range out.buffers {
+                       if b != nil {
+                               b.Release()
+                       }
+               }
+               for _, c := range out.childData {
+                       if c != nil {
+                               c.Release()
+                       }
+               }
+       }()
+
+       for _, d := range data {
+               out.length += d.length
+               if out.nulls == UnknownNullCount || d.nulls == UnknownNullCount {
+                       // once any input's null count is unknown, the total stays unknown
+                       out.nulls = UnknownNullCount
+                       continue
+               }
+               out.nulls += d.nulls
+       }
+
+       out.buffers = make([]*memory.Buffer, len(data[0].buffers))
+       // a validity bitmap is only built when the result may contain nulls
+       if out.nulls != 0 && out.dtype.ID() != arrow.NULL {
+               bm, err := concatBitmaps(gatherBitmaps(data, 0), mem)
+               if err != nil {
+                       return nil, err
+               }
+               out.buffers[0] = bm
+       }
+
+       switch dt := out.dtype.(type) {
+       case *arrow.NullType:
+       case *arrow.BooleanType:
+               // boolean values are bit-packed, so they are concatenated as a bitmap
+               bm, err := concatBitmaps(gatherBitmaps(data, 1), mem)
+               if err != nil {
+                       return nil, err
+               }
+               out.buffers[1] = bm
+       case arrow.FixedWidthDataType:
+               out.buffers[1] = concatBuffers(gatherBuffersFixedWidthType(data, 1, dt), mem)
+       case arrow.BinaryDataType:
+               offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, arrow.Int32SizeBytes), mem)
+               if err != nil {
+                       return nil, err
+               }
+               out.buffers[2] = concatBuffers(gatherBufferRanges(data, 2, valueRanges), mem)
+               out.buffers[1] = offsetBuffer
+       case *arrow.ListType:
+               offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, arrow.Int32SizeBytes), mem)
+               if err != nil {
+                       return nil, err
+               }
+               childData := gatherChildrenRanges(data, 0, valueRanges)
+               for _, c := range childData {
+                       defer c.Release()
+               }
+
+               out.buffers[1] = offsetBuffer
+               out.childData = make([]*Data, 1)
+               out.childData[0], err = concat(childData, mem)
+               if err != nil {
+                       return nil, err
+               }
+       case *arrow.FixedSizeListType:
+               childData := gatherChildrenMultiplier(data, 0, int(dt.Len()))
+               for _, c := range childData {
+                       defer c.Release()
+               }
+
+               children, err := concat(childData, mem)
+               if err != nil {
+                       return nil, err
+               }
+               out.childData = []*Data{children}
+       case *arrow.StructType:
+               out.childData = make([]*Data, len(dt.Fields()))
+               for i := range dt.Fields() {
+                       children := gatherChildren(data, i)
+                       for _, c := range children {
+                               defer c.Release()
+                       }
+
+                       childData, err := concat(children, mem)
+                       if err != nil {
+                               return nil, err
+                       }
+                       out.childData[i] = childData
+               }
+       case *arrow.MapType:
+               offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, arrow.Int32SizeBytes), mem)
+               if err != nil {
+                       return nil, err
+               }
+               childData := gatherChildrenRanges(data, 0, valueRanges)
+               for _, c := range childData {
+                       defer c.Release()
+               }
+
+               out.buffers[1] = offsetBuffer
+               out.childData = make([]*Data, 1)
+               out.childData[0], err = concat(childData, mem)
+               if err != nil {
+                       return nil, err
+               }
+       default:
+               return nil, xerrors.Errorf("concatenate not implemented for type %s", dt)
+       }
+
+       return out, nil
+}
+
+// addOvf adds two signed integers and reports whether the addition overflowed.
+//
+// A signed addition overflows exactly when both operands have the same sign and
+// the sum has the opposite one, i.e. when the sign bit of (x^sum)&(y^sum) is
+// set. Shifting right by bits.UintSize-1 (31 on 32 bit, 63 on 64 bit
+// architectures) leaves only that sign bit, so the check is correct for
+// negative as well as non-negative operands.
+//
+// First return is the (possibly wrapped) result of the sum, the second return
+// is true if there was an overflow.
+func addOvf(x, y int) (int, bool) {
+       sum := x + y
+       return sum, ((x^sum)&(y^sum))>>(bits.UintSize-1) != 0
+}
+
+// concatBitmaps copies every bitmap slice, in order, into one newly allocated
+// buffer and returns it. A bitmap without backing bytes implies that the value
+// is true for all of its elements, so its range is filled with set bits. An
+// error is returned if the summed bit lengths overflow an int.
+func concatBitmaps(bitmaps []bitmap, mem memory.Allocator) (*memory.Buffer, error) {
+       totalBits := 0
+       for i := range bitmaps {
+               sum, overflowed := addOvf(totalBits, bitmaps[i].rng.len)
+               if overflowed {
+                       return nil, xerrors.New("length overflow when concatenating arrays")
+               }
+               totalBits = sum
+       }
+
+       out := memory.NewResizableBuffer(mem)
+       out.Resize(int(bitutil.BytesForBits(int64(totalBits))))
+
+       // pos is the bit position in the output where the next bitmap starts
+       pos := 0
+       for _, bm := range bitmaps {
+               switch {
+               case bm.data == nil:
+                       bitutil.SetBitsTo(out.Bytes(), int64(pos), int64(bm.rng.len), true)
+               default:
+                       bitutil.CopyBitmap(bm.data, bm.rng.offset, bm.rng.len, out.Bytes(), pos)
+               }
+               pos += bm.rng.len
+       }
+       return out, nil
+}
diff --git a/go/arrow/array/concat_test.go b/go/arrow/array/concat_test.go
new file mode 100644
index 0000000..9e6ab4a
--- /dev/null
+++ b/go/arrow/array/concat_test.go
@@ -0,0 +1,301 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+       "fmt"
+       "math"
+       "sort"
+       "testing"
+
+       "github.com/apache/arrow/go/arrow"
+       "github.com/apache/arrow/go/arrow/array"
+       "github.com/apache/arrow/go/arrow/bitutil"
+       "github.com/apache/arrow/go/arrow/internal/testing/gen"
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/stretchr/testify/assert"
+       "github.com/stretchr/testify/suite"
+       "golang.org/x/exp/rand"
+)
+
+// TestConcatenateValueBuffersNull concatenates an empty binary array with a
+// binary array holding a single null and expects the result to equal the
+// latter. The checked allocator asserts that nothing is left allocated.
+func TestConcatenateValueBuffersNull(t *testing.T) {
+       mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+       defer mem.AssertSize(t, 0)
+
+       inputs := make([]array.Interface, 0)
+
+       bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+       defer bldr.Release()
+
+       // first input: built before anything was appended
+       arr := bldr.NewArray()
+       defer arr.Release()
+       inputs = append(inputs, arr)
+
+       // second input: a single null
+       bldr.AppendNull()
+       arr = bldr.NewArray()
+       defer arr.Release()
+       inputs = append(inputs, arr)
+
+       actual, err := array.Concatenate(inputs, mem)
+       if !assert.NoError(t, err) {
+               // actual is nil on error; stop here instead of dereferencing it
+               return
+       }
+       defer actual.Release()
+
+       assert.True(t, array.ArrayEqual(actual, inputs[1]))
+}
+
+// TestConcatenate runs the ConcatTestSuite once per listed data type, always
+// with the same seed, null probabilities and array sizes.
+func TestConcatenate(t *testing.T) {
+       dataTypes := []arrow.DataType{
+               arrow.FixedWidthTypes.Boolean,
+               arrow.PrimitiveTypes.Int8,
+               arrow.PrimitiveTypes.Uint8,
+               arrow.PrimitiveTypes.Int16,
+               arrow.PrimitiveTypes.Uint16,
+               arrow.PrimitiveTypes.Int32,
+               arrow.PrimitiveTypes.Uint32,
+               arrow.PrimitiveTypes.Int64,
+               arrow.PrimitiveTypes.Uint64,
+               arrow.PrimitiveTypes.Float32,
+               arrow.PrimitiveTypes.Float64,
+               arrow.BinaryTypes.String,
+               arrow.ListOf(arrow.PrimitiveTypes.Int8),
+               arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int8),
+               arrow.StructOf(),
+               arrow.MapOf(arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8),
+       }
+
+       for _, dt := range dataTypes {
+               testSuite := &ConcatTestSuite{
+                       seed:      0xdeadbeef,
+                       dt:        dt,
+                       nullProbs: []float64{0.0, 0.1, 0.5, 0.9, 1.0},
+                       sizes:     []int32{0, 1, 2, 4, 16, 31, 1234},
+               }
+               t.Run(dt.Name(), func(t *testing.T) {
+                       suite.Run(t, testSuite)
+               })
+       }
+}
+
+// ConcatTestSuite exercises array.Concatenate for a single data type.
+//
+// seed initializes both the random array generator (rng) and the random slice
+// offsets, dt is the data type under test, nullProbs and sizes list the null
+// probabilities and array lengths to iterate over, and mem is the checked
+// allocator created in SetupSuite and verified in TearDownSuite.
+type ConcatTestSuite struct {
+       suite.Suite
+
+       seed uint64
+       rng  gen.RandomArrayGenerator
+       dt   arrow.DataType
+
+       nullProbs []float64
+       sizes     []int32
+
+       mem *memory.CheckedAllocator
+}
+
+// SetupSuite creates the checked allocator for the suite and a random array
+// generator that is seeded with cts.seed and allocates from that allocator.
+func (cts *ConcatTestSuite) SetupSuite() {
+       cts.mem = memory.NewCheckedAllocator(memory.DefaultAllocator)
+       cts.rng = gen.NewRandomArrayGenerator(cts.seed, cts.mem)
+}
+
+// TearDownSuite asserts that the suite's checked allocator has a size of 0,
+// i.e. that everything allocated through it was released again.
+func (cts *ConcatTestSuite) TearDownSuite() {
+       cts.mem.AssertSize(cts.T(), 0)
+}
+
+// generateArr builds a random array of the suite's data type (cts.dt) with size
+// elements, passing nullprob on to the random generator. Nested types are
+// assembled from randomly generated child arrays. It returns nil for type IDs
+// that are not handled here; the caller releases the returned array.
+func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) 
array.Interface {
+       switch cts.dt.ID() {
+       case arrow.BOOL:
+               return cts.rng.Boolean(size, 0.5, nullprob)
+       case arrow.INT8:
+               return cts.rng.Int8(size, 0, 127, nullprob)
+       case arrow.UINT8:
+               return cts.rng.Uint8(size, 0, 127, nullprob)
+       case arrow.INT16:
+               return cts.rng.Int16(size, 0, 127, nullprob)
+       case arrow.UINT16:
+               return cts.rng.Uint16(size, 0, 127, nullprob)
+       case arrow.INT32:
+               return cts.rng.Int32(size, 0, 127, nullprob)
+       case arrow.UINT32:
+               return cts.rng.Uint32(size, 0, 127, nullprob)
+       case arrow.INT64:
+               return cts.rng.Int64(size, 0, 127, nullprob)
+       case arrow.UINT64:
+               return cts.rng.Uint64(size, 0, 127, nullprob)
+       case arrow.FLOAT32:
+               return cts.rng.Float32(size, 0, 127, nullprob)
+       case arrow.FLOAT64:
+               return cts.rng.Float64(size, 0, 127, nullprob)
+       case arrow.NULL:
+               return array.NewNull(int(size))
+       case arrow.STRING:
+               return cts.rng.String(size, 0, 15, nullprob)
+       case arrow.LIST:
+               // size lists sharing size*4 child values, split at random offsets
+               valuesSize := size * 4
+               values := cts.rng.Int8(valuesSize, 0, 127, 
nullprob).(*array.Int8)
+               defer values.Release()
+               offsetsVector := cts.offsets(int32(valuesSize), int32(size))
+               // ensure the first and last offsets encompass the whole values
+               offsetsVector[0] = 0
+               offsetsVector[len(offsetsVector)-1] = int32(valuesSize)
+
+               bldr := array.NewListBuilder(memory.DefaultAllocator, 
arrow.PrimitiveTypes.Int8)
+               defer bldr.Release()
+
+               // every list entry itself is valid; nulls only occur in the child values
+               valid := make([]bool, len(offsetsVector)-1)
+               for i := range valid {
+                       valid[i] = true
+               }
+               bldr.AppendValues(offsetsVector, valid)
+               vb := bldr.ValueBuilder().(*array.Int8Builder)
+               for i := 0; i < values.Len(); i++ {
+                       if values.IsValid(i) {
+                               vb.Append(values.Value(i))
+                       } else {
+                               vb.AppendNull()
+                       }
+               }
+               return bldr.NewArray()
+       case arrow.FIXED_SIZE_LIST:
+               const listsize = 3
+               valuesSize := size * listsize
+               values := cts.rng.Int8(valuesSize, 0, 127, nullprob)
+               defer values.Release()
+
+               // wrap the flat values as the only child; no validity bitmap is supplied
+               data := array.NewData(arrow.FixedSizeListOf(listsize, 
arrow.PrimitiveTypes.Int8), int(size), []*memory.Buffer{nil}, 
[]*array.Data{values.Data()}, 0, 0)
+               defer data.Release()
+               return array.MakeFromData(data)
+       case arrow.STRUCT:
+               foo := cts.rng.Int8(size, 0, 127, nullprob)
+               defer foo.Release()
+               bar := cts.rng.Float64(size, 0, 127, nullprob)
+               defer bar.Release()
+               baz := cts.rng.Boolean(size, 0.5, nullprob)
+               defer baz.Release()
+
+               // three nullable fields backed by the arrays above; no validity bitmap
+               // is supplied for the struct itself
+               data := array.NewData(arrow.StructOf(
+                       arrow.Field{Name: "foo", Type: foo.DataType(), 
Nullable: true},
+                       arrow.Field{Name: "bar", Type: bar.DataType(), 
Nullable: true},
+                       arrow.Field{Name: "baz", Type: baz.DataType(), 
Nullable: true}),
+                       int(size), []*memory.Buffer{nil}, 
[]*array.Data{foo.Data(), bar.Data(), baz.Data()}, 0, 0)
+               defer data.Release()
+               return array.NewStructData(data)
+       case arrow.MAP:
+               valuesSize := size * 4
+               // keys are generated with a null probability of 0
+               keys := cts.rng.Uint16(valuesSize, 0, 127, 0).(*array.Uint16)
+               defer keys.Release()
+               values := cts.rng.Int8(valuesSize, 0, 127, 
nullprob).(*array.Int8)
+               defer values.Release()
+
+               // as for lists: force the offsets to span all of the key/item pairs
+               offsetsVector := cts.offsets(int32(valuesSize), int32(size))
+               offsetsVector[0] = 0
+               offsetsVector[len(offsetsVector)-1] = int32(valuesSize)
+
+               bldr := array.NewMapBuilder(memory.DefaultAllocator, 
arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8, false)
+               defer bldr.Release()
+
+               kb := bldr.KeyBuilder().(*array.Uint16Builder)
+               vb := bldr.ItemBuilder().(*array.Int8Builder)
+
+               valid := make([]bool, len(offsetsVector)-1)
+               for i := range valid {
+                       valid[i] = true
+               }
+               bldr.AppendValues(offsetsVector, valid)
+               for i := 0; i < int(valuesSize); i++ {
+                       kb.Append(keys.Value(i))
+                       if values.IsValid(i) {
+                               vb.Append(values.Value(i))
+                       } else {
+                               vb.AppendNull()
+                       }
+               }
+               return bldr.NewArray()
+       default:
+               return nil
+       }
+}
+
+// slices cuts arr into len(offsets)-1 consecutive slices, where slice i spans
+// [offsets[i], offsets[i+1]).
+func (cts *ConcatTestSuite) slices(arr array.Interface, offsets []int32) []array.Interface {
+       out := make([]array.Interface, 0, len(offsets)-1)
+       for i := 1; i < len(offsets); i++ {
+               begin, end := int64(offsets[i-1]), int64(offsets[i])
+               out = append(out, array.NewSlice(arr, begin, end))
+       }
+       return out
+}
+
+// checkTrailingBitsZeroed asserts that, in the byte of bitmap holding bit
+// position length, no bit outside of bitutil.PrecedingBitmask[length%8] is set.
+// Nothing is checked when that mask is zero.
+func (cts *ConcatTestSuite) checkTrailingBitsZeroed(bitmap *memory.Buffer, length int64) {
+       preceding := bitutil.PrecedingBitmask[length%8]
+       if preceding == 0 {
+               return
+       }
+
+       lastByte := bitmap.Bytes()[length/8]
+       // masking must not change the byte, otherwise a bit beyond the mask was set
+       cts.Equal(lastByte&preceding, lastByte, length, preceding)
+}
+
+// offsets returns slicecount+1 random values in [0, length], sorted ascending.
+// The generator is re-seeded with cts.seed on every call, so identical
+// arguments always produce identical offsets.
+func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 {
+       dist := rand.New(rand.NewSource(cts.seed))
+       out := make([]int32, slicecount+1)
+       for i := 0; i < len(out); i++ {
+               out[i] = dist.Int31n(length + 1)
+       }
+       sort.Slice(out, func(a, b int) bool { return out[a] < out[b] })
+       return out
+}
+
+// TestCheckConcat generates, for every configured size and null probability, a
+// random array, cuts it into three slices at random offsets and checks that
+// concatenating those slices equals the single slice spanning the first to the
+// last offset. It also checks that the validity bitmap (and, for booleans, the
+// values bitmap) of the result has no stray bits set in its last byte, and that
+// each sub-test leaves the allocator scope balanced.
+func (cts *ConcatTestSuite) TestCheckConcat() {
+       for _, sz := range cts.sizes {
+               cts.Run(fmt.Sprintf("size %d", sz), func() {
+                       offsets := cts.offsets(sz, 3)
+                       for _, np := range cts.nullProbs {
+                               cts.Run(fmt.Sprintf("nullprob %0.2f", np), func() {
+                                       scopedMem := memory.NewCheckedAllocatorScope(cts.mem)
+                                       defer scopedMem.CheckSize(cts.T())
+
+                                       arr := cts.generateArr(int64(sz), np)
+                                       defer arr.Release()
+                                       expected := array.NewSlice(arr, int64(offsets[0]), int64(offsets[len(offsets)-1]))
+                                       defer expected.Release()
+
+                                       slices := cts.slices(arr, offsets)
+                                       for _, s := range slices {
+                                               defer s.Release()
+                                       }
+
+                                       actual, err := array.Concatenate(slices, cts.mem)
+                                       if !cts.NoError(err) {
+                                               // actual is nil on error; stop before dereferencing it
+                                               return
+                                       }
+                                       defer actual.Release()
+
+                                       cts.True(array.ArrayEqual(expected, actual))
+                                       if len(actual.Data().Buffers()) > 0 {
+                                               if actual.Data().Buffers()[0] != nil {
+                                                       cts.checkTrailingBitsZeroed(actual.Data().Buffers()[0], int64(actual.Len()))
+                                               }
+                                               if actual.DataType().ID() == arrow.BOOL {
+                                                       cts.checkTrailingBitsZeroed(actual.Data().Buffers()[1], int64(actual.Len()))
+                                               }
+                                       }
+                               })
+                       }
+               })
+       }
+}
+
+// TestOffsetOverflow concatenates a one-element string array whose offsets end
+// at math.MaxInt32 with itself and expects the overflow error instead of a
+// panic.
+func TestOffsetOverflow(t *testing.T) {
+       offsetBytes := arrow.Int32Traits.CastToBytes([]int32{0, math.MaxInt32})
+       buffers := []*memory.Buffer{
+               nil,
+               memory.NewBufferBytes(offsetBytes),
+               memory.NewBufferBytes([]byte{}),
+       }
+       fakeArr := array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 0))
+
+       var err error
+       assert.NotPanics(t, func() {
+               _, err = array.Concatenate([]array.Interface{fakeArr, fakeArr}, memory.DefaultAllocator)
+       })
+       assert.EqualError(t, err, "offset overflow while concatenating arrays")
+}
diff --git a/go/arrow/go.mod b/go/arrow/go.mod
index 20f2f42..b71ff89 100644
--- a/go/arrow/go.mod
+++ b/go/arrow/go.mod
@@ -26,9 +26,11 @@ require (
        github.com/klauspost/compress v1.13.1
        github.com/pierrec/lz4/v4 v4.1.8
        github.com/stretchr/testify v1.7.0
+       golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3
        golang.org/x/net v0.0.0-20210614182718-04defd469f4e // indirect
        golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c // indirect
        golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
+       gonum.org/v1/gonum v0.9.3
        google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79 // 
indirect
        google.golang.org/grpc v1.39.0
        google.golang.org/protobuf v1.27.1
diff --git a/go/arrow/go.sum b/go/arrow/go.sum
index 24da3ea..207218e 100644
--- a/go/arrow/go.sum
+++ b/go/arrow/go.sum
@@ -1,7 +1,12 @@
 cloud.google.com/go v0.26.0/go.mod 
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 cloud.google.com/go v0.34.0/go.mod 
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod 
h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
+gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod 
h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8=
 github.com/BurntSushi/toml v0.3.1/go.mod 
h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod 
h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod 
h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
 github.com/antihax/optional v1.0.0/go.mod 
h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
+github.com/boombuler/barcode v1.0.0/go.mod 
h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
 github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod 
h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
 github.com/client9/misspell v0.3.4/go.mod 
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
 github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod 
h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
@@ -17,7 +22,16 @@ github.com/envoyproxy/go-control-plane 
v0.9.9-0.20201210154907-fd9021fe5dad/go.m
 github.com/envoyproxy/go-control-plane 
v0.9.9-0.20210217033140-668b12f5399d/go.mod 
h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
 github.com/envoyproxy/go-control-plane 
v0.9.9-0.20210512163311-63b5d3c536b0/go.mod 
h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ=
 github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod 
h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod 
h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
+github.com/fogleman/gg v1.3.0/go.mod 
h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
 github.com/ghodss/yaml v1.0.0/go.mod 
h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
+github.com/go-fonts/dejavu v0.1.0/go.mod 
h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g=
+github.com/go-fonts/latin-modern v0.2.0/go.mod 
h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks=
+github.com/go-fonts/liberation v0.1.1/go.mod 
h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
+github.com/go-fonts/stix v0.1.0/go.mod 
h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
+github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod 
h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
+github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod 
h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod 
h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod 
h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
 github.com/golang/mock v1.1.1/go.mod 
h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
 github.com/golang/protobuf v1.2.0/go.mod 
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@@ -48,28 +62,54 @@ github.com/google/go-cmp v0.5.6 
h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
 github.com/google/go-cmp v0.5.6/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/uuid v1.1.2/go.mod 
h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod 
h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
+github.com/jung-kurt/gofpdf v1.0.0/go.mod 
h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
+github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod 
h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
 github.com/klauspost/compress v1.13.1 
h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ=
 github.com/klauspost/compress v1.13.1/go.mod 
h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
+github.com/phpdave11/gofpdf v1.4.2/go.mod 
h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY=
+github.com/phpdave11/gofpdi v1.0.12/go.mod 
h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI=
 github.com/pierrec/lz4/v4 v4.1.8 
h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4=
 github.com/pierrec/lz4/v4 v4.1.8/go.mod 
h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
+github.com/pkg/errors v0.8.1/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1/go.mod 
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod 
h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/rogpeppe/fastuuid v1.2.0/go.mod 
h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
+github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod 
h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
 github.com/stretchr/objx v0.1.0/go.mod 
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.2.2/go.mod 
h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.5.1/go.mod 
h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
 github.com/stretchr/testify v1.7.0 
h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
 github.com/stretchr/testify v1.7.0/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/yuin/goldmark v1.3.5/go.mod 
h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 go.opentelemetry.io/proto/otlp v0.7.0/go.mod 
h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod 
h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod 
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod 
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod 
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3 
h1:n9HxLrNxWWtEb1cA950nuEEj3QnKbtsCJ6KjcgisNUs=
+golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod 
h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE=
+golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod 
h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
+golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod 
h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
+golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod 
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod 
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod 
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod 
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod 
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod 
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod 
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod 
h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod 
h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
 golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod 
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
 golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod 
h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod 
h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
+golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
 golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod 
h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
 golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -92,9 +132,11 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod 
h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -106,11 +148,14 @@ golang.org/x/text v0.3.3/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod 
h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
 golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod 
h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
 golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod 
h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod 
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
 golang.org/x/tools v0.1.4/go.mod 
h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
@@ -119,6 +164,14 @@ golang.org/x/xerrors 
v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 
h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod 
h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
+gonum.org/v1/gonum v0.8.2/go.mod 
h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
+gonum.org/v1/gonum v0.9.3 h1:DnoIG+QAMaF5NvxnGe/oKsgKcAc6PcUyl8q0VetfQ8s=
+gonum.org/v1/gonum v0.9.3/go.mod 
h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 
h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod 
h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
+gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod 
h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
+gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
 google.golang.org/appengine v1.1.0/go.mod 
h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/appengine v1.4.0/go.mod 
h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod 
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
@@ -157,3 +210,4 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c 
h1:dUUwHk2QECo/6vqA44rthZ8ie
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod 
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
 honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod 
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/go/arrow/internal/testing/gen/random_array_gen.go 
b/go/arrow/internal/testing/gen/random_array_gen.go
new file mode 100644
index 0000000..975117a
--- /dev/null
+++ b/go/arrow/internal/testing/gen/random_array_gen.go
@@ -0,0 +1,307 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gen
+
+import (
+       "github.com/apache/arrow/go/arrow"
+       "github.com/apache/arrow/go/arrow/array"
+       "github.com/apache/arrow/go/arrow/bitutil"
+       "github.com/apache/arrow/go/arrow/memory"
+       "golang.org/x/exp/rand"
+       "gonum.org/v1/gonum/stat/distuv"
+)
+
+// RandomArrayGenerator is a struct used for constructing Random Arrow arrays
+// for use with testing.
+type RandomArrayGenerator struct {
+       seed     uint64 // base seed; added to extra to seed each per-call random source
+       extra    uint64 // incremented before every new per-call source so successive sources get distinct seeds
+       src      rand.Source // source created from seed by NewRandomArrayGenerator
+       seedRand *rand.Rand // rand.Rand wrapping src
+       mem      memory.Allocator // allocator used for the buffers and builders backing generated arrays
+}
+
+// NewRandomArrayGenerator returns a generator whose base random source is
+// seeded with seed and whose generated arrays are allocated from mem.
+func NewRandomArrayGenerator(seed uint64, mem memory.Allocator) RandomArrayGenerator {
+       base := rand.NewSource(seed)
+       return RandomArrayGenerator{
+               seed:     seed,
+               extra:    0,
+               src:      base,
+               seedRand: rand.New(base),
+               mem:      mem,
+       }
+}
+
+// GenerateBitmap generates a bitmap of n bits and stores it into buffer. Prob 
is the probability
+// that a given bit will be zero, with 1-prob being the probability it will be 
1. The return value
+// is the number of bits that were left unset. The assumption being that 
buffer is currently
+// zero initialized as this function does not clear any bits, it only sets 1s.
+func (r *RandomArrayGenerator) GenerateBitmap(buffer []byte, n int64, prob 
float64) int64 {
+       count := int64(0)
+       r.extra++
+
+       // bernoulli distribution uses P to determine the probabitiliy of a 0 
or a 1,
+       // which we'll use to generate the bitmap.
+       dist := distuv.Bernoulli{P: 1 - prob, Src: rand.NewSource(r.seed + 
r.extra)}
+       for i := 0; int64(i) < n; i++ {
+               if dist.Rand() != float64(0.0) {
+                       bitutil.SetBit(buffer, i)
+               } else {
+                       count++
+               }
+       }
+
+       return count
+}
+
+// Boolean builds a boolean array of size elements. Both bitmaps are filled by
+// GenerateBitmap: nullProb is the probability that a validity bit is left
+// unset (the unset count is reported as the array's null count), and prob is
+// the probability that a value bit is left unset.
+func (r *RandomArrayGenerator) Boolean(size int64, prob, nullProb float64) array.Interface {
+       nbytes := int(bitutil.BytesForBits(size))
+
+       // Validity bitmap first, then the value bitmap, so the per-call seeds
+       // are consumed in a fixed order.
+       validity := memory.NewResizableBuffer(r.mem)
+       validity.Resize(nbytes)
+       defer validity.Release()
+       nulls := r.GenerateBitmap(validity.Bytes(), size, nullProb)
+
+       values := memory.NewResizableBuffer(r.mem)
+       values.Resize(nbytes)
+       defer values.Release()
+       r.GenerateBitmap(values.Bytes(), size, prob)
+
+       data := array.NewData(arrow.FixedWidthTypes.Boolean, int(size), []*memory.Buffer{validity, values}, nil, int(nulls), 0)
+       defer data.Release()
+       return array.NewBooleanData(data)
+}
+
+// baseGenPrimitive allocates the two buffers shared by the primitive
+// generators: a validity bitmap for size elements, filled by GenerateBitmap
+// with prob, and a data buffer of size*byteWidth bytes that is left for the
+// caller to fill. It returns the buffers (validity first) and the number of
+// validity bits left unset. The buffers are not released here.
+func (r *RandomArrayGenerator) baseGenPrimitive(size int64, prob float64, byteWidth int) ([]*memory.Buffer, int64) {
+       validity := memory.NewResizableBuffer(r.mem)
+       validity.Resize(int(bitutil.BytesForBits(size)))
+       nulls := r.GenerateBitmap(validity.Bytes(), size, prob)
+
+       values := memory.NewResizableBuffer(r.mem)
+       values.Resize(int(size) * byteWidth)
+
+       return []*memory.Buffer{validity, values}, nulls
+}
+
+// Int8 builds an int8 array of size elements whose values are drawn from the
+// inclusive range [min, max]. prob is the probability that an element's
+// validity bit is left unset; the unset count is reported as the null count.
+// It panics (via rand.Intn) if max < min and size > 0.
+func (r *RandomArrayGenerator) Int8(size int64, min, max int8, prob float64) array.Interface {
+       buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Int8SizeBytes)
+       for _, b := range buffers {
+               defer b.Release()
+       }
+
+       r.extra++
+       dist := rand.New(rand.NewSource(r.seed + r.extra))
+       out := arrow.Int8Traits.CastFromBytes(buffers[1].Bytes())
+
+       // Widen to int before subtracting so neither min+1 nor max-min can
+       // overflow int8, and so the span really is max-min+1 (inclusive of max).
+       span := int(max) - int(min) + 1
+       for i := int64(0); i < size; i++ {
+               out[i] = int8(int(min) + dist.Intn(span))
+       }
+
+       data := array.NewData(arrow.PrimitiveTypes.Int8, int(size), buffers, nil, int(nullcount), 0)
+       defer data.Release()
+       return array.NewInt8Data(data)
+}
+
+// Uint8 builds a uint8 array of size elements whose values are drawn from the
+// inclusive range [min, max]. prob is the probability that an element's
+// validity bit is left unset; the unset count is reported as the null count.
+// It panics (via rand.Intn) if max < min and size > 0.
+func (r *RandomArrayGenerator) Uint8(size int64, min, max uint8, prob float64) array.Interface {
+       buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Uint8SizeBytes)
+       for _, b := range buffers {
+               defer b.Release()
+       }
+
+       r.extra++
+       dist := rand.New(rand.NewSource(r.seed + r.extra))
+       out := arrow.Uint8Traits.CastFromBytes(buffers[1].Bytes())
+
+       // Compute the span in int: in uint8 arithmetic max-min+1 wraps to 0 for
+       // the full range [0, 255], which would make Intn panic.
+       span := int(max) - int(min) + 1
+       for i := int64(0); i < size; i++ {
+               out[i] = uint8(int(min) + dist.Intn(span))
+       }
+
+       data := array.NewData(arrow.PrimitiveTypes.Uint8, int(size), buffers, nil, int(nullcount), 0)
+       defer data.Release()
+       return array.NewUint8Data(data)
+}
+
+// Int16 builds an int16 array of size elements whose values are drawn from
+// the inclusive range [min, max]. prob is the probability that an element's
+// validity bit is left unset; the unset count is reported as the null count.
+// It panics (via rand.Intn) if max < min and size > 0.
+func (r *RandomArrayGenerator) Int16(size int64, min, max int16, prob float64) array.Interface {
+       buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Int16SizeBytes)
+       for _, b := range buffers {
+               defer b.Release()
+       }
+
+       r.extra++
+       dist := rand.New(rand.NewSource(r.seed + r.extra))
+       out := arrow.Int16Traits.CastFromBytes(buffers[1].Bytes())
+
+       // Compute the span in int: in int16 arithmetic max-min+1 overflows as
+       // soon as the range is wider than 32767 values.
+       span := int(max) - int(min) + 1
+       for i := int64(0); i < size; i++ {
+               out[i] = int16(int(min) + dist.Intn(span))
+       }
+
+       data := array.NewData(arrow.PrimitiveTypes.Int16, int(size), buffers, nil, int(nullcount), 0)
+       defer data.Release()
+       return array.NewInt16Data(data)
+}
+
+// Uint16 builds a uint16 array of size elements whose values are drawn from
+// the inclusive range [min, max]. prob is the probability that an element's
+// validity bit is left unset; the unset count is reported as the null count.
+// It panics (via rand.Intn) if max < min and size > 0.
+func (r *RandomArrayGenerator) Uint16(size int64, min, max uint16, prob float64) array.Interface {
+       buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Uint16SizeBytes)
+       for _, b := range buffers {
+               defer b.Release()
+       }
+
+       r.extra++
+       dist := rand.New(rand.NewSource(r.seed + r.extra))
+       out := arrow.Uint16Traits.CastFromBytes(buffers[1].Bytes())
+
+       // Compute the span in int: in uint16 arithmetic max-min+1 wraps to 0
+       // for the full range [0, 65535], which would make Intn panic.
+       span := int(max) - int(min) + 1
+       for i := int64(0); i < size; i++ {
+               out[i] = uint16(int(min) + dist.Intn(span))
+       }
+
+       data := array.NewData(arrow.PrimitiveTypes.Uint16, int(size), buffers, nil, int(nullcount), 0)
+       defer data.Release()
+       return array.NewUint16Data(data)
+}
+
+// Int32 builds an int32 array of size elements whose values are drawn from
+// the inclusive range [min, max]. prob is the probability that an element's
+// validity bit is left unset; the unset count is reported as the null count.
+// It panics (via rand.Int63n) if max < min and size > 0.
+func (r *RandomArrayGenerator) Int32(size int64, min, max int32, prob float64) array.Interface {
+       buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Int32SizeBytes)
+       for _, b := range buffers {
+               defer b.Release()
+       }
+
+       r.extra++
+       dist := rand.New(rand.NewSource(r.seed + r.extra))
+       out := arrow.Int32Traits.CastFromBytes(buffers[1].Bytes())
+
+       // Compute the span in int64: in int32 arithmetic max-min+1 overflows
+       // for wide ranges (e.g. the full int32 range), which would make the
+       // bounded draw panic or produce out-of-range values.
+       span := int64(max) - int64(min) + 1
+       for i := int64(0); i < size; i++ {
+               out[i] = int32(int64(min) + dist.Int63n(span))
+       }
+
+       data := array.NewData(arrow.PrimitiveTypes.Int32, int(size), buffers, nil, int(nullcount), 0)
+       defer data.Release()
+       return array.NewInt32Data(data)
+}
+
+// Uint32 builds a uint32 array of size elements whose values are drawn from
+// the inclusive range [min, max]. prob is the probability that an element's
+// validity bit is left unset; the unset count is reported as the null count.
+func (r *RandomArrayGenerator) Uint32(size int64, min, max uint32, prob float64) array.Interface {
+       buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Uint32SizeBytes)
+       for _, b := range buffers {
+               defer b.Release()
+       }
+
+       r.extra++
+       dist := rand.New(rand.NewSource(r.seed + r.extra))
+       out := arrow.Uint32Traits.CastFromBytes(buffers[1].Bytes())
+
+       // Compute the span in uint64: in uint32 arithmetic max-min+1 wraps to 0
+       // for the full range [0, MaxUint32], which is not a valid bound.
+       span := uint64(max) - uint64(min) + 1
+       for i := int64(0); i < size; i++ {
+               out[i] = uint32(uint64(min) + dist.Uint64n(span))
+       }
+
+       data := array.NewData(arrow.PrimitiveTypes.Uint32, int(size), buffers, nil, int(nullcount), 0)
+       defer data.Release()
+       return array.NewUint32Data(data)
+}
+
+// Int64 builds an int64 array of size elements, each computed as min plus a
+// bounded draw in [0, max-min+1). prob is the probability that an element's
+// validity bit is left unset; the unset count is reported as the null count.
+//
+// NOTE(review): max-min+1 is evaluated in int64, so very wide ranges look
+// like they can overflow - confirm callers stay within a safe span.
+func (r *RandomArrayGenerator) Int64(size int64, min, max int64, prob float64) array.Interface {
+       bufs, nulls := r.baseGenPrimitive(size, prob, arrow.Int64SizeBytes)
+       defer bufs[0].Release()
+       defer bufs[1].Release()
+
+       r.extra++
+       rng := rand.New(rand.NewSource(r.seed + r.extra))
+
+       width := max - min + 1
+       vals := arrow.Int64Traits.CastFromBytes(bufs[1].Bytes())
+       for idx := int64(0); idx < size; idx++ {
+               vals[idx] = min + rng.Int63n(width)
+       }
+
+       data := array.NewData(arrow.PrimitiveTypes.Int64, int(size), bufs, nil, int(nulls), 0)
+       defer data.Release()
+       return array.NewInt64Data(data)
+}
+
+// Uint64 builds a uint64 array of size elements, each computed as min plus a
+// bounded draw in [0, max-min+1). prob is the probability that an element's
+// validity bit is left unset; the unset count is reported as the null count.
+//
+// NOTE(review): max-min+1 wraps to 0 for the full uint64 range - confirm
+// callers never request it.
+func (r *RandomArrayGenerator) Uint64(size int64, min, max uint64, prob float64) array.Interface {
+       bufs, nulls := r.baseGenPrimitive(size, prob, arrow.Uint64SizeBytes)
+       defer bufs[0].Release()
+       defer bufs[1].Release()
+
+       r.extra++
+       rng := rand.New(rand.NewSource(r.seed + r.extra))
+
+       width := max - min + 1
+       vals := arrow.Uint64Traits.CastFromBytes(bufs[1].Bytes())
+       for idx := int64(0); idx < size; idx++ {
+               vals[idx] = min + rng.Uint64n(width)
+       }
+
+       data := array.NewData(arrow.PrimitiveTypes.Uint64, int(size), bufs, nil, int(nulls), 0)
+       defer data.Release()
+       return array.NewUint64Data(data)
+}
+
+// Float32 builds a float32 array of size elements, each computed as
+// min + Float32()*(max+1-min). prob is the probability that an element's
+// validity bit is left unset; the unset count is reported as the null count.
+//
+// NOTE(review): the max+1-min scale means values can exceed max - confirm
+// whether the +1 (which mirrors the integer generators) is intended here.
+func (r *RandomArrayGenerator) Float32(size int64, min, max float32, prob float64) array.Interface {
+       bufs, nulls := r.baseGenPrimitive(size, prob, arrow.Float32SizeBytes)
+       defer bufs[0].Release()
+       defer bufs[1].Release()
+
+       r.extra++
+       rng := rand.New(rand.NewSource(r.seed + r.extra))
+
+       scale := max + 1 - min
+       vals := arrow.Float32Traits.CastFromBytes(bufs[1].Bytes())
+       for idx := int64(0); idx < size; idx++ {
+               vals[idx] = min + rng.Float32()*scale
+       }
+
+       data := array.NewData(arrow.PrimitiveTypes.Float32, int(size), bufs, nil, int(nulls), 0)
+       defer data.Release()
+       return array.NewFloat32Data(data)
+}
+
+// Float64 builds a float64 array of size elements, each computed as a
+// NormFloat64 draw shifted by (max - min). prob is the probability that an
+// element's validity bit is left unset; the unset count is reported as the
+// null count.
+//
+// NOTE(review): unlike the other generators the values are not confined to
+// [min, max]; they are normally distributed around max-min - confirm intent.
+func (r *RandomArrayGenerator) Float64(size int64, min, max float64, prob float64) array.Interface {
+       bufs, nulls := r.baseGenPrimitive(size, prob, arrow.Float64SizeBytes)
+       defer bufs[0].Release()
+       defer bufs[1].Release()
+
+       r.extra++
+       rng := rand.New(rand.NewSource(r.seed + r.extra))
+
+       shift := max - min
+       vals := arrow.Float64Traits.CastFromBytes(bufs[1].Bytes())
+       for idx := int64(0); idx < size; idx++ {
+               vals[idx] = rng.NormFloat64() + shift
+       }
+
+       data := array.NewData(arrow.PrimitiveTypes.Float64, int(size), bufs, nil, int(nulls), 0)
+       defer data.Release()
+       return array.NewFloat64Data(data)
+}
+
+func (r *RandomArrayGenerator) String(size int64, minLength, maxLength int, 
nullprob float64) array.Interface {
+       lengths := r.Int32(size, int32(minLength), int32(maxLength), 
nullprob).(*array.Int32)
+       defer lengths.Release()
+
+       bldr := array.NewStringBuilder(r.mem)
+       defer bldr.Release()
+
+       r.extra++
+       dist := rand.New(rand.NewSource(r.seed + r.extra))
+
+       buf := make([]byte, 0, maxLength)
+       gen := func(n int32) string {
+               out := buf[:n]
+               for i := range out {
+                       out[i] = uint8(dist.Int31n(int32('z')-int32('A')+1) + 
int32('A'))
+               }
+               return string(out)
+       }
+
+       for i := 0; i < lengths.Len(); i++ {
+               if lengths.IsValid(i) {
+                       bldr.Append(gen(lengths.Value(i)))
+               } else {
+                       bldr.AppendNull()
+               }
+       }
+
+       return bldr.NewArray()
+}

Reply via email to