This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 47f7e13 ARROW-13967: [Go] Implement Concatenate function for
array.Interface
47f7e13 is described below
commit 47f7e139d4d6727189047be4c56a0f060a5b7eaa
Author: Matthew Topol <[email protected]>
AuthorDate: Tue Sep 21 21:26:10 2021 -0400
ARROW-13967: [Go] Implement Concatenate function for array.Interface
This is needed for adding nested types to the `MakeArrayFromScalar`
function in ARROW-13789 (#11024)
@emkornfield @sbinet @fsaintjacques
Closes #11128 from zeroshade/concat_arrays
Lead-authored-by: Matthew Topol <[email protected]>
Co-authored-by: Matt Topol <[email protected]>
Signed-off-by: Matthew Topol <[email protected]>
---
go/arrow/array/builder.go | 3 +
go/arrow/array/concat.go | 367 ++++++++++++++++++++++
go/arrow/array/concat_test.go | 301 ++++++++++++++++++
go/arrow/go.mod | 2 +
go/arrow/go.sum | 54 ++++
go/arrow/internal/testing/gen/random_array_gen.go | 307 ++++++++++++++++++
6 files changed, 1034 insertions(+)
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index 378c608..dbaad11 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -133,6 +133,9 @@ func (b *builder) resize(newBits int, init func(int)) {
}
func (b *builder) reserve(elements int, resize func(int)) {
+ if b.nullBitmap == nil {
+ b.nullBitmap = memory.NewResizableBuffer(b.mem)
+ }
if b.length+elements > b.capacity {
newCap := bitutil.NextPowerOf2(b.length + elements)
resize(newCap)
diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go
new file mode 100644
index 0000000..de859e5
--- /dev/null
+++ b/go/arrow/array/concat.go
@@ -0,0 +1,367 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array
+
+import (
+ "math"
+ "math/bits"
+
+ "github.com/apache/arrow/go/arrow"
+ "github.com/apache/arrow/go/arrow/bitutil"
+ "github.com/apache/arrow/go/arrow/internal/debug"
+ "github.com/apache/arrow/go/arrow/memory"
+ "golang.org/x/xerrors"
+)
+
+// Concatenate creates a new array.Interface which is the concatenation of the
+// passed in arrays. Returns nil if an error is encountered.
+//
+// The passed in arrays still need to be released manually, and will not be
+// released by this function.
+func Concatenate(arrs []Interface, mem memory.Allocator) (Interface, error) { // mem is passed on to concat for allocating the output buffers
+ if len(arrs) == 0 {
+ return nil, xerrors.New("array/concat: must pass at least one
array")
+ }
+
+ // collect the Data of every input, rejecting any array whose type differs from the first one
+ data := make([]*Data, len(arrs))
+ for i, ar := range arrs {
+ if !arrow.TypeEqual(ar.DataType(), arrs[0].DataType()) {
+ return nil, xerrors.Errorf("arrays to be concatenated
must be identically typed, but %s and %s were encountered",
+ arrs[0].DataType(), ar.DataType())
+ }
+ data[i] = ar.Data()
+ }
+
+ out, err := concat(data, mem)
+ if err != nil {
+ return nil, err
+ }
+
+ defer out.Release() // drops the reference concat created (refCount: 1); presumably MakeFromData retains its own - TODO confirm
+ return MakeFromData(out), nil
+}
+
+// rng describes a contiguous range by its starting offset and its length; the unit depends on the user (bits for bitmaps, bytes for value buffers, elements for child data)
+type rng struct {
+ offset, len int
+}
+
+// simple bitmap struct to reference a specific slice of a bitmap, where the range
+// offset and length are in bits
+type bitmap struct {
+ data []byte // raw bitmap bytes; concatBitmaps treats nil as every bit being set
+ rng rng // bit offset and bit length of the referenced slice of data
+}
+
+// gatherBitmaps collects buffer number idx of every data object as a bitmap slice, one entry per input
+func gatherBitmaps(data []*Data, idx int) []bitmap {
+ out := make([]bitmap, len(data))
+ for i, d := range data {
+ if d.buffers[idx] != nil { // a missing buffer leaves out[i].data nil
+ out[i].data = d.buffers[idx].Bytes()
+ }
+ out[i].rng.offset = d.offset // the logical offset and length of the array select the bits
+ out[i].rng.len = d.length
+ }
+ return out
+}
+
+// gatherFixedBuffers gathers up the buffer objects of the given index, specifically
+// returning only the slices of the buffers which are relevant to the passed in arrays
+// in case they are themselves slices of other arrays. nil buffers are ignored and not
+// included in the output slice.
+func gatherFixedBuffers(data []*Data, idx, byteWidth int) []*memory.Buffer { // byteWidth is the size in bytes of one element of buffer idx
+ out := make([]*memory.Buffer, 0, len(data))
+ for _, d := range data {
+ buf := d.buffers[idx]
+ if buf == nil {
+ continue // skipped entirely, so out can be shorter than data
+ }
+
+ out = append(out, // wrap only the bytes of elements [offset, offset+length)
memory.NewBufferBytes(buf.Bytes()[d.offset*byteWidth:(d.offset+d.length)*byteWidth]))
+ }
+ return out
+}
+
+// gatherBuffersFixedWidthType is like gatherFixedBuffers, but uses a datatype to determine the size
+// to use for determining the byte slice rather than a passed in bytewidth.
+func gatherBuffersFixedWidthType(data []*Data, idx int, fixed
arrow.FixedWidthDataType) []*memory.Buffer {
+ return gatherFixedBuffers(data, idx, fixed.BitWidth()/8) // BitWidth is in bits; integer division by 8 gives the byte width
+}
+
+// gatherBufferRanges requires that len(ranges) == len(data) and returns a list of buffers
+// which represent the corresponding range of each buffer in the specified index of each
+// data object.
+func gatherBufferRanges(data []*Data, idx int, ranges []rng) []*memory.Buffer { // ranges[i] is a byte offset and byte length into buffer idx of data[i]
+ out := make([]*memory.Buffer, 0, len(data))
+ for i, d := range data {
+ buf := d.buffers[idx]
+ if buf == nil {
+ debug.Assert(ranges[i].len == 0, "misaligned buffer
value ranges")
+ continue // a nil buffer is only expected together with an empty range and adds nothing to out
+ }
+
+ out = append(out, // wrap bytes [offset, offset+len) of this buffer
memory.NewBufferBytes(buf.Bytes()[ranges[i].offset:ranges[i].offset+ranges[i].len]))
+ }
+ return out
+}
+
+// gatherChildren gathers the children data objects for child of index idx for all of the data objects.
+func gatherChildren(data []*Data, idx int) []*Data {
+ return gatherChildrenMultiplier(data, idx, 1) // multiplier 1: child elements line up one-to-one with parent elements
+}
+
+// gatherChildrenMultiplier gathers the full data slice of the underlying values from the children data objects
+// such as the values data for a list array so that it can return a slice of the buffer for a given
+// index into the children.
+func gatherChildrenMultiplier(data []*Data, idx, multiplier int) []*Data { // multiplier is the number of child elements per parent element
+ out := make([]*Data, len(data)) // one slice per input; the callers in this file Release every entry
+ for i, d := range data {
+ out[i] = NewSliceData(d.childData[idx], // child elements [offset*multiplier, (offset+length)*multiplier)
int64(d.offset*multiplier), int64(d.offset+d.length)*int64(multiplier))
+ }
+ return out
+}
+
+// gatherChildrenRanges returns a slice of Data objects which each represent slices of the given ranges from the
+// child in the specified index from each data object.
+func gatherChildrenRanges(data []*Data, idx int, ranges []rng) []*Data { // ranges[i] is measured in elements of child idx of data[i]
+ debug.Assert(len(data) == len(ranges), "mismatched children ranges for
concat")
+ out := make([]*Data, len(data)) // one slice per input; the callers in this file Release every entry
+ for i, d := range data {
+ out[i] = NewSliceData(d.childData[idx], // child elements [offset, offset+len)
int64(ranges[i].offset), int64(ranges[i].offset+ranges[i].len))
+ }
+ return out
+}
+
+// concatBuffers creates a single contiguous buffer which contains the concatenation of all of the passed
+// in buffer objects.
+func concatBuffers(bufs []*memory.Buffer, mem memory.Allocator) *memory.Buffer
{
+ outLen := 0 // total number of bytes across all inputs
+ for _, b := range bufs {
+ outLen += b.Len()
+ }
+ out := memory.NewResizableBuffer(mem)
+ out.Resize(outLen)
+
+ data := out.Bytes() // remaining write window into the output
+ for _, b := range bufs {
+ copy(data, b.Bytes())
+ data = data[b.Len():] // advance the window past the bytes just copied
+ }
+ return out
+}
+
+// concatOffsets creates a single offset buffer which represents the concatenation of all of the
+// offsets buffers, adjusting the offsets appropriately to their new relative locations.
+//
+// It also returns the list of ranges that need to be fetched for the corresponding value buffers
+// to construct the final concatenated value buffer.
+func concatOffsets(buffers []*memory.Buffer, mem memory.Allocator)
(*memory.Buffer, []rng, error) {
+ outLen := 0
+ for _, b := range buffers {
+ outLen += b.Len() / arrow.Int32SizeBytes // number of int32 offsets visible in this buffer
+ }
+
+ out := memory.NewResizableBuffer(mem)
+ out.Resize(arrow.Int32Traits.BytesRequired(outLen + 1)) // one extra slot for the final end offset
+
+ dst := arrow.Int32Traits.CastFromBytes(out.Bytes())
+ valuesRanges := make([]rng, len(buffers))
+ nextOffset := int32(0) // running total of value lengths written so far
+ nextElem := int(0) // index in dst where the next input starts
+ for i, b := range buffers {
+ if b.Len() == 0 { // an empty input contributes no offsets and no values
+ valuesRanges[i].offset = 0
+ valuesRanges[i].len = 0
+ continue
+ }
+
+ // when we gather our buffers, we sliced off the last offset
from the buffer
+ // so that we could count the lengths accurately
+ src := arrow.Int32Traits.CastFromBytes(b.Bytes())
+ valuesRanges[i].offset = int(src[0])
+ // expand our slice to see that final offset
+ expand := src[:len(src)+1] // NOTE(review): reslices past len(src), so the backing array must hold one more int32 - confirm for every caller
+ // compute the length of this range by taking the final offset
and subtracting where we started.
+ valuesRanges[i].len = int(expand[len(src)]) -
valuesRanges[i].offset
+
+ if nextOffset > math.MaxInt32-int32(valuesRanges[i].len) { // compares against MaxInt32-len instead of computing nextOffset+len
+ return nil, nil, xerrors.New("offset overflow while
concatenating arrays")
+ }
+
+ // adjust each offset by the difference between our last ending
point and our starting point
+ adj := nextOffset - src[0]
+ for j, o := range src {
+ dst[nextElem+j] = adj + o
+ }
+
+ // the next index for an element in the output buffer
+ nextElem += b.Len() / arrow.Int32SizeBytes
+ // update our offset counter to be the total current length of
our output
+ nextOffset += int32(valuesRanges[i].len)
+ }
+
+ // final offset should point to the end of the data
+ dst[outLen] = nextOffset
+ return out, valuesRanges, nil
+}
+
+// concat is the implementation for actually performing the concatenation of the *array.Data
+// objects that we can call internally for nested types.
+func concat(data []*Data, mem memory.Allocator) (*Data, error) { // data[0] is read unconditionally, so data must not be empty
+ out := &Data{refCount: 1, dtype: data[0].dtype, nulls: 0} // the result starts with a single reference
+ for _, d := range data {
+ out.length += d.length
+ if out.nulls == UnknownNullCount || d.nulls == UnknownNullCount
{
+ out.nulls = UnknownNullCount // once any input is unknown the total stays unknown
+ continue
+ }
+ out.nulls += d.nulls
+ }
+
+ out.buffers = make([]*memory.Buffer, len(data[0].buffers))
+ if out.nulls != 0 && out.dtype.ID() != arrow.NULL { // no validity bitmap is built when the total null count is exactly 0
+ bm, err := concatBitmaps(gatherBitmaps(data, 0), mem)
+ if err != nil {
+ return nil, err
+ }
+ out.buffers[0] = bm
+ }
+
+ switch dt := out.dtype.(type) {
+ case *arrow.NullType: // nothing further is populated for null arrays
+ case *arrow.BooleanType: // buffer 1 is concatenated bit-wise as a bitmap
+ bm, err := concatBitmaps(gatherBitmaps(data, 1), mem)
+ if err != nil {
+ return nil, err
+ }
+ out.buffers[1] = bm
+ case arrow.FixedWidthDataType: // buffer 1 is concatenated byte-wise using the width reported by dt
+ out.buffers[1] =
concatBuffers(gatherBuffersFixedWidthType(data, 1, dt), mem)
+ case arrow.BinaryDataType: // buffer 1 holds int32 offsets, buffer 2 the bytes those offsets select
+ offsetBuffer, valueRanges, err :=
concatOffsets(gatherFixedBuffers(data, 1, arrow.Int32SizeBytes), mem)
+ if err != nil {
+ return nil, err
+ }
+ out.buffers[2] = concatBuffers(gatherBufferRanges(data, 2,
valueRanges), mem)
+ out.buffers[1] = offsetBuffer
+ case *arrow.ListType: // offsets are rebuilt, then the referenced child ranges are concatenated recursively
+ offsetBuffer, valueRanges, err :=
concatOffsets(gatherFixedBuffers(data, 1, arrow.Int32SizeBytes), mem)
+ if err != nil {
+ return nil, err
+ }
+ childData := gatherChildrenRanges(data, 0, valueRanges)
+ for _, c := range childData {
+ defer c.Release() // runs when concat returns, after the recursive call below has used the slices
+ }
+
+ out.buffers[1] = offsetBuffer
+ out.childData = make([]*Data, 1)
+ out.childData[0], err = concat(childData, mem)
+ if err != nil {
+ return nil, err // NOTE(review): out and its already allocated buffers are dropped without Release on this path - confirm this cannot leak
+ }
+ case *arrow.FixedSizeListType: // no offsets; each parent element owns dt.Len() consecutive child elements
+ childData := gatherChildrenMultiplier(data, 0, int(dt.Len()))
+ for _, c := range childData {
+ defer c.Release()
+ }
+
+ children, err := concat(childData, mem)
+ if err != nil {
+ return nil, err
+ }
+ out.childData = []*Data{children}
+ case *arrow.StructType: // every field is concatenated independently
+ out.childData = make([]*Data, len(dt.Fields()))
+ for i := range dt.Fields() {
+ children := gatherChildren(data, i)
+ for _, c := range children {
+ defer c.Release() // deferred to function return, not to the end of this loop iteration
+ }
+
+ childData, err := concat(children, mem)
+ if err != nil {
+ return nil, err
+ }
+ out.childData[i] = childData
+ }
+ case *arrow.MapType: // same steps as the ListType case above
+ offsetBuffer, valueRanges, err :=
concatOffsets(gatherFixedBuffers(data, 1, arrow.Int32SizeBytes), mem)
+ if err != nil {
+ return nil, err
+ }
+ childData := gatherChildrenRanges(data, 0, valueRanges)
+ for _, c := range childData {
+ defer c.Release()
+ }
+
+ out.buffers[1] = offsetBuffer
+ out.childData = make([]*Data, 1)
+ out.childData[0], err = concat(childData, mem)
+ if err != nil {
+ return nil, err
+ }
+ default:
+ return nil, xerrors.Errorf("concatenate not implemented for
type %s", dt)
+ }
+
+ return out, nil
+}
+
+// check overflow in the addition, taken from bits.Add but adapted for signed integers
+// rather than unsigned integers. bits.UintSize will be either 32 or 64 based on
+// whether our architecture is 32 bit or 64. The operation is the same for both cases,
+// the only difference is how much we need to shift by: 30 for 32 bit and 62 for 64 bit.
+// Thus, bits.UintSize - 2 is how much we shift right by to check if we had an overflow
+// in the signed addition.
+//
+// First return is the result of the sum, the second return is true if there was an overflow
+func addOvf(x, y int) (int, bool) {
+ sum := x + y // wraps silently on overflow; the flag below is what reports it
+ return sum, ((x&y)|((x|y)&^sum))>>(bits.UintSize-2) == 1 // NOTE(review): checked by hand for non-negative x and y only; x = MinInt with y = -1 appears to wrap unreported - confirm callers never pass negatives
+}
+
+// concatBitmaps copies the referenced bit ranges of all bitmaps back to back into one new buffer, or fails if the summed bit length overflows int
+func concatBitmaps(bitmaps []bitmap, mem memory.Allocator) (*memory.Buffer,
error) {
+ var (
+ outlen int
+ overflow bool
+ )
+
+ for _, bm := range bitmaps {
+ if outlen, overflow = addOvf(outlen, bm.rng.len); overflow { // outlen accumulates the total number of bits
+ return nil, xerrors.New("length overflow when
concatenating arrays")
+ }
+ }
+
+ out := memory.NewResizableBuffer(mem)
+ out.Resize(int(bitutil.BytesForBits(int64(outlen)))) // NOTE(review): bits past outlen in the last byte are never written here, so they rely on Resize returning zeroed bytes - confirm
+ dst := out.Bytes()
+
+ offset := 0 // bit position in dst where the next bitmap starts
+ for _, bm := range bitmaps {
+ if bm.data == nil { // if the bitmap is nil, that implies that
the value is true for all elements
+ bitutil.SetBitsTo(out.Bytes(), int64(offset),
int64(bm.rng.len), true)
+ } else {
+ bitutil.CopyBitmap(bm.data, bm.rng.offset, bm.rng.len,
dst, offset)
+ }
+ offset += bm.rng.len
+ }
+ return out, nil
+}
diff --git a/go/arrow/array/concat_test.go b/go/arrow/array/concat_test.go
new file mode 100644
index 0000000..9e6ab4a
--- /dev/null
+++ b/go/arrow/array/concat_test.go
@@ -0,0 +1,301 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+ "fmt"
+ "math"
+ "sort"
+ "testing"
+
+ "github.com/apache/arrow/go/arrow"
+ "github.com/apache/arrow/go/arrow/array"
+ "github.com/apache/arrow/go/arrow/bitutil"
+ "github.com/apache/arrow/go/arrow/internal/testing/gen"
+ "github.com/apache/arrow/go/arrow/memory"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/suite"
+ "golang.org/x/exp/rand"
+)
+
+func TestConcatenateValueBuffersNull(t *testing.T) { // an empty binary array concatenated with a single-null one must equal the single-null array
+ mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+ defer mem.AssertSize(t, 0) // registered first, so it runs after every Release deferred below
+
+ inputs := make([]array.Interface, 0)
+
+ bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer bldr.Release()
+
+ arr := bldr.NewArray() // first input: built before anything was appended
+ defer arr.Release()
+ inputs = append(inputs, arr)
+
+ bldr.AppendNull()
+ arr = bldr.NewArray() // second input: exactly one null element
+ defer arr.Release()
+ inputs = append(inputs, arr)
+
+ actual, err := array.Concatenate(inputs, mem)
+ assert.NoError(t, err)
+ defer actual.Release()
+
+ assert.True(t, array.ArrayEqual(actual, inputs[1]))
+}
+
+func TestConcatenate(t *testing.T) { // runs ConcatTestSuite once for each data type listed below
+ tests := []struct {
+ dt arrow.DataType
+ }{
+ {arrow.FixedWidthTypes.Boolean},
+ {arrow.PrimitiveTypes.Int8},
+ {arrow.PrimitiveTypes.Uint8},
+ {arrow.PrimitiveTypes.Int16},
+ {arrow.PrimitiveTypes.Uint16},
+ {arrow.PrimitiveTypes.Int32},
+ {arrow.PrimitiveTypes.Uint32},
+ {arrow.PrimitiveTypes.Int64},
+ {arrow.PrimitiveTypes.Uint64},
+ {arrow.PrimitiveTypes.Float32},
+ {arrow.PrimitiveTypes.Float64},
+ {arrow.BinaryTypes.String},
+ {arrow.ListOf(arrow.PrimitiveTypes.Int8)},
+ {arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int8)},
+ {arrow.StructOf()}, // only the type ID matters here; generateArr switches on dt.ID() and builds its own fields
+ {arrow.MapOf(arrow.PrimitiveTypes.Uint16,
arrow.PrimitiveTypes.Int8)},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.dt.Name(), func(t *testing.T) {
+ suite.Run(t, &ConcatTestSuite{
+ seed: 0xdeadbeef, // fixed seed, the same for every type
+ dt: tt.dt,
+ nullProbs: []float64{0.0, 0.1, 0.5, 0.9, 1.0},
+ sizes: []int32{0, 1, 2, 4, 16, 31, 1234},
+ })
+ })
+ }
+}
+
+type ConcatTestSuite struct { // testify suite that checks Concatenate for a single data type
+ suite.Suite
+
+ seed uint64 // seeds both the array generator and the offsets helper
+ rng gen.RandomArrayGenerator // created in SetupSuite
+ dt arrow.DataType // type under test; generateArr switches on its ID
+
+ nullProbs []float64 // null probabilities passed to generateArr
+ sizes []int32 // array lengths to generate
+
+ mem *memory.CheckedAllocator // created in SetupSuite, asserted to be empty in TearDownSuite
+}
+
+func (cts *ConcatTestSuite) SetupSuite() { // creates the checked allocator and the random generator that allocates from it
+ cts.mem = memory.NewCheckedAllocator(memory.DefaultAllocator)
+ cts.rng = gen.NewRandomArrayGenerator(cts.seed, cts.mem)
+}
+
+func (cts *ConcatTestSuite) TearDownSuite() { // asserts that nothing allocated through cts.mem is still outstanding
+ cts.mem.AssertSize(cts.T(), 0)
+}
+
+func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64)
array.Interface { // builds an array of size elements for the type ID of cts.dt; returns nil for IDs not listed
+ switch cts.dt.ID() {
+ case arrow.BOOL:
+ return cts.rng.Boolean(size, 0.5, nullprob)
+ case arrow.INT8:
+ return cts.rng.Int8(size, 0, 127, nullprob)
+ case arrow.UINT8:
+ return cts.rng.Uint8(size, 0, 127, nullprob)
+ case arrow.INT16:
+ return cts.rng.Int16(size, 0, 127, nullprob)
+ case arrow.UINT16:
+ return cts.rng.Uint16(size, 0, 127, nullprob)
+ case arrow.INT32:
+ return cts.rng.Int32(size, 0, 127, nullprob)
+ case arrow.UINT32:
+ return cts.rng.Uint32(size, 0, 127, nullprob)
+ case arrow.INT64:
+ return cts.rng.Int64(size, 0, 127, nullprob)
+ case arrow.UINT64:
+ return cts.rng.Uint64(size, 0, 127, nullprob)
+ case arrow.FLOAT32:
+ return cts.rng.Float32(size, 0, 127, nullprob)
+ case arrow.FLOAT64:
+ return cts.rng.Float64(size, 0, 127, nullprob)
+ case arrow.NULL:
+ return array.NewNull(int(size))
+ case arrow.STRING:
+ return cts.rng.String(size, 0, 15, nullprob)
+ case arrow.LIST:
+ valuesSize := size * 4 // four child values per list element in total
+ values := cts.rng.Int8(valuesSize, 0, 127,
nullprob).(*array.Int8)
+ defer values.Release()
+ offsetsVector := cts.offsets(int32(valuesSize), int32(size))
+ // ensure the first and last offsets encompass the whole values
+ offsetsVector[0] = 0
+ offsetsVector[len(offsetsVector)-1] = int32(valuesSize)
+
+ bldr := array.NewListBuilder(memory.DefaultAllocator, // note: DefaultAllocator, so this array is not tracked by cts.mem
arrow.PrimitiveTypes.Int8)
+ defer bldr.Release()
+
+ valid := make([]bool, len(offsetsVector)-1)
+ for i := range valid {
+ valid[i] = true // every list element is valid; nulls only occur in the child values
+ }
+ bldr.AppendValues(offsetsVector, valid)
+ vb := bldr.ValueBuilder().(*array.Int8Builder)
+ for i := 0; i < values.Len(); i++ { // copy the random values, nulls included, into the child builder
+ if values.IsValid(i) {
+ vb.Append(values.Value(i))
+ } else {
+ vb.AppendNull()
+ }
+ }
+ return bldr.NewArray()
+ case arrow.FIXED_SIZE_LIST:
+ const listsize = 3
+ valuesSize := size * listsize // listsize child values per list element
+ values := cts.rng.Int8(valuesSize, 0, 127, nullprob)
+ defer values.Release()
+
+ data := array.NewData(arrow.FixedSizeListOf(listsize,
arrow.PrimitiveTypes.Int8), int(size), []*memory.Buffer{nil},
[]*array.Data{values.Data()}, 0, 0)
+ defer data.Release()
+ return array.MakeFromData(data)
+ case arrow.STRUCT: // always builds the three fields below, whatever fields cts.dt declares
+ foo := cts.rng.Int8(size, 0, 127, nullprob)
+ defer foo.Release()
+ bar := cts.rng.Float64(size, 0, 127, nullprob)
+ defer bar.Release()
+ baz := cts.rng.Boolean(size, 0.5, nullprob)
+ defer baz.Release()
+
+ data := array.NewData(arrow.StructOf(
+ arrow.Field{Name: "foo", Type: foo.DataType(),
Nullable: true},
+ arrow.Field{Name: "bar", Type: bar.DataType(),
Nullable: true},
+ arrow.Field{Name: "baz", Type: baz.DataType(),
Nullable: true}),
+ int(size), []*memory.Buffer{nil},
[]*array.Data{foo.Data(), bar.Data(), baz.Data()}, 0, 0)
+ defer data.Release()
+ return array.NewStructData(data)
+ case arrow.MAP:
+ valuesSize := size * 4
+ keys := cts.rng.Uint16(valuesSize, 0, 127, 0).(*array.Uint16) // null probability 0: keys are never null
+ defer keys.Release()
+ values := cts.rng.Int8(valuesSize, 0, 127,
nullprob).(*array.Int8)
+ defer values.Release()
+
+ offsetsVector := cts.offsets(int32(valuesSize), int32(size))
+ offsetsVector[0] = 0 // as in the LIST case, make the offsets span all generated entries
+ offsetsVector[len(offsetsVector)-1] = int32(valuesSize)
+
+ bldr := array.NewMapBuilder(memory.DefaultAllocator, // note: DefaultAllocator, so this array is not tracked by cts.mem
arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8, false)
+ defer bldr.Release()
+
+ kb := bldr.KeyBuilder().(*array.Uint16Builder)
+ vb := bldr.ItemBuilder().(*array.Int8Builder)
+
+ valid := make([]bool, len(offsetsVector)-1)
+ for i := range valid {
+ valid[i] = true
+ }
+ bldr.AppendValues(offsetsVector, valid)
+ for i := 0; i < int(valuesSize); i++ { // append key and item pairs, keeping nulls in the items
+ kb.Append(keys.Value(i))
+ if values.IsValid(i) {
+ vb.Append(values.Value(i))
+ } else {
+ vb.AppendNull()
+ }
+ }
+ return bldr.NewArray()
+ default:
+ return nil
+ }
+}
+
+func (cts *ConcatTestSuite) slices(arr array.Interface, offsets []int32)
[]array.Interface { // cuts arr into len(offsets)-1 consecutive slices bounded by neighbouring offsets
+ slices := make([]array.Interface, len(offsets)-1)
+ for i := 0; i != len(slices); i++ {
+ slices[i] = array.NewSlice(arr, int64(offsets[i]), // slice i covers [offsets[i], offsets[i+1])
int64(offsets[i+1]))
+ }
+ return slices
+}
+
+func (cts *ConcatTestSuite) checkTrailingBitsZeroed(bitmap *memory.Buffer,
length int64) { // length is the number of meaningful bits in bitmap
+ if preceding := bitutil.PrecedingBitmask[length%8]; preceding != 0 { // presumably the mask of the low length%8 bits - TODO confirm; a zero mask skips the check
+ lastByte := bitmap.Bytes()[length/8]
+ cts.Equal(lastByte&preceding, lastByte, length, preceding) // masking must leave the last byte unchanged, i.e. no bit outside the mask is set
+ }
+}
+
+func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 { // returns slicecount+1 sorted random offsets
+ offsets := make([]int32, slicecount+1)
+ dist := rand.New(rand.NewSource(cts.seed)) // re-seeded from cts.seed on each call, so equal arguments give equal offsets
+ for i := range offsets {
+ offsets[i] = dist.Int31n(length + 1) // presumably in [0, length] - TODO confirm against golang.org/x/exp/rand
+ }
+ sort.Slice(offsets, func(i, j int) bool { return offsets[i] <
offsets[j] })
+ return offsets
+}
+
+func (cts *ConcatTestSuite) TestCheckConcat() { // for every size and null probability: slice a generated array, concatenate the slices, compare with one big slice
+ for _, sz := range cts.sizes {
+ cts.Run(fmt.Sprintf("size %d", sz), func() {
+ offsets := cts.offsets(sz, 3) // four sorted offsets, giving three slices
+ for _, np := range cts.nullProbs {
+ cts.Run(fmt.Sprintf("nullprob %0.2f", np),
func() {
+ scopedMem :=
memory.NewCheckedAllocatorScope(cts.mem)
+ defer scopedMem.CheckSize(cts.T()) // registered first, so it runs after every Release deferred below
+
+ arr := cts.generateArr(int64(sz), np)
+ defer arr.Release()
+ expected := array.NewSlice(arr, // the span from the first offset to the last one
int64(offsets[0]), int64(offsets[len(offsets)-1]))
+ defer expected.Release()
+
+ slices := cts.slices(arr, offsets)
+ for _, s := range slices {
+ defer s.Release()
+ }
+
+ actual, err :=
array.Concatenate(slices, cts.mem)
+ cts.NoError(err)
+ defer actual.Release()
+
+ cts.True(array.ArrayEqual(expected,
actual))
+ if len(actual.Data().Buffers()) > 0 { // buffer 0 is the validity bitmap when present
+ if actual.Data().Buffers()[0]
!= nil {
+
cts.checkTrailingBitsZeroed(actual.Data().Buffers()[0], int64(actual.Len()))
+ }
+ if actual.DataType().ID() ==
arrow.BOOL {
+
cts.checkTrailingBitsZeroed(actual.Data().Buffers()[1], int64(actual.Len()))
+ }
+ }
+ })
+ }
+ })
+ }
+}
+
+func TestOffsetOverflow(t *testing.T) { // concatenating two arrays whose value lengths sum past MaxInt32 must return an error, not panic
+ fakeOffsets :=
memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, math.MaxInt32}))
+ fakeArr := array.NewStringData(array.NewData(arrow.BinaryTypes.String,
1, []*memory.Buffer{nil, fakeOffsets, memory.NewBufferBytes([]byte{})}, nil, 0,
0))
+ var err error
+ assert.NotPanics(t, func() {
+ _, err = array.Concatenate([]array.Interface{fakeArr, fakeArr}, // the same fake array twice: MaxInt32 + MaxInt32 does not fit in int32
memory.DefaultAllocator)
+ })
+ assert.EqualError(t, err, "offset overflow while concatenating arrays")
+}
diff --git a/go/arrow/go.mod b/go/arrow/go.mod
index 20f2f42..b71ff89 100644
--- a/go/arrow/go.mod
+++ b/go/arrow/go.mod
@@ -26,9 +26,11 @@ require (
github.com/klauspost/compress v1.13.1
github.com/pierrec/lz4/v4 v4.1.8
github.com/stretchr/testify v1.7.0
+ golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3
golang.org/x/net v0.0.0-20210614182718-04defd469f4e // indirect
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
+ gonum.org/v1/gonum v0.9.3
google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79 //
indirect
google.golang.org/grpc v1.39.0
google.golang.org/protobuf v1.27.1
diff --git a/go/arrow/go.sum b/go/arrow/go.sum
index 24da3ea..207218e 100644
--- a/go/arrow/go.sum
+++ b/go/arrow/go.sum
@@ -1,7 +1,12 @@
cloud.google.com/go v0.26.0/go.mod
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod
h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
+gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod
h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8=
github.com/BurntSushi/toml v0.3.1/go.mod
h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod
h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod
h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
github.com/antihax/optional v1.0.0/go.mod
h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
+github.com/boombuler/barcode v1.0.0/go.mod
h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod
h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/client9/misspell v0.3.4/go.mod
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod
h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
@@ -17,7 +22,16 @@ github.com/envoyproxy/go-control-plane
v0.9.9-0.20201210154907-fd9021fe5dad/go.m
github.com/envoyproxy/go-control-plane
v0.9.9-0.20210217033140-668b12f5399d/go.mod
h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
github.com/envoyproxy/go-control-plane
v0.9.9-0.20210512163311-63b5d3c536b0/go.mod
h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod
h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod
h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
+github.com/fogleman/gg v1.3.0/go.mod
h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
github.com/ghodss/yaml v1.0.0/go.mod
h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
+github.com/go-fonts/dejavu v0.1.0/go.mod
h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g=
+github.com/go-fonts/latin-modern v0.2.0/go.mod
h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks=
+github.com/go-fonts/liberation v0.1.1/go.mod
h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
+github.com/go-fonts/stix v0.1.0/go.mod
h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
+github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod
h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
+github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod
h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod
h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod
h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/mock v1.1.1/go.mod
h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@@ -48,28 +62,54 @@ github.com/google/go-cmp v0.5.6
h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
github.com/google/go-cmp v0.5.6/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/uuid v1.1.2/go.mod
h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod
h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
+github.com/jung-kurt/gofpdf v1.0.0/go.mod
h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
+github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod
h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/klauspost/compress v1.13.1
h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ=
github.com/klauspost/compress v1.13.1/go.mod
h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
+github.com/phpdave11/gofpdf v1.4.2/go.mod
h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY=
+github.com/phpdave11/gofpdi v1.0.12/go.mod
h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI=
github.com/pierrec/lz4/v4 v4.1.8
h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4=
github.com/pierrec/lz4/v4 v4.1.8/go.mod
h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
+github.com/pkg/errors v0.8.1/go.mod
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1/go.mod
h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod
h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/rogpeppe/fastuuid v1.2.0/go.mod
h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
+github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod
h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
github.com/stretchr/objx v0.1.0/go.mod
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.2.2/go.mod
h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.5.1/go.mod
h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0
h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/yuin/goldmark v1.3.5/go.mod
h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
go.opentelemetry.io/proto/otlp v0.7.0/go.mod
h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod
h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3
h1:n9HxLrNxWWtEb1cA950nuEEj3QnKbtsCJ6KjcgisNUs=
+golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod
h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE=
+golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod
h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
+golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod
h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
+golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod
h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod
h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod
h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod
h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod
h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
+golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod
h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -92,9 +132,11 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod
h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -106,11 +148,14 @@ golang.org/x/text v0.3.3/go.mod
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod
h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod
h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod
h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.1.4/go.mod
h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
@@ -119,6 +164,14 @@ golang.org/x/xerrors
v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod
h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
+gonum.org/v1/gonum v0.8.2/go.mod
h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
+gonum.org/v1/gonum v0.9.3 h1:DnoIG+QAMaF5NvxnGe/oKsgKcAc6PcUyl8q0VetfQ8s=
+gonum.org/v1/gonum v0.9.3/go.mod
h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0
h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod
h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
+gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod
h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
+gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
google.golang.org/appengine v1.1.0/go.mod
h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod
h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
@@ -157,3 +210,4 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c
h1:dUUwHk2QECo/6vqA44rthZ8ie
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/go/arrow/internal/testing/gen/random_array_gen.go
b/go/arrow/internal/testing/gen/random_array_gen.go
new file mode 100644
index 0000000..975117a
--- /dev/null
+++ b/go/arrow/internal/testing/gen/random_array_gen.go
@@ -0,0 +1,307 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gen
+
+import (
+ "github.com/apache/arrow/go/arrow"
+ "github.com/apache/arrow/go/arrow/array"
+ "github.com/apache/arrow/go/arrow/bitutil"
+ "github.com/apache/arrow/go/arrow/memory"
+ "golang.org/x/exp/rand"
+ "gonum.org/v1/gonum/stat/distuv"
+)
+
+// RandomArrayGenerator is a struct used for constructing Random Arrow arrays
+// for use with testing.
+type RandomArrayGenerator struct {
+ // seed is the base seed supplied to NewRandomArrayGenerator; generator
+ // methods add extra to it to seed a fresh source per call.
+ seed uint64
+ // extra is a counter incremented before each generation step so that
+ // successive calls use different (but reproducible) seeds.
+ extra uint64
+ // src is the random source created from seed by the constructor.
+ src rand.Source
+ // seedRand wraps src in a *rand.Rand.
+ seedRand *rand.Rand
+ // mem is the allocator used for every buffer and builder created by the
+ // generator methods.
+ mem memory.Allocator
+}
+
+// NewRandomArrayGenerator returns a generator that records seed, creates a
+// random source (and a *rand.Rand on top of it) from that seed, and allocates
+// from mem. The per-call seed offset starts at zero.
+func NewRandomArrayGenerator(seed uint64, mem memory.Allocator) RandomArrayGenerator {
+	source := rand.NewSource(seed)
+	return RandomArrayGenerator{
+		seed:     seed,
+		extra:    0,
+		src:      source,
+		seedRand: rand.New(source),
+		mem:      mem,
+	}
+}
+
+// GenerateBitmap generates a bitmap of n bits and stores it into buffer. Prob
is the probability
+// that a given bit will be zero, with 1-prob being the probability it will be
1. The return value
+// is the number of bits that were left unset. The assumption being that
buffer is currently
+// zero initialized as this function does not clear any bits, it only sets 1s.
+func (r *RandomArrayGenerator) GenerateBitmap(buffer []byte, n int64, prob
float64) int64 {
+ count := int64(0)
+ r.extra++
+
+ // bernoulli distribution uses P to determine the probabitiliy of a 0
or a 1,
+ // which we'll use to generate the bitmap.
+ dist := distuv.Bernoulli{P: 1 - prob, Src: rand.NewSource(r.seed +
r.extra)}
+ for i := 0; int64(i) < n; i++ {
+ if dist.Rand() != float64(0.0) {
+ bitutil.SetBit(buffer, i)
+ } else {
+ count++
+ }
+ }
+
+ return count
+}
+
+// Boolean returns a boolean array holding size elements.
+//
+// Both buffers are produced by GenerateBitmap: nullProb is the probability
+// that a slot's bit in the first buffer is left unset (the number of unset
+// bits is passed on as the array's null count), and prob is the probability
+// that a bit in the second (values) buffer is left unset.
+func (r *RandomArrayGenerator) Boolean(size int64, prob, nullProb float64) array.Interface {
+	nbytes := int(bitutil.BytesForBits(size))
+
+	validity := memory.NewResizableBuffer(r.mem)
+	validity.Resize(nbytes)
+	defer validity.Release()
+	nulls := r.GenerateBitmap(validity.Bytes(), size, nullProb)
+
+	values := memory.NewResizableBuffer(r.mem)
+	values.Resize(nbytes)
+	defer values.Release()
+	r.GenerateBitmap(values.Bytes(), size, prob)
+
+	// The local references are released on return.
+	// NOTE(review): this assumes NewData / NewBooleanData retain what they need - confirm.
+	data := array.NewData(arrow.FixedWidthTypes.Boolean, int(size), []*memory.Buffer{validity, values}, nil, int(nulls), 0)
+	defer data.Release()
+	return array.NewBooleanData(data)
+}
+
+// baseGenPrimitive allocates the two buffers used by the fixed-width
+// generators and returns them together with the number of unset bits in the
+// first one.
+//
+// The first buffer is a bitmap of size bits filled by GenerateBitmap with
+// prob as the probability of a bit being left unset. The second buffer is
+// resized to size*byteWidth bytes and is left for the caller to fill. The
+// buffers are not released here; the callers in this file release them.
+func (r *RandomArrayGenerator) baseGenPrimitive(size int64, prob float64, byteWidth int) ([]*memory.Buffer, int64) {
+	validity := memory.NewResizableBuffer(r.mem)
+	validity.Resize(int(bitutil.BytesForBits(size)))
+	nulls := r.GenerateBitmap(validity.Bytes(), size, prob)
+
+	values := memory.NewResizableBuffer(r.mem)
+	values.Resize(int(size) * byteWidth)
+
+	return []*memory.Buffer{validity, values}, nulls
+}
+
+// Int8 returns an int8 array of size elements whose values are drawn from the
+// inclusive range [min, max]. prob is forwarded to GenerateBitmap as the
+// probability that a slot's validity bit is left unset (i.e. the slot is
+// counted as null).
+//
+// The range must not be empty: with size > 0 and max < min the underlying
+// Intn call is given a non-positive argument.
+func (r *RandomArrayGenerator) Int8(size int64, min, max int8, prob float64) array.Interface {
+	buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Int8SizeBytes)
+	for _, b := range buffers {
+		// Deferred on purpose: the buffers must outlive the construction of
+		// the array at the end of this function.
+		defer b.Release()
+	}
+
+	r.extra++
+	dist := rand.New(rand.NewSource(r.seed + r.extra))
+	out := arrow.Int8Traits.CastFromBytes(buffers[1].Bytes())
+
+	// Widen to int before subtracting: the number of values in [min, max] is
+	// max-min+1, which does not fit in int8 for wide ranges. (The previous
+	// int(max)-int(min+1) was two short, so max and max-1 were never
+	// generated and max == min was rejected.)
+	span := int(max) - int(min) + 1
+	for i := int64(0); i < size; i++ {
+		out[i] = int8(int(min) + dist.Intn(span))
+	}
+
+	data := array.NewData(arrow.PrimitiveTypes.Int8, int(size), buffers, nil, int(nullcount), 0)
+	defer data.Release()
+	return array.NewInt8Data(data)
+}
+
+// Uint8 returns a uint8 array of size elements whose values are drawn from
+// the inclusive range [min, max]. prob is forwarded to GenerateBitmap as the
+// probability that a slot's validity bit is left unset (i.e. the slot is
+// counted as null).
+func (r *RandomArrayGenerator) Uint8(size int64, min, max uint8, prob float64) array.Interface {
+	buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Uint8SizeBytes)
+	for _, b := range buffers {
+		// Deferred on purpose: the buffers must outlive the construction of
+		// the array at the end of this function.
+		defer b.Release()
+	}
+
+	r.extra++
+	dist := rand.New(rand.NewSource(r.seed + r.extra))
+	out := arrow.Uint8Traits.CastFromBytes(buffers[1].Bytes())
+
+	// Add the 1 after widening: max-min+1 evaluated in uint8 wraps to 0 for
+	// the full range [0, 255], which Intn rejects.
+	span := int(max-min) + 1
+	for i := int64(0); i < size; i++ {
+		out[i] = uint8(dist.Intn(span)) + min
+	}
+
+	data := array.NewData(arrow.PrimitiveTypes.Uint8, int(size), buffers, nil, int(nullcount), 0)
+	defer data.Release()
+	return array.NewUint8Data(data)
+}
+
+// Int16 returns an int16 array of size elements whose values are drawn from
+// the inclusive range [min, max]. prob is forwarded to GenerateBitmap as the
+// probability that a slot's validity bit is left unset (i.e. the slot is
+// counted as null).
+//
+// The range must not be empty: with size > 0 and max < min the underlying
+// Intn call is given a non-positive argument.
+func (r *RandomArrayGenerator) Int16(size int64, min, max int16, prob float64) array.Interface {
+	buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Int16SizeBytes)
+	for _, b := range buffers {
+		// Deferred on purpose: the buffers must outlive the construction of
+		// the array at the end of this function.
+		defer b.Release()
+	}
+
+	r.extra++
+	dist := rand.New(rand.NewSource(r.seed + r.extra))
+	out := arrow.Int16Traits.CastFromBytes(buffers[1].Bytes())
+
+	// Widen to int before subtracting: max-min+1 evaluated in int16 overflows
+	// as soon as the range spans more than 32767 values.
+	span := int(max) - int(min) + 1
+	for i := int64(0); i < size; i++ {
+		out[i] = int16(int(min) + dist.Intn(span))
+	}
+
+	data := array.NewData(arrow.PrimitiveTypes.Int16, int(size), buffers, nil, int(nullcount), 0)
+	defer data.Release()
+	return array.NewInt16Data(data)
+}
+
+// Uint16 returns a uint16 array of size elements whose values are drawn from
+// the inclusive range [min, max]. prob is forwarded to GenerateBitmap as the
+// probability that a slot's validity bit is left unset (i.e. the slot is
+// counted as null).
+func (r *RandomArrayGenerator) Uint16(size int64, min, max uint16, prob float64) array.Interface {
+	buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Uint16SizeBytes)
+	for _, b := range buffers {
+		// Deferred on purpose: the buffers must outlive the construction of
+		// the array at the end of this function.
+		defer b.Release()
+	}
+
+	r.extra++
+	dist := rand.New(rand.NewSource(r.seed + r.extra))
+	out := arrow.Uint16Traits.CastFromBytes(buffers[1].Bytes())
+
+	// Add the 1 after widening: max-min+1 evaluated in uint16 wraps to 0 for
+	// the full range [0, 65535], which Intn rejects.
+	span := int(max-min) + 1
+	for i := int64(0); i < size; i++ {
+		out[i] = uint16(dist.Intn(span)) + min
+	}
+
+	data := array.NewData(arrow.PrimitiveTypes.Uint16, int(size), buffers, nil, int(nullcount), 0)
+	defer data.Release()
+	return array.NewUint16Data(data)
+}
+
+// Int32 returns an int32 array of size elements whose values are drawn from
+// the inclusive range [min, max]. prob is forwarded to GenerateBitmap as the
+// probability that a slot's validity bit is left unset (i.e. the slot is
+// counted as null).
+//
+// The range must not be empty: with size > 0 and max < min the underlying
+// Int63n call is given a non-positive argument.
+func (r *RandomArrayGenerator) Int32(size int64, min, max int32, prob float64) array.Interface {
+	buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Int32SizeBytes)
+	for _, b := range buffers {
+		// Deferred on purpose: the buffers must outlive the construction of
+		// the array at the end of this function.
+		defer b.Release()
+	}
+
+	r.extra++
+	dist := rand.New(rand.NewSource(r.seed + r.extra))
+	out := arrow.Int32Traits.CastFromBytes(buffers[1].Bytes())
+
+	// Widen to int64 before subtracting: max-min+1 evaluated in int32
+	// overflows for ranges wider than math.MaxInt32 values.
+	span := int64(max) - int64(min) + 1
+	for i := int64(0); i < size; i++ {
+		out[i] = int32(int64(min) + dist.Int63n(span))
+	}
+
+	data := array.NewData(arrow.PrimitiveTypes.Int32, int(size), buffers, nil, int(nullcount), 0)
+	defer data.Release()
+	return array.NewInt32Data(data)
+}
+
+// Uint32 returns a uint32 array of size elements whose values are drawn from
+// the inclusive range [min, max]. prob is forwarded to GenerateBitmap as the
+// probability that a slot's validity bit is left unset (i.e. the slot is
+// counted as null).
+func (r *RandomArrayGenerator) Uint32(size int64, min, max uint32, prob float64) array.Interface {
+	buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Uint32SizeBytes)
+	for _, b := range buffers {
+		// Deferred on purpose: the buffers must outlive the construction of
+		// the array at the end of this function.
+		defer b.Release()
+	}
+
+	r.extra++
+	dist := rand.New(rand.NewSource(r.seed + r.extra))
+	out := arrow.Uint32Traits.CastFromBytes(buffers[1].Bytes())
+
+	// Add the 1 after widening: max-min+1 evaluated in uint32 wraps to 0 for
+	// the full range, which Uint64n rejects.
+	span := uint64(max-min) + 1
+	for i := int64(0); i < size; i++ {
+		out[i] = uint32(dist.Uint64n(span)) + min
+	}
+
+	data := array.NewData(arrow.PrimitiveTypes.Uint32, int(size), buffers, nil, int(nullcount), 0)
+	defer data.Release()
+	return array.NewUint32Data(data)
+}
+
+func (r *RandomArrayGenerator) Int64(size int64, min, max int64, prob float64)
array.Interface {
+ buffers, nullcount := r.baseGenPrimitive(size, prob,
arrow.Int64SizeBytes)
+ for _, b := range buffers {
+ defer b.Release()
+ }
+
+ r.extra++
+ dist := rand.New(rand.NewSource(r.seed + r.extra))
+ out := arrow.Int64Traits.CastFromBytes(buffers[1].Bytes())
+ for i := int64(0); i < size; i++ {
+ out[i] = dist.Int63n(max-min+1) + min
+ }
+
+ data := array.NewData(arrow.PrimitiveTypes.Int64, int(size), buffers,
nil, int(nullcount), 0)
+ defer data.Release()
+ return array.NewInt64Data(data)
+}
+
+func (r *RandomArrayGenerator) Uint64(size int64, min, max uint64, prob
float64) array.Interface {
+ buffers, nullcount := r.baseGenPrimitive(size, prob,
arrow.Uint64SizeBytes)
+ for _, b := range buffers {
+ defer b.Release()
+ }
+
+ r.extra++
+ dist := rand.New(rand.NewSource(r.seed + r.extra))
+ out := arrow.Uint64Traits.CastFromBytes(buffers[1].Bytes())
+ for i := int64(0); i < size; i++ {
+ out[i] = dist.Uint64n(max-min+1) + min
+ }
+
+ data := array.NewData(arrow.PrimitiveTypes.Uint64, int(size), buffers,
nil, int(nullcount), 0)
+ defer data.Release()
+ return array.NewUint64Data(data)
+}
+
+// Float32 returns a float32 array of size elements. Each value is computed as
+// min + f*(max+1-min), where f is a fresh dist.Float32() draw, so the upper
+// end of the range is governed by max+1 rather than max. prob is forwarded to
+// GenerateBitmap as the probability that a slot's validity bit is left unset
+// (i.e. the slot is counted as null).
+func (r *RandomArrayGenerator) Float32(size int64, min, max float32, prob float64) array.Interface {
+	bufs, nulls := r.baseGenPrimitive(size, prob, arrow.Float32SizeBytes)
+	for _, buf := range bufs {
+		// Released when the function returns, after the array is built.
+		defer buf.Release()
+	}
+
+	r.extra++
+	rng := rand.New(rand.NewSource(r.seed + r.extra))
+	values := arrow.Float32Traits.CastFromBytes(bufs[1].Bytes())
+
+	width := max + 1 - min
+	for idx := int64(0); idx < size; idx++ {
+		values[idx] = min + rng.Float32()*width
+	}
+
+	arrData := array.NewData(arrow.PrimitiveTypes.Float32, int(size), bufs, nil, int(nulls), 0)
+	defer arrData.Release()
+	return array.NewFloat32Data(arrData)
+}
+
+// Float64 returns a float64 array of size elements. Each value is computed as
+// min + f*(max+1-min), where f is a fresh dist.Float64() draw; this is the
+// same formula Float32 uses. prob is forwarded to GenerateBitmap as the
+// probability that a slot's validity bit is left unset (i.e. the slot is
+// counted as null).
+func (r *RandomArrayGenerator) Float64(size int64, min, max float64, prob float64) array.Interface {
+	buffers, nullcount := r.baseGenPrimitive(size, prob, arrow.Float64SizeBytes)
+	for _, b := range buffers {
+		// Deferred on purpose: the buffers must outlive the construction of
+		// the array at the end of this function.
+		defer b.Release()
+	}
+
+	r.extra++
+	dist := rand.New(rand.NewSource(r.seed + r.extra))
+	out := arrow.Float64Traits.CastFromBytes(buffers[1].Bytes())
+
+	// Previously this was NormFloat64() + (max - min): a normal distribution
+	// centred on max-min, so min was not a lower bound and max was not an
+	// upper bound. Scale a unit draw into the requested range instead,
+	// mirroring Float32.
+	width := max + 1 - min
+	for i := int64(0); i < size; i++ {
+		out[i] = min + dist.Float64()*width
+	}
+
+	data := array.NewData(arrow.PrimitiveTypes.Float64, int(size), buffers, nil, int(nullcount), 0)
+	defer data.Release()
+	return array.NewFloat64Data(data)
+}
+
+func (r *RandomArrayGenerator) String(size int64, minLength, maxLength int,
nullprob float64) array.Interface {
+ lengths := r.Int32(size, int32(minLength), int32(maxLength),
nullprob).(*array.Int32)
+ defer lengths.Release()
+
+ bldr := array.NewStringBuilder(r.mem)
+ defer bldr.Release()
+
+ r.extra++
+ dist := rand.New(rand.NewSource(r.seed + r.extra))
+
+ buf := make([]byte, 0, maxLength)
+ gen := func(n int32) string {
+ out := buf[:n]
+ for i := range out {
+ out[i] = uint8(dist.Int31n(int32('z')-int32('A')+1) +
int32('A'))
+ }
+ return string(out)
+ }
+
+ for i := 0; i < lengths.Len(); i++ {
+ if lengths.IsValid(i) {
+ bldr.Append(gen(lengths.Value(i)))
+ } else {
+ bldr.AppendNull()
+ }
+ }
+
+ return bldr.NewArray()
+}