This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 771ad83a6d ARROW-16831: [Go] panic in ipc.Reader when string array 
offsets are invalid
771ad83a6d is described below

commit 771ad83a6d11aa32f58434c02c73646837412fc4
Author: Chris Casola <[email protected]>
AuthorDate: Thu Jun 16 13:03:17 2022 -0400

    ARROW-16831: [Go] panic in ipc.Reader when string array offsets are invalid
    
    Add a check for invalid offsets in a string array and panic. This
    prevents panic'ing later when accessing the column data or
    attempting to write it with ipc.Writer.
    
    Closes #13381 from chriscasola/choff--go-validate-string-offsets
    
    Authored-by: Chris Casola <[email protected]>
    Signed-off-by: Matthew Topol <[email protected]>
---
 go/arrow/array/binary.go      | 13 ++++++++
 go/arrow/array/binary_test.go | 71 +++++++++++++++++++++++++++++++++++++++++++
 go/arrow/array/concat.go      | 16 +++++++---
 go/arrow/array/concat_test.go | 11 -------
 go/arrow/array/string.go      | 13 ++++++++
 go/arrow/array/string_test.go | 71 +++++++++++++++++++++++++++++++++++++++++++
 go/arrow/ipc/reader.go        | 15 +++++++--
 go/arrow/ipc/reader_test.go   | 58 +++++++++++++++++++++++++++++++++++
 go/arrow/ipc/writer.go        | 10 ++++--
 go/arrow/ipc/writer_test.go   | 23 ++++++++++++++
 10 files changed, 281 insertions(+), 20 deletions(-)

diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go
index e33181cf1b..b2281f4df9 100644
--- a/go/arrow/array/binary.go
+++ b/go/arrow/array/binary.go
@@ -116,6 +116,19 @@ func (a *Binary) setData(data *Data) {
        if valueOffsets := data.buffers[1]; valueOffsets != nil {
                a.valueOffsets = 
arrow.Int32Traits.CastFromBytes(valueOffsets.Bytes())
        }
+
+       if a.array.data.length < 1 {
+               return
+       }
+
+       expNumOffsets := a.array.data.offset + a.array.data.length + 1
+       if len(a.valueOffsets) < expNumOffsets {
+               panic(fmt.Errorf("arrow/array: binary offset buffer must have 
at least %d values", expNumOffsets))
+       }
+
+       if int(a.valueOffsets[expNumOffsets-1]) > len(a.valueBytes) {
+               panic("arrow/array: binary offsets out of bounds of data 
buffer")
+       }
 }
 
 func (a *Binary) getOneForMarshal(i int) interface{} {
diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go
index ddcc903178..776d679acf 100644
--- a/go/arrow/array/binary_test.go
+++ b/go/arrow/array/binary_test.go
@@ -23,6 +23,7 @@ import (
        "github.com/stretchr/testify/assert"
 
        "github.com/apache/arrow/go/v9/arrow"
+       "github.com/apache/arrow/go/v9/arrow/bitutil"
        "github.com/apache/arrow/go/v9/arrow/memory"
 )
 
@@ -428,3 +429,73 @@ func TestBinaryStringer(t *testing.T) {
                t.Fatalf("invalid stringer:\ngot= %s\nwant=%s\n", got, want)
        }
 }
+
+func TestBinaryInvalidOffsets(t *testing.T) {
+       const expectedPanic = "arrow/array: binary offsets out of bounds of 
data buffer"
+
+       makeBuffers := func(valids []bool, offsets []int32, data string) 
[]*memory.Buffer {
+               offsetBuf := 
memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))
+               var nullBufBytes []byte
+               var nullBuf *memory.Buffer
+               if valids != nil {
+                       nullBufBytes = make([]byte, 
bitutil.BytesForBits(int64(len(valids))))
+                       for i, v := range valids {
+                               bitutil.SetBitTo(nullBufBytes, i, v)
+                       }
+                       nullBuf = memory.NewBufferBytes(nullBufBytes)
+               }
+               return []*memory.Buffer{nullBuf, offsetBuf, 
memory.NewBufferBytes([]byte(data))}
+       }
+
+       assert.NotPanics(t, func() {
+               buffers := makeBuffers(nil, []int32{}, "")
+               NewBinaryData(NewData(arrow.BinaryTypes.Binary, 0, buffers, 
nil, 0, 0))
+       }, "empty array with no offsets")
+
+       assert.NotPanics(t, func() {
+               buffers := makeBuffers(nil, []int32{0, 5}, "")
+               NewBinaryData(NewData(arrow.BinaryTypes.Binary, 0, buffers, 
nil, 0, 0))
+       }, "empty array, offsets ignored")
+
+       assert.NotPanics(t, func() {
+               buffers := makeBuffers(nil, []int32{0, 3, 4, 9}, "oooabcdef")
+               NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, 
nil, 0, 2))
+       }, "data has offset and value offsets are valid")
+
+       assert.NotPanics(t, func() {
+               buffers := makeBuffers(nil, []int32{0, 3, 6, 9, 9}, "012345678")
+               arr := NewBinaryData(NewData(arrow.BinaryTypes.Binary, 4, 
buffers, nil, 0, 0))
+               if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) 
{
+                       assert.EqualValues(t, "012", arr.Value(0))
+                       assert.EqualValues(t, "345", arr.Value(1))
+                       assert.EqualValues(t, "678", arr.Value(2))
+                       assert.EqualValues(t, "", arr.Value(3), "trailing empty 
binary value will have offset past end")
+               }
+       }, "simple valid case")
+
+       assert.NotPanics(t, func() {
+               buffers := makeBuffers([]bool{true, false, true, false}, 
[]int32{0, 3, 4, 9, 9}, "oooabcdef")
+               arr := NewBinaryData(NewData(arrow.BinaryTypes.Binary, 4, 
buffers, nil, 2, 0))
+               if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, 
arr.NullN()) {
+                       assert.EqualValues(t, "ooo", arr.Value(0))
+                       assert.True(t, arr.IsNull(1))
+                       assert.EqualValues(t, "bcdef", arr.Value(2))
+                       assert.True(t, arr.IsNull(3))
+               }
+       }, "simple valid case with nulls")
+
+       assert.PanicsWithValue(t, expectedPanic, func() {
+               buffers := makeBuffers(nil, []int32{0, 5}, "abc")
+               NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, 
nil, 0, 0))
+       }, "last offset is overflowing")
+
+       assert.PanicsWithError(t, "arrow/array: binary offset buffer must have 
at least 2 values", func() {
+               buffers := makeBuffers(nil, []int32{0}, "abc")
+               NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, 
nil, 0, 0))
+       }, "last offset is missing")
+
+       assert.PanicsWithValue(t, expectedPanic, func() {
+               buffers := makeBuffers(nil, []int32{0, 3, 10, 15}, "oooabcdef")
+               NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, 
nil, 0, 2))
+       }, "data has offset and value offset is overflowing")
+}
diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go
index 38344cfda8..2842757859 100644
--- a/go/arrow/array/concat.go
+++ b/go/arrow/array/concat.go
@@ -17,6 +17,7 @@
 package array
 
 import (
+       "errors"
        "fmt"
        "math"
        "math/bits"
@@ -25,7 +26,6 @@ import (
        "github.com/apache/arrow/go/v9/arrow/bitutil"
        "github.com/apache/arrow/go/v9/arrow/internal/debug"
        "github.com/apache/arrow/go/v9/arrow/memory"
-       "golang.org/x/xerrors"
 )
 
 // Concatenate creates a new arrow.Array which is the concatenation of the
@@ -33,11 +33,17 @@ import (
 //
 // The passed in arrays still need to be released manually, and will not be
 // released by this function.
-func Concatenate(arrs []arrow.Array, mem memory.Allocator) (arrow.Array, 
error) {
+func Concatenate(arrs []arrow.Array, mem memory.Allocator) (result 
arrow.Array, err error) {
        if len(arrs) == 0 {
-               return nil, xerrors.New("array/concat: must pass at least one 
array")
+               return nil, errors.New("array/concat: must pass at least one 
array")
        }
 
+       defer func() {
+               if pErr := recover(); pErr != nil {
+                       err = fmt.Errorf("arrow/concat: unknown error: %v", 
pErr)
+               }
+       }()
+
        // gather Data of inputs
        data := make([]arrow.ArrayData, len(arrs))
        for i, ar := range arrs {
@@ -202,7 +208,7 @@ func concatOffsets(buffers []*memory.Buffer, mem 
memory.Allocator) (*memory.Buff
                valuesRanges[i].len = int(expand[len(src)]) - 
valuesRanges[i].offset
 
                if nextOffset > math.MaxInt32-int32(valuesRanges[i].len) {
-                       return nil, nil, xerrors.New("offset overflow while 
concatenating arrays")
+                       return nil, nil, errors.New("offset overflow while 
concatenating arrays")
                }
 
                // adjust each offset by the difference between our last ending 
point and our starting point
@@ -347,7 +353,7 @@ func concatBitmaps(bitmaps []bitmap, mem memory.Allocator) 
(*memory.Buffer, erro
 
        for _, bm := range bitmaps {
                if outlen, overflow = addOvf(outlen, bm.rng.len); overflow {
-                       return nil, xerrors.New("length overflow when 
concatenating arrays")
+                       return nil, errors.New("length overflow when 
concatenating arrays")
                }
        }
 
diff --git a/go/arrow/array/concat_test.go b/go/arrow/array/concat_test.go
index 4ad1abbf2d..8beee43306 100644
--- a/go/arrow/array/concat_test.go
+++ b/go/arrow/array/concat_test.go
@@ -18,7 +18,6 @@ package array_test
 
 import (
        "fmt"
-       "math"
        "sort"
        "testing"
 
@@ -289,13 +288,3 @@ func (cts *ConcatTestSuite) TestCheckConcat() {
                })
        }
 }
-
-func TestOffsetOverflow(t *testing.T) {
-       fakeOffsets := 
memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, math.MaxInt32}))
-       fakeArr := array.NewStringData(array.NewData(arrow.BinaryTypes.String, 
1, []*memory.Buffer{nil, fakeOffsets, memory.NewBufferBytes([]byte{})}, nil, 0, 
0))
-       var err error
-       assert.NotPanics(t, func() {
-               _, err = array.Concatenate([]arrow.Array{fakeArr, fakeArr}, 
memory.DefaultAllocator)
-       })
-       assert.EqualError(t, err, "offset overflow while concatenating arrays")
-}
diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go
index ee7a07813e..4c033118a6 100644
--- a/go/arrow/array/string.go
+++ b/go/arrow/array/string.go
@@ -113,6 +113,19 @@ func (a *String) setData(data *Data) {
        if offsets := data.buffers[1]; offsets != nil {
                a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes())
        }
+
+       if a.array.data.length < 1 {
+               return
+       }
+
+       expNumOffsets := a.array.data.offset + a.array.data.length + 1
+       if len(a.offsets) < expNumOffsets {
+               panic(fmt.Errorf("arrow/array: string offset buffer must have 
at least %d values", expNumOffsets))
+       }
+
+       if int(a.offsets[expNumOffsets-1]) > len(a.values) {
+               panic("arrow/array: string offsets out of bounds of data 
buffer")
+       }
 }
 
 func (a *String) getOneForMarshal(i int) interface{} {
diff --git a/go/arrow/array/string_test.go b/go/arrow/array/string_test.go
index 2d841eeba9..f0e0325772 100644
--- a/go/arrow/array/string_test.go
+++ b/go/arrow/array/string_test.go
@@ -23,6 +23,7 @@ import (
 
        "github.com/apache/arrow/go/v9/arrow"
        "github.com/apache/arrow/go/v9/arrow/array"
+       "github.com/apache/arrow/go/v9/arrow/bitutil"
        "github.com/apache/arrow/go/v9/arrow/memory"
        "github.com/stretchr/testify/assert"
 )
@@ -206,3 +207,73 @@ func TestStringReset(t *testing.T) {
 
        assert.Equal(t, "string1", string2.Value(0))
 }
+
+func TestStringInvalidOffsets(t *testing.T) {
+       const expectedPanic = "arrow/array: string offsets out of bounds of 
data buffer"
+
+       makeBuffers := func(valids []bool, offsets []int32, data string) 
[]*memory.Buffer {
+               offsetBuf := 
memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))
+               var nullBufBytes []byte
+               var nullBuf *memory.Buffer
+               if valids != nil {
+                       nullBufBytes = make([]byte, 
bitutil.BytesForBits(int64(len(valids))))
+                       for i, v := range valids {
+                               bitutil.SetBitTo(nullBufBytes, i, v)
+                       }
+                       nullBuf = memory.NewBufferBytes(nullBufBytes)
+               }
+               return []*memory.Buffer{nullBuf, offsetBuf, 
memory.NewBufferBytes([]byte(data))}
+       }
+
+       assert.NotPanics(t, func() {
+               buffers := makeBuffers(nil, []int32{}, "")
+               array.NewStringData(array.NewData(arrow.BinaryTypes.String, 0, 
buffers, nil, 0, 0))
+       }, "empty array with no offsets")
+
+       assert.NotPanics(t, func() {
+               buffers := makeBuffers(nil, []int32{0, 5}, "")
+               array.NewStringData(array.NewData(arrow.BinaryTypes.String, 0, 
buffers, nil, 0, 0))
+       }, "empty array, offsets ignored")
+
+       assert.NotPanics(t, func() {
+               buffers := makeBuffers(nil, []int32{0, 3, 4, 9}, "oooabcdef")
+               array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, 
buffers, nil, 0, 2))
+       }, "data has offset and value offsets are valid")
+
+       assert.NotPanics(t, func() {
+               buffers := makeBuffers(nil, []int32{0, 3, 6, 9, 9}, "012345678")
+               arr := 
array.NewStringData(array.NewData(arrow.BinaryTypes.String, 4, buffers, nil, 0, 
0))
+               if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) 
{
+                       assert.Equal(t, "012", arr.Value(0))
+                       assert.Equal(t, "345", arr.Value(1))
+                       assert.Equal(t, "678", arr.Value(2))
+                       assert.Equal(t, "", arr.Value(3), "trailing empty 
string value will have offset past end")
+               }
+       }, "simple valid case")
+
+       assert.NotPanics(t, func() {
+               buffers := makeBuffers([]bool{true, false, true, false}, 
[]int32{0, 3, 4, 9, 9}, "oooabcdef")
+               arr := 
array.NewStringData(array.NewData(arrow.BinaryTypes.String, 4, buffers, nil, 2, 
0))
+               if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, 
arr.NullN()) {
+                       assert.Equal(t, "ooo", arr.Value(0))
+                       assert.True(t, arr.IsNull(1))
+                       assert.Equal(t, "bcdef", arr.Value(2))
+                       assert.True(t, arr.IsNull(3))
+               }
+       }, "simple valid case with nulls")
+
+       assert.PanicsWithValue(t, expectedPanic, func() {
+               buffers := makeBuffers(nil, []int32{0, 5}, "abc")
+               array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, 
buffers, nil, 0, 0))
+       }, "last offset is overflowing")
+
+       assert.PanicsWithError(t, "arrow/array: string offset buffer must have 
at least 2 values", func() {
+               buffers := makeBuffers(nil, []int32{0}, "abc")
+               array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, 
buffers, nil, 0, 0))
+       }, "last offset is missing")
+
+       assert.PanicsWithValue(t, expectedPanic, func() {
+               buffers := makeBuffers(nil, []int32{0, 3, 10, 15}, "oooabcdef")
+               array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, 
buffers, nil, 0, 2))
+       }, "data has offset and value offset is overflowing")
+}
diff --git a/go/arrow/ipc/reader.go b/go/arrow/ipc/reader.go
index 200160f7a9..69f1097eac 100644
--- a/go/arrow/ipc/reader.go
+++ b/go/arrow/ipc/reader.go
@@ -54,7 +54,12 @@ type Reader struct {
 // NewReaderFromMessageReader allows constructing a new reader object with the
 // provided MessageReader allowing injection of reading messages other than
 // by simple streaming bytes such as Arrow Flight which receives a protobuf 
message
-func NewReaderFromMessageReader(r MessageReader, opts ...Option) (*Reader, 
error) {
+func NewReaderFromMessageReader(r MessageReader, opts ...Option) (reader 
*Reader, err error) {
+       defer func() {
+               if pErr := recover(); pErr != nil {
+                       err = fmt.Errorf("arrow/ipc: unknown error while 
reading: %v", pErr)
+               }
+       }()
        cfg := newConfig()
        for _, opt := range opts {
                opt(cfg)
@@ -68,7 +73,7 @@ func NewReaderFromMessageReader(r MessageReader, opts 
...Option) (*Reader, error
                mem:  cfg.alloc,
        }
 
-       err := rr.readSchema(cfg.schema)
+       err = rr.readSchema(cfg.schema)
        if err != nil {
                return nil, fmt.Errorf("arrow/ipc: could not read schema from 
stream: %w", err)
        }
@@ -186,6 +191,12 @@ func (r *Reader) getInitialDicts() bool {
 }
 
 func (r *Reader) next() bool {
+       defer func() {
+               if pErr := recover(); pErr != nil {
+                       r.err = fmt.Errorf("arrow/ipc: unknown error while 
reading: %v", pErr)
+               }
+       }()
+
        if !r.readInitialDicts && !r.getInitialDicts() {
                return false
        }
diff --git a/go/arrow/ipc/reader_test.go b/go/arrow/ipc/reader_test.go
new file mode 100644
index 0000000000..76ef0c139a
--- /dev/null
+++ b/go/arrow/ipc/reader_test.go
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ipc
+
+import (
+       "bytes"
+       "testing"
+
+       "github.com/apache/arrow/go/v9/arrow"
+       "github.com/apache/arrow/go/v9/arrow/array"
+       "github.com/apache/arrow/go/v9/arrow/memory"
+       "github.com/stretchr/testify/assert"
+       "github.com/stretchr/testify/require"
+)
+
+func TestReaderCatchPanic(t *testing.T) {
+       alloc := memory.NewGoAllocator()
+       schema := arrow.NewSchema([]arrow.Field{
+               {Name: "s", Type: arrow.BinaryTypes.String},
+       }, nil)
+
+       b := array.NewRecordBuilder(alloc, schema)
+       defer b.Release()
+
+       b.Field(0).(*array.StringBuilder).AppendValues([]string{"foo", "bar", 
"baz"}, nil)
+       rec := b.NewRecord()
+       defer rec.Release()
+
+       buf := new(bytes.Buffer)
+       writer := NewWriter(buf, WithSchema(schema))
+       require.NoError(t, writer.Write(rec))
+
+       for i := buf.Len() - 100; i < buf.Len(); i++ {
+               buf.Bytes()[i] = 0
+       }
+
+       reader, err := NewReader(buf)
+       require.NoError(t, err)
+
+       _, err = reader.Read()
+       if assert.Error(t, err) {
+               assert.Contains(t, err.Error(), "arrow/ipc: unknown error while 
reading")
+       }
+}
diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go
index f022422320..7a288af258 100644
--- a/go/arrow/ipc/writer.go
+++ b/go/arrow/ipc/writer.go
@@ -141,7 +141,13 @@ func (w *Writer) Close() error {
        return nil
 }
 
-func (w *Writer) Write(rec arrow.Record) error {
+func (w *Writer) Write(rec arrow.Record) (err error) {
+       defer func() {
+               if pErr := recover(); pErr != nil {
+                       err = fmt.Errorf("arrow/ipc: unknown error while 
writing: %v", pErr)
+               }
+       }()
+
        if !w.started {
                err := w.start()
                if err != nil {
@@ -161,7 +167,7 @@ func (w *Writer) Write(rec arrow.Record) error {
        )
        defer data.Release()
 
-       err := writeDictionaryPayloads(w.mem, rec, false, w.emitDictDeltas, 
&w.mapper, w.lastWrittenDicts, w.pw, enc)
+       err = writeDictionaryPayloads(w.mem, rec, false, w.emitDictDeltas, 
&w.mapper, w.lastWrittenDicts, w.pw, enc)
        if err != nil {
                return fmt.Errorf("arrow/ipc: failure writing dictionary 
batches: %w", err)
        }
diff --git a/go/arrow/ipc/writer_test.go b/go/arrow/ipc/writer_test.go
index 7d4ce41c8d..ae15e838b6 100644
--- a/go/arrow/ipc/writer_test.go
+++ b/go/arrow/ipc/writer_test.go
@@ -121,3 +121,26 @@ func TestGetZeroBasedValueOffsets(t *testing.T) {
        defer offsets.Release()
        assert.Equal(t, 20, offsets.Len(), "trim trailing offsets after slice")
 }
+
+func TestWriterCatchPanic(t *testing.T) {
+       alloc := memory.NewGoAllocator()
+       schema := arrow.NewSchema([]arrow.Field{
+               {Name: "s", Type: arrow.BinaryTypes.String},
+       }, nil)
+
+       b := array.NewRecordBuilder(alloc, schema)
+       defer b.Release()
+
+       b.Field(0).(*array.StringBuilder).AppendValues([]string{"foo", "bar", 
"baz"}, nil)
+       rec := b.NewRecord()
+       defer rec.Release()
+
+       // mess up the first offset for the string column
+       offsetBuf := rec.Column(0).Data().Buffers()[1]
+       bitutil.SetBitsTo(offsetBuf.Bytes(), 0, 32, true)
+
+       buf := new(bytes.Buffer)
+
+       writer := NewWriter(buf, WithSchema(schema))
+       assert.EqualError(t, writer.Write(rec), "arrow/ipc: unknown error while 
writing: runtime error: slice bounds out of range [-1:]")
+}

Reply via email to