This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d49654  ARROW-12072: [Go] Fix panics in ipc writer for sliced records
3d49654 is described below

commit 3d496543a84f8faf01c2415780c25526e9339e9e
Author: Matthew Topol <[email protected]>
AuthorDate: Fri Oct 15 17:13:54 2021 -0400

    ARROW-12072: [Go] Fix panics in ipc writer for sliced records
    
    The lengths of the truncated values array and of the offsets slice were
    calculated incorrectly: element counts were mixed with raw byte lengths,
    and the data offset was not used when creating the slice for shifting to
    zero-based offsets. This fixes both issues so that writing sliced records
    of FixedWidthDataType arrays and arrays with offsets now works correctly.
    
    Closes #11421 from zeroshade/arrow-12072-ipc-slicing
    
    Authored-by: Matthew Topol <[email protected]>
    Signed-off-by: Matthew Topol <[email protected]>
---
 go/arrow/ipc/ipc_test.go | 100 +++++++++++++++++++++++++++++++++++++++++++++++
 go/arrow/ipc/writer.go   |   4 +-
 2 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/go/arrow/ipc/ipc_test.go b/go/arrow/ipc/ipc_test.go
new file mode 100644
index 0000000..5b33106
--- /dev/null
+++ b/go/arrow/ipc/ipc_test.go
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ipc_test
+
+import (
+       "bytes"
+       "math/rand"
+       "strconv"
+       "testing"
+
+       "github.com/apache/arrow/go/arrow"
+       "github.com/apache/arrow/go/arrow/array"
+       "github.com/apache/arrow/go/arrow/ipc"
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/stretchr/testify/assert"
+)
+
+func TestArrow12072(t *testing.T) {
+       schema := arrow.NewSchema(
+               []arrow.Field{
+                       {Name: "idx", Type: arrow.PrimitiveTypes.Int64},
+                       {Name: "A", Type: arrow.PrimitiveTypes.Int64},
+                       {Name: "B", Type: arrow.PrimitiveTypes.Int64},
+                       {Name: "C", Type: arrow.BinaryTypes.String},
+               },
+               nil, // no metadata
+       )
+       mem := memory.NewGoAllocator()
+       counter := int64(0)
+
+       b := array.NewRecordBuilder(mem, schema)
+       defer b.Release()
+
+       const size = 3
+       for i := 0; i < size; i++ {
+               b.Field(0).(*array.Int64Builder).AppendValues([]int64{counter}, nil)
+               counter++
+               b.Field(1).(*array.Int64Builder).AppendValues(
+                       []int64{int64(rand.Intn(100))}, nil)
+               b.Field(2).(*array.Int64Builder).AppendValues(
+                       []int64{int64(rand.Intn(100))}, nil)
+               b.Field(3).(*array.StringBuilder).AppendValues(
+                       []string{strconv.Itoa(rand.Intn(100))}, nil)
+       }
+
+       rec := b.NewRecord()
+       defer rec.Release()
+
+       tbl := array.NewTableFromRecords(schema, []array.Record{rec})
+       defer tbl.Release()
+
+       tr := array.NewTableReader(tbl, 1)
+       defer tr.Release()
+
+       data := []array.Record{}
+       for tr.Next() {
+               rec := tr.Record()
+               rec.Retain()
+               defer rec.Release()
+               data = append(data, rec)
+       }
+
+       // tests writing out and then reading back in slices of the same record of length 1 each
+       // testing the bug that was reported in ARROW-12072 involving offsets for string arrays
+       // and correct truncation of slices when writing ipc FixedWidthDataType
+       for _, rec := range data {
+               var buf []byte
+               assert.NotPanics(t, func() {
+                       var output bytes.Buffer
+                       w := ipc.NewWriter(&output, ipc.WithSchema(rec.Schema()))
+                       assert.NoError(t, w.Write(rec))
+                       assert.NoError(t, w.Close())
+                       buf = output.Bytes()
+               })
+
+               assert.NotPanics(t, func() {
+                       rdr, err := ipc.NewReader(bytes.NewReader(buf))
+                       assert.NoError(t, err)
+                       for rdr.Next() {
+                               out := rdr.Record()
+                               assert.Truef(t, array.RecordEqual(rec, out), "expected: %s\ngot: %s\n", rec, out)
+                       }
+                       assert.NoError(t, rdr.Err())
+               })
+       }
+}
diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go
index e1f0b4d..a6f4848 100644
--- a/go/arrow/ipc/writer.go
+++ b/go/arrow/ipc/writer.go
@@ -372,7 +372,7 @@ func (w *recordEncoder) visit(p *Payload, arr array.Interface) error {
                        // non-zero offset: slice the buffer
                        offset := int64(data.Offset()) * typeWidth
                        // send padding if available
-                       len := minI64(bitutil.CeilByte64(arrLen*typeWidth), int64(data.Len())-offset)
+                       len := minI64(bitutil.CeilByte64(arrLen*typeWidth), int64(values.Len())-offset)
                        values = memory.NewBufferBytes(values.Bytes()[offset : offset+len])
                default:
                        if values != nil {
@@ -564,7 +564,7 @@ func (w *recordEncoder) getZeroBasedValueOffsets(arr array.Interface) (*memory.B
                shiftedOffsets.Resize(arrow.Int32Traits.BytesRequired(data.Len() + 1))
 
                dest := arrow.Int32Traits.CastFromBytes(shiftedOffsets.Bytes())
-               offsets := arrow.Int32Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Len()+2]
+               offsets := arrow.Int32Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()+1]
 
                startOffset := offsets[0]
                for i, o := range offsets {

Reply via email to