shaoting-huang opened a new issue, #43276:
URL: https://github.com/apache/arrow/issues/43276

   ### Describe the bug, including details regarding any error messages, version, and platform.
   
   Reading a Parquet column written with delta binary packed encoding on a nullable field panics when more rows are read back than the reader's batch size.
   
   To reproduce:
   ```go
   package storage_test

   import (
       "bytes"
       "context"
       "testing"

       "github.com/apache/arrow/go/v12/arrow"
       "github.com/apache/arrow/go/v12/arrow/array"
       "github.com/apache/arrow/go/v12/arrow/memory"
       "github.com/apache/arrow/go/v12/parquet"
       "github.com/apache/arrow/go/v12/parquet/compress"
       "github.com/apache/arrow/go/v12/parquet/file"
       "github.com/apache/arrow/go/v12/parquet/pqarrow"
       "github.com/stretchr/testify/assert"
   )

   func TestDeltaBinaryPackedEncodingWithNull(t *testing.T) {
       t.Run("test", func(t *testing.T) {
           size := 10
           buf := new(bytes.Buffer)
           mem := memory.NewGoAllocator()

           // Define the schema for the test data: one nullable int64 column.
           fields := []arrow.Field{
               {Name: "int64", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
           }
           schema := arrow.NewSchema(fields, nil)

           // Create a record batch with the test data.
           b := array.NewRecordBuilder(mem, schema)
           defer b.Release()

           for i := 0; i < size; i++ {
               b.Field(0).(*array.Int64Builder).Append(int64(i))
           }
           rec := b.NewRecord()
           defer rec.Release()

           // Write the data to Parquet using the file writer.
           props := parquet.NewWriterProperties(
               parquet.WithCompression(compress.Codecs.Zstd),
               parquet.WithCompressionLevel(3),
               parquet.WithDictionaryDefault(false),
               parquet.WithEncoding(parquet.Encodings.DeltaBinaryPacked))
           writerProps := pqarrow.DefaultWriterProps()
           pw, err := pqarrow.NewFileWriter(schema, buf, props, writerProps)
           assert.NoError(t, err)
           assert.NoError(t, pw.Write(rec))
           assert.NoError(t, pw.Close())

           // Read the data back with a batch size smaller than the row count.
           reader, err := file.NewParquetReader(bytes.NewReader(buf.Bytes()))
           assert.NoError(t, err)
           defer reader.Close()

           pr, err := pqarrow.NewFileReader(reader,
               pqarrow.ArrowReadProperties{BatchSize: 5}, memory.DefaultAllocator)
           assert.NoError(t, err)

           rr, err := pr.GetRecordReader(context.Background(), nil, nil)
           assert.NoError(t, err)

           totalRows := 0
           for rr.Next() {
               rec := rr.Record()
               col := rec.Column(0).(*array.Int64)
               for i := 0; i < int(rec.NumRows()); i++ {
                   assert.Equal(t, int64(totalRows+i), col.Value(i))
               }
               totalRows += int(rec.NumRows())
           }

           if totalRows != size {
               t.Fatalf("Expected %d rows, but got %d rows", size, totalRows)
           }
       })
   }
   ```
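   The triggering combination seems to be: a nullable field, `DeltaBinaryPacked` encoding, and a `BatchSize` (5 here) smaller than the number of rows written (10). Purely as an illustrative sketch (an assumption based on the failure mode, not a verified fix), keeping the batch size at or above the row count should make the reader drain each column chunk in a single decode call:

   ```go
   // Hypothetical workaround sketch (untested assumption): BatchSize >= row
   // count means one decode call per column chunk, sidestepping the
   // repeated-call path that panics below.
   pr, err := pqarrow.NewFileReader(reader,
       pqarrow.ArrowReadProperties{BatchSize: int64(size)}, memory.DefaultAllocator)
   ```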
   
   Running the test as written produces the following error:
   ```
   panic: runtime error: slice bounds out of range [4:0] [recovered]
           panic: runtime error: slice bounds out of range [4:0]

   goroutine 178 [running]:
   testing.tRunner.func1.2({0x103f56e00, 0x14000aa40a8})
           /opt/homebrew/Cellar/[email protected]/1.21.11/libexec/src/testing/testing.go:1545 +0x1c4
   testing.tRunner.func1()
           /opt/homebrew/Cellar/[email protected]/1.21.11/libexec/src/testing/testing.go:1548 +0x360
   panic({0x103f56e00?, 0x14000aa40a8?})
           /opt/homebrew/Cellar/[email protected]/1.21.11/libexec/src/runtime/panic.go:914 +0x218
   github.com/apache/arrow/go/v12/parquet/internal/encoding.(*DeltaBitPackInt64Decoder).Decode(0x140008af618, {0x140005e8680?, 0x0?, 0x0?})
           github.com/apache/arrow/go/[email protected]/parquet/internal/encoding/delta_bit_packing.go:273 +0x240
   github.com/apache/arrow/go/v12/parquet/internal/encoding.DeltaBitPackInt64Decoder.DecodeSpaced({0x1400001f1e0, {0x0, 0x0, 0x0}}, {0x140005e8680, 0x5, 0x8}, 0x0, {0x140005e8700, 0x1, ...}, ...)
           github.com/apache/arrow/go/[email protected]/parquet/internal/encoding/delta_bit_packing.go:291 +0x74
   github.com/apache/arrow/go/v12/parquet/file.(*primitiveRecordReader).ReadValuesSpaced(0x140005e8380?, 0x5, 0x20?)
           github.com/apache/arrow/go/[email protected]/parquet/file/record_reader.go:284 +0x2e4
   github.com/apache/arrow/go/v12/parquet/file.(*recordReader).ReadRecordData(0x14000b8a9c0, 0x5)
           github.com/apache/arrow/go/[email protected]/parquet/file/record_reader.go:548 +0x288
   github.com/apache/arrow/go/v12/parquet/file.(*recordReader).ReadRecords(0x14000b8a9c0, 0x5)
           github.com/apache/arrow/go/[email protected]/parquet/file/record_reader.go:574 +0x44
   github.com/apache/arrow/go/v12/parquet/pqarrow.(*leafReader).LoadBatch(0x14000b8aa20, 0x5)
           github.com/apache/arrow/go/[email protected]/parquet/pqarrow/column_readers.go:109 +0xe0
   github.com/apache/arrow/go/v12/parquet/pqarrow.(*ColumnReader).NextBatch(0x14000a9c160, 0x103e7de80?)
           github.com/apache/arrow/go/[email protected]/parquet/pqarrow/file_reader.go:131 +0x34
   github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).next.func2(0x0, 0x0?)
           github.com/apache/arrow/go/[email protected]/parquet/pqarrow/file_reader.go:665 +0x40
   github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).next(0x14000b8ab40)
           github.com/apache/arrow/go/[email protected]/parquet/pqarrow/file_reader.go:685 +0x1d0
   github.com/apache/arrow/go/v12/parquet/pqarrow.(*recordReader).Next(0x14000b8ab40?)
           github.com/apache/arrow/go/[email protected]/parquet/pqarrow/file_reader.go:760 +0x74
   github.com/milvus-io/milvus/internal/storage.Test.func1(0x14000185a00)
   ```
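   For context on the failing frames: spaced decoding in Parquet readers conventionally decodes the non-null values densely and then spreads them to their final positions using the validity bitmap, so the dense `Decode` at delta_bit_packing.go:273 is being asked for the values remaining in a partially consumed page. A rough illustration of that spaced-decode contract (my own sketch, not the arrow/go implementation):

   ```go
   // Illustrative sketch of "spaced" decoding (not the library's code):
   // out holds len(out)-nullCount densely decoded values at the front; walk
   // the validity bitmap from the back, moving each dense value to its slot.
   func spaceValues(out []int64, nullCount int, validBits []byte, offset int64) {
       idx := len(out) - 1 - nullCount // index of the last dense value
       for pos := len(out) - 1; pos >= 0; pos-- {
           bit := offset + int64(pos)
           if validBits[bit/8]&(1<<uint(bit%8)) != 0 {
               out[pos] = out[idx] // valid slot: take the next dense value
               idx--
           } else {
               out[pos] = 0 // null slot
           }
       }
   }
   ```

   The `[4:0]` bounds (low index greater than high) suggest the dense decoder's count of values left in the current block goes negative on the second batch, though I have not confirmed that against the source.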
   
   ### Component(s)
   
   Go

