This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 6608347d61 GH-35202: [Go][Parquet] Fix panic reading nested empty list 
(#35276)
6608347d61 is described below

commit 6608347d61b047987c29542002663fe02cb5158e
Author: Matt Topol <[email protected]>
AuthorDate: Fri May 5 12:39:19 2023 -0500

    GH-35202: [Go][Parquet] Fix panic reading nested empty list (#35276)
    
    
    ### What changes are included in this PR?
    A simple check in the struct reader to validate that we're trying to read 
something with at least one child before we start trying to build a nullbitmap 
that wouldn't exist if there are 0 elements in the actual array.
    
    ### Are these changes tested?
    Yes a unit test is included in this change.
    
    * Closes: #35202
    
    Authored-by: Matt Topol <[email protected]>
    Signed-off-by: Matt Topol <[email protected]>
---
 go/arrow/memory/internal/cgoalloc/allocator.go |  3 +-
 go/parquet/file/level_conversion.go            |  2 +-
 go/parquet/pqarrow/column_readers.go           | 50 ++++++++++---------
 go/parquet/pqarrow/encode_arrow_test.go        | 68 ++++++++++++++++++++++++++
 4 files changed, 97 insertions(+), 26 deletions(-)

diff --git a/go/arrow/memory/internal/cgoalloc/allocator.go 
b/go/arrow/memory/internal/cgoalloc/allocator.go
index 213e75999d..48f34d8626 100644
--- a/go/arrow/memory/internal/cgoalloc/allocator.go
+++ b/go/arrow/memory/internal/cgoalloc/allocator.go
@@ -14,12 +14,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build ccalloc
 // +build ccalloc
 
 package cgoalloc
 
 // #cgo !windows pkg-config: arrow
-// #cgo CXXFLAGS: -std=c++14
+// #cgo CXXFLAGS: -std=c++17
 // #cgo windows LDFLAGS:  -larrow
 // #include "allocator.h"
 import "C"
diff --git a/go/parquet/file/level_conversion.go 
b/go/parquet/file/level_conversion.go
index 17031ba90f..9d4862b5e1 100755
--- a/go/parquet/file/level_conversion.go
+++ b/go/parquet/file/level_conversion.go
@@ -170,7 +170,7 @@ func defLevelsBatchToBitmap(defLevels []int16, 
remainingUpperBound int64, info L
 
 // create a bitmap out of the definition Levels
 func defLevelsToBitmapInternal(defLevels []int16, info LevelInfo, out 
*ValidityBitmapInputOutput, hasRepeatedParent bool) {
-       wr := utils.NewFirstTimeBitmapWriter(out.ValidBits, 
out.ValidBitsOffset, int64(len(defLevels)))
+       wr := utils.NewFirstTimeBitmapWriter(out.ValidBits, 
out.ValidBitsOffset, int64(out.ReadUpperBound))
        defer wr.Finish()
        setCount := defLevelsBatchToBitmap(defLevels, out.ReadUpperBound, info, 
wr, hasRepeatedParent)
        out.Read = int64(wr.Pos())
diff --git a/go/parquet/pqarrow/column_readers.go 
b/go/parquet/pqarrow/column_readers.go
index 2ab673266f..df02b2c949 100644
--- a/go/parquet/pqarrow/column_readers.go
+++ b/go/parquet/pqarrow/column_readers.go
@@ -250,33 +250,35 @@ func (sr *structReader) BuildArray(lenBound int64) 
(*arrow.Chunked, error) {
 
        var nullBitmap *memory.Buffer
 
-       if sr.hasRepeatedChild {
-               nullBitmap = memory.NewResizableBuffer(sr.rctx.mem)
-               nullBitmap.Resize(int(bitutil.BytesForBits(lenBound)))
-               validityIO.ValidBits = nullBitmap.Bytes()
-               defLevels, err := sr.GetDefLevels()
-               if err != nil {
-                       return nil, err
-               }
-               repLevels, err := sr.GetRepLevels()
-               if err != nil {
-                       return nil, err
-               }
+       if lenBound > 0 {
+               if sr.hasRepeatedChild {
+                       nullBitmap = memory.NewResizableBuffer(sr.rctx.mem)
+                       nullBitmap.Resize(int(bitutil.BytesForBits(lenBound)))
+                       validityIO.ValidBits = nullBitmap.Bytes()
+                       defLevels, err := sr.GetDefLevels()
+                       if err != nil {
+                               return nil, err
+                       }
+                       repLevels, err := sr.GetRepLevels()
+                       if err != nil {
+                               return nil, err
+                       }
 
-               if err := file.DefRepLevelsToBitmap(defLevels, repLevels, 
sr.levelInfo, &validityIO); err != nil {
-                       return nil, err
-               }
+                       if err := file.DefRepLevelsToBitmap(defLevels, 
repLevels, sr.levelInfo, &validityIO); err != nil {
+                               return nil, err
+                       }
 
-       } else if sr.filtered.Nullable {
-               nullBitmap = memory.NewResizableBuffer(sr.rctx.mem)
-               nullBitmap.Resize(int(bitutil.BytesForBits(lenBound)))
-               validityIO.ValidBits = nullBitmap.Bytes()
-               defLevels, err := sr.GetDefLevels()
-               if err != nil {
-                       return nil, err
-               }
+               } else if sr.filtered.Nullable {
+                       nullBitmap = memory.NewResizableBuffer(sr.rctx.mem)
+                       nullBitmap.Resize(int(bitutil.BytesForBits(lenBound)))
+                       validityIO.ValidBits = nullBitmap.Bytes()
+                       defLevels, err := sr.GetDefLevels()
+                       if err != nil {
+                               return nil, err
+                       }
 
-               file.DefLevelsToBitmap(defLevels, sr.levelInfo, &validityIO)
+                       file.DefLevelsToBitmap(defLevels, sr.levelInfo, 
&validityIO)
+               }
        }
 
        if nullBitmap != nil {
diff --git a/go/parquet/pqarrow/encode_arrow_test.go 
b/go/parquet/pqarrow/encode_arrow_test.go
index 3cb0646e9b..f9c1165a6f 100644
--- a/go/parquet/pqarrow/encode_arrow_test.go
+++ b/go/parquet/pqarrow/encode_arrow_test.go
@@ -1335,6 +1335,74 @@ func (ps *ParquetIOTestSuite) TestNestedNullableField() {
        ps.roundTripTable(mem, tbl, false)
 }
 
+func (ps *ParquetIOTestSuite) TestNestedEmptyList() {
+       mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+       defer mem.AssertSize(ps.T(), 0)
+
+       bldr := array.NewStructBuilder(mem, arrow.StructOf(
+               arrow.Field{
+                       Name: "root",
+                       Type: arrow.StructOf(
+                               arrow.Field{
+                                       Name: "child1",
+                                       Type: arrow.ListOf(arrow.StructOf(
+                                               arrow.Field{
+                                                       Name: "child2",
+                                                       Type: 
arrow.ListOf(arrow.StructOf(
+                                                               arrow.Field{
+                                                                       Name: 
"name",
+                                                                       Type: 
arrow.BinaryTypes.String,
+                                                               },
+                                                       )),
+                                               },
+                                       )),
+                               },
+                       ),
+               },
+       ))
+       defer bldr.Release()
+
+       rootBldr := bldr.FieldBuilder(0).(*array.StructBuilder)
+       child1Bldr := rootBldr.FieldBuilder(0).(*array.ListBuilder)
+       child1ElBldr := child1Bldr.ValueBuilder().(*array.StructBuilder)
+       child2Bldr := child1ElBldr.FieldBuilder(0).(*array.ListBuilder)
+       leafBldr := child2Bldr.ValueBuilder().(*array.StructBuilder)
+       nameBldr := leafBldr.FieldBuilder(0).(*array.StringBuilder)
+
+       // target structure 8 times
+       // {
+       //   "root": {
+       //     "child1": [
+       //       { "child2": [{ "name": "foo" }] },
+       //       { "child2": [] }
+       //     ]
+       //   }
+       // }
+
+       for i := 0; i < 8; i++ {
+               bldr.Append(true)
+               rootBldr.Append(true)
+               child1Bldr.Append(true)
+
+               child1ElBldr.Append(true)
+               child2Bldr.Append(true)
+               leafBldr.Append(true)
+               nameBldr.Append("foo")
+
+               child1ElBldr.Append(true)
+               child2Bldr.Append(true)
+       }
+
+       arr := bldr.NewArray()
+       defer arr.Release()
+
+       field := arrow.Field{Name: "x", Type: arr.DataType(), Nullable: true}
+       expected := 
array.NewTableFromSlice(arrow.NewSchema([]arrow.Field{field}, nil), 
[][]arrow.Array{{arr}})
+       defer expected.Release()
+
+       ps.roundTripTable(mem, expected, false)
+}
+
 func (ps *ParquetIOTestSuite) TestCanonicalNestedRoundTrip() {
        mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
        defer mem.AssertSize(ps.T(), 0)

Reply via email to