This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 6608347d61 GH-35202: [Go][Parquet] Fix panic reading nested empty list
(#35276)
6608347d61 is described below
commit 6608347d61b047987c29542002663fe02cb5158e
Author: Matt Topol <[email protected]>
AuthorDate: Fri May 5 12:39:19 2023 -0500
GH-35202: [Go][Parquet] Fix panic reading nested empty list (#35276)
### What changes are included in this PR?
A simple check in the struct reader to validate that we're trying to read
something with at least one child before we start trying to build a nullbitmap
that wouldn't exist if there are 0 elements in the actual array.
### Are these changes tested?
Yes a unit test is included in this change.
* Closes: #35202
Authored-by: Matt Topol <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
---
go/arrow/memory/internal/cgoalloc/allocator.go | 3 +-
go/parquet/file/level_conversion.go | 2 +-
go/parquet/pqarrow/column_readers.go | 50 ++++++++++---------
go/parquet/pqarrow/encode_arrow_test.go | 68 ++++++++++++++++++++++++++
4 files changed, 97 insertions(+), 26 deletions(-)
diff --git a/go/arrow/memory/internal/cgoalloc/allocator.go
b/go/arrow/memory/internal/cgoalloc/allocator.go
index 213e75999d..48f34d8626 100644
--- a/go/arrow/memory/internal/cgoalloc/allocator.go
+++ b/go/arrow/memory/internal/cgoalloc/allocator.go
@@ -14,12 +14,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build ccalloc
// +build ccalloc
package cgoalloc
// #cgo !windows pkg-config: arrow
-// #cgo CXXFLAGS: -std=c++14
+// #cgo CXXFLAGS: -std=c++17
// #cgo windows LDFLAGS: -larrow
// #include "allocator.h"
import "C"
diff --git a/go/parquet/file/level_conversion.go
b/go/parquet/file/level_conversion.go
index 17031ba90f..9d4862b5e1 100755
--- a/go/parquet/file/level_conversion.go
+++ b/go/parquet/file/level_conversion.go
@@ -170,7 +170,7 @@ func defLevelsBatchToBitmap(defLevels []int16,
remainingUpperBound int64, info L
// create a bitmap out of the definition Levels
func defLevelsToBitmapInternal(defLevels []int16, info LevelInfo, out
*ValidityBitmapInputOutput, hasRepeatedParent bool) {
- wr := utils.NewFirstTimeBitmapWriter(out.ValidBits,
out.ValidBitsOffset, int64(len(defLevels)))
+ wr := utils.NewFirstTimeBitmapWriter(out.ValidBits,
out.ValidBitsOffset, int64(out.ReadUpperBound))
defer wr.Finish()
setCount := defLevelsBatchToBitmap(defLevels, out.ReadUpperBound, info,
wr, hasRepeatedParent)
out.Read = int64(wr.Pos())
diff --git a/go/parquet/pqarrow/column_readers.go
b/go/parquet/pqarrow/column_readers.go
index 2ab673266f..df02b2c949 100644
--- a/go/parquet/pqarrow/column_readers.go
+++ b/go/parquet/pqarrow/column_readers.go
@@ -250,33 +250,35 @@ func (sr *structReader) BuildArray(lenBound int64)
(*arrow.Chunked, error) {
var nullBitmap *memory.Buffer
- if sr.hasRepeatedChild {
- nullBitmap = memory.NewResizableBuffer(sr.rctx.mem)
- nullBitmap.Resize(int(bitutil.BytesForBits(lenBound)))
- validityIO.ValidBits = nullBitmap.Bytes()
- defLevels, err := sr.GetDefLevels()
- if err != nil {
- return nil, err
- }
- repLevels, err := sr.GetRepLevels()
- if err != nil {
- return nil, err
- }
+ if lenBound > 0 {
+ if sr.hasRepeatedChild {
+ nullBitmap = memory.NewResizableBuffer(sr.rctx.mem)
+ nullBitmap.Resize(int(bitutil.BytesForBits(lenBound)))
+ validityIO.ValidBits = nullBitmap.Bytes()
+ defLevels, err := sr.GetDefLevels()
+ if err != nil {
+ return nil, err
+ }
+ repLevels, err := sr.GetRepLevels()
+ if err != nil {
+ return nil, err
+ }
- if err := file.DefRepLevelsToBitmap(defLevels, repLevels,
sr.levelInfo, &validityIO); err != nil {
- return nil, err
- }
+ if err := file.DefRepLevelsToBitmap(defLevels,
repLevels, sr.levelInfo, &validityIO); err != nil {
+ return nil, err
+ }
- } else if sr.filtered.Nullable {
- nullBitmap = memory.NewResizableBuffer(sr.rctx.mem)
- nullBitmap.Resize(int(bitutil.BytesForBits(lenBound)))
- validityIO.ValidBits = nullBitmap.Bytes()
- defLevels, err := sr.GetDefLevels()
- if err != nil {
- return nil, err
- }
+ } else if sr.filtered.Nullable {
+ nullBitmap = memory.NewResizableBuffer(sr.rctx.mem)
+ nullBitmap.Resize(int(bitutil.BytesForBits(lenBound)))
+ validityIO.ValidBits = nullBitmap.Bytes()
+ defLevels, err := sr.GetDefLevels()
+ if err != nil {
+ return nil, err
+ }
- file.DefLevelsToBitmap(defLevels, sr.levelInfo, &validityIO)
+ file.DefLevelsToBitmap(defLevels, sr.levelInfo,
&validityIO)
+ }
}
if nullBitmap != nil {
diff --git a/go/parquet/pqarrow/encode_arrow_test.go
b/go/parquet/pqarrow/encode_arrow_test.go
index 3cb0646e9b..f9c1165a6f 100644
--- a/go/parquet/pqarrow/encode_arrow_test.go
+++ b/go/parquet/pqarrow/encode_arrow_test.go
@@ -1335,6 +1335,74 @@ func (ps *ParquetIOTestSuite) TestNestedNullableField() {
ps.roundTripTable(mem, tbl, false)
}
+func (ps *ParquetIOTestSuite) TestNestedEmptyList() {
+ mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+ defer mem.AssertSize(ps.T(), 0)
+
+ bldr := array.NewStructBuilder(mem, arrow.StructOf(
+ arrow.Field{
+ Name: "root",
+ Type: arrow.StructOf(
+ arrow.Field{
+ Name: "child1",
+ Type: arrow.ListOf(arrow.StructOf(
+ arrow.Field{
+ Name: "child2",
+ Type:
arrow.ListOf(arrow.StructOf(
+ arrow.Field{
+ Name:
"name",
+ Type:
arrow.BinaryTypes.String,
+ },
+ )),
+ },
+ )),
+ },
+ ),
+ },
+ ))
+ defer bldr.Release()
+
+ rootBldr := bldr.FieldBuilder(0).(*array.StructBuilder)
+ child1Bldr := rootBldr.FieldBuilder(0).(*array.ListBuilder)
+ child1ElBldr := child1Bldr.ValueBuilder().(*array.StructBuilder)
+ child2Bldr := child1ElBldr.FieldBuilder(0).(*array.ListBuilder)
+ leafBldr := child2Bldr.ValueBuilder().(*array.StructBuilder)
+ nameBldr := leafBldr.FieldBuilder(0).(*array.StringBuilder)
+
+ // target structure 8 times
+ // {
+ // "root": {
+ // "child1": [
+ // { "child2": [{ "name": "foo" }] },
+ // { "child2": [] }
+ // ]
+ // }
+ // }
+
+ for i := 0; i < 8; i++ {
+ bldr.Append(true)
+ rootBldr.Append(true)
+ child1Bldr.Append(true)
+
+ child1ElBldr.Append(true)
+ child2Bldr.Append(true)
+ leafBldr.Append(true)
+ nameBldr.Append("foo")
+
+ child1ElBldr.Append(true)
+ child2Bldr.Append(true)
+ }
+
+ arr := bldr.NewArray()
+ defer arr.Release()
+
+ field := arrow.Field{Name: "x", Type: arr.DataType(), Nullable: true}
+ expected :=
array.NewTableFromSlice(arrow.NewSchema([]arrow.Field{field}, nil),
[][]arrow.Array{{arr}})
+ defer expected.Release()
+
+ ps.roundTripTable(mem, expected, false)
+}
+
func (ps *ParquetIOTestSuite) TestCanonicalNestedRoundTrip() {
mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
defer mem.AssertSize(ps.T(), 0)