This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 5ac5d69e [Go][Parquet] Fix FixedSizeList nullable elements read as NULL (#585)
5ac5d69e is described below
commit 5ac5d69e120b4078c54cce1c0e75406bd5727d59
Author: Rick Morgans <[email protected]>
AuthorDate: Sun Nov 30 10:38:47 2025 +1030
[Go][Parquet] Fix FixedSizeList nullable elements read as NULL (#585)
Fixes #584
The `FIXED_SIZE_LIST` case in `pathBuilder.Visit` was missing the
`nullableInParent` assignment that the `LIST` case has, causing non-null
values to be read back as NULL.
**Fix:** Add one line to set `nullableInParent` before visiting child
values.
**Test:** Added `TestFixedSizeListNullableElements` roundtrip test.
---
parquet/pqarrow/encode_arrow_test.go | 31 +++++++++++++++++++++++++++++++
parquet/pqarrow/path_builder.go | 1 +
2 files changed, 32 insertions(+)
diff --git a/parquet/pqarrow/encode_arrow_test.go b/parquet/pqarrow/encode_arrow_test.go
index 3ce4c98e..8b11e75a 100644
--- a/parquet/pqarrow/encode_arrow_test.go
+++ b/parquet/pqarrow/encode_arrow_test.go
@@ -1871,6 +1871,37 @@ func (ps *ParquetIOTestSuite) TestFixedSizeList() {
ps.roundTripTable(mem, tbl, true)
}
+// TestFixedSizeListNullableElements verifies that FixedSizeList with nullable
+// element type correctly round-trips non-null values through Parquet.
+// This is a regression test for https://github.com/apache/arrow-go/issues/584
+func (ps *ParquetIOTestSuite) TestFixedSizeListNullableElements() {
+ mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+ defer mem.AssertSize(ps.T(), 0)
+
+ // FixedSizeList<float32>[8] with nullable elements (the default)
+ bldr := array.NewFixedSizeListBuilder(mem, 8, arrow.PrimitiveTypes.Float32)
+ defer bldr.Release()
+
+ vb := bldr.ValueBuilder().(*array.Float32Builder)
+
+ // Single row with non-null values [1, 2, 3, 4, 5, 6, 7, 8]
+ bldr.Append(true)
+ vb.AppendValues([]float32{1, 2, 3, 4, 5, 6, 7, 8}, nil)
+
+ data := bldr.NewArray()
+ defer data.Release()
+
+ field := arrow.Field{Name: "embedding", Type: data.DataType(), Nullable: true}
+ cnk := arrow.NewChunked(field.Type, []arrow.Array{data})
+ defer data.Release()
+
+ tbl := array.NewTable(arrow.NewSchema([]arrow.Field{field}, nil), []arrow.Column{*arrow.NewColumn(field, cnk)}, -1)
+ defer cnk.Release()
+ defer tbl.Release()
+
+ ps.roundTripTable(mem, tbl, true)
+}
+
func (ps *ParquetIOTestSuite) TestNull() {
mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
defer mem.AssertSize(ps.T(), 0)
diff --git a/parquet/pqarrow/path_builder.go b/parquet/pqarrow/path_builder.go
index 784a2c21..5fd3ffca 100644
--- a/parquet/pqarrow/path_builder.go
+++ b/parquet/pqarrow/path_builder.go
@@ -412,6 +412,7 @@ func (p *pathBuilder) Visit(arr arrow.Array) error {
defLevelIfEmpty: p.info.maxDefLevel,
})
// if arr.data.offset > 0, slice?
+ p.nullableInParent = arr.DataType().(*arrow.FixedSizeListType).ElemField().Nullable
return p.Visit(larr.ListValues())
case arrow.DICTIONARY:
// only currently handle dictionaryarray where the dictionary