This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 154b3f56 [Go][Parquet] Refactor: extract visitListLike helper for
list-like types (#586)
154b3f56 is described below
commit 154b3f56c05370694eb78637124308256f09168b
Author: Rick Morgans <[email protected]>
AuthorDate: Tue Dec 2 02:42:42 2025 +1030
[Go][Parquet] Refactor: extract visitListLike helper for list-like types
(#586)
## Summary
- Extracts common logic for LIST, MAP, and FIXED_SIZE_LIST into a shared
`visitListLike` helper function
- Ensures `nullableInParent` is always set correctly before visiting
child values, making it impossible to forget this step when adding new
list-like type handling
- Uses `defLevelOffset` parameter to calculate `defLevelIfEmpty` after
all increments
## Rationale
This refactoring follows the principle of "making invalid states
unrepresentable". The original bug in #584 (fixed in #585) was caused by
the FIXED_SIZE_LIST case forgetting to set `nullableInParent`. By
extracting the common pattern into a helper function, future additions
of list-like types cannot omit this critical step.
## Test plan
- [x] All existing tests pass
- [x] Regression test from #585 (`TestFixedSizeListNullableElements`)
continues to pass
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-authored-by: Claude <[email protected]>
---
parquet/pqarrow/path_builder.go | 52 ++++++++++++++++++++---------------------
1 file changed, 25 insertions(+), 27 deletions(-)
diff --git a/parquet/pqarrow/path_builder.go b/parquet/pqarrow/path_builder.go
index 5fd3ffca..63e4de43 100644
--- a/parquet/pqarrow/path_builder.go
+++ b/parquet/pqarrow/path_builder.go
@@ -377,43 +377,41 @@ func fixup(info pathInfo) pathInfo {
return info
}
+// visitListLike handles the common logic for LIST, MAP, and FIXED_SIZE_LIST
types.
+// Extracting this ensures nullableInParent is always set before visiting
children.
+// defLevelOffset is applied to maxDefLevel AFTER all increments to compute
defLevelIfEmpty.
+func (p *pathBuilder) visitListLike(arr arrow.Array, selector rangeSelector,
defLevelOffset int16, childValues arrow.Array) error {
+ p.maybeAddNullable(arr)
+ p.info.maxDefLevel++
+ p.info.maxRepLevel++
+ p.info.path = append(p.info.path, &listNode{
+ selector: selector,
+ prevRepLevel: p.info.maxRepLevel - 1,
+ repLevel: p.info.maxRepLevel,
+ defLevelIfEmpty: p.info.maxDefLevel + defLevelOffset,
+ })
+ p.nullableInParent =
arr.DataType().(arrow.ListLikeType).ElemField().Nullable
+ return p.Visit(childValues)
+}
+
func (p *pathBuilder) Visit(arr arrow.Array) error {
switch arr.DataType().ID() {
case arrow.LIST, arrow.MAP:
- p.maybeAddNullable(arr)
- // increment necessary due to empty lists
- p.info.maxDefLevel++
- p.info.maxRepLevel++
larr, ok := arr.(*array.List)
if !ok {
larr = arr.(*array.Map).List
}
-
- p.info.path = append(p.info.path, &listNode{
- selector:
varRangeSelector{larr.Offsets()[larr.Data().Offset():]},
- prevRepLevel: p.info.maxRepLevel - 1,
- repLevel: p.info.maxRepLevel,
- defLevelIfEmpty: p.info.maxDefLevel - 1,
- })
- p.nullableInParent =
arr.DataType().(arrow.ListLikeType).ElemField().Nullable
- return p.Visit(larr.ListValues())
+ return p.visitListLike(arr,
+ varRangeSelector{larr.Offsets()[larr.Data().Offset():]},
+ -1, // defLevelIfEmpty = maxDefLevel - 1 (after all
increments)
+ larr.ListValues())
case arrow.FIXED_SIZE_LIST:
- p.maybeAddNullable(arr)
larr := arr.(*array.FixedSizeList)
listSize := larr.DataType().(*arrow.FixedSizeListType).Len()
- // technically we could encode fixed sized lists with two level
encodings
- // but we always use 3 level encoding, so we increment def
levels as well
- p.info.maxDefLevel++
- p.info.maxRepLevel++
- p.info.path = append(p.info.path, &listNode{
- selector: fixedSizeRangeSelector{listSize},
- prevRepLevel: p.info.maxRepLevel - 1,
- repLevel: p.info.maxRepLevel,
- defLevelIfEmpty: p.info.maxDefLevel,
- })
- // if arr.data.offset > 0, slice?
- p.nullableInParent =
arr.DataType().(*arrow.FixedSizeListType).ElemField().Nullable
- return p.Visit(larr.ListValues())
+ return p.visitListLike(arr,
+ fixedSizeRangeSelector{listSize},
+ 0, // defLevelIfEmpty = maxDefLevel (after all
increments)
+ larr.ListValues())
case arrow.DICTIONARY:
// only currently handle dictionaryarray where the dictionary
// is a primitive type