This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new ca0aa6ac fix(arrow/compute): take on record/array with nested struct
(#653)
ca0aa6ac is described below
commit ca0aa6ac00cbcddf826bdad7be24668dfc3e26ba
Author: Matt Topol <[email protected]>
AuthorDate: Fri Jan 30 04:18:03 2026 -0500
fix(arrow/compute): take on record/array with nested struct (#653)
### Rationale for this change
Fixes #644: take on a record/array that contains a nested struct.
### What changes are included in this PR?
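Fixes how child spans are set up in the selection code: `structTake` (and
`CastStruct`) now call the newly exported `ArraySpan.ResizeChildren` instead
of replacing `out.Children` with a freshly allocated slice.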
### Are these changes tested?
Yes. `TestNestedStruct` and `TestDeeplyNestedStruct` are added to
arrow/compute/vector_selection_test.go to cover the issue.
---
arrow/compute/cast.go | 2 +-
arrow/compute/exec/span.go | 22 +++++------
arrow/compute/selection.go | 2 +-
arrow/compute/vector_selection_test.go | 67 ++++++++++++++++++++++++++++++++++
4 files changed, 80 insertions(+), 13 deletions(-)
diff --git a/arrow/compute/cast.go b/arrow/compute/cast.go
index bd239b58..95f2f8f1 100644
--- a/arrow/compute/cast.go
+++ b/arrow/compute/cast.go
@@ -301,7 +301,7 @@ func CastStruct(ctx *exec.KernelCtx, batch *exec.ExecSpan,
out *exec.ExecResult)
out.Buffers[0].Buf, 0)
}
- out.Children = make([]exec.ArraySpan, outFieldCount)
+ out.ResizeChildren(outFieldCount)
for outFieldIndex, idx := range fieldsToSelect {
values := input.Children[idx].MakeArray()
defer values.Release()
diff --git a/arrow/compute/exec/span.go b/arrow/compute/exec/span.go
index 2585d9a6..a235d42b 100644
--- a/arrow/compute/exec/span.go
+++ b/arrow/compute/exec/span.go
@@ -241,7 +241,7 @@ func (a *ArraySpan) GetBuffer(idx int) *memory.Buffer {
// convenience function to resize the children slice if necessary,
// or just shrink the slice without re-allocating if there's enough
// capacity already.
-func (a *ArraySpan) resizeChildren(i int) {
+func (a *ArraySpan) ResizeChildren(i int) {
if cap(a.Children) >= i {
a.Children = a.Children[:i]
} else {
@@ -295,7 +295,7 @@ func (a *ArraySpan) FillFromScalar(val scalar.Scalar) {
a.Buffers[1].Buf = sc.Data()
a.Buffers[1].Owner = nil
a.Buffers[1].SelfAlloc = false
- a.resizeChildren(1)
+ a.ResizeChildren(1)
a.Children[0].SetMembers(val.(*scalar.Dictionary).Value.Dict.Data())
case arrow.IsBaseBinary(typeID):
sc := val.(scalar.BinaryScalar)
@@ -334,7 +334,7 @@ func (a *ArraySpan) FillFromScalar(val scalar.Scalar) {
case arrow.IsListLike(typeID):
sc := val.(scalar.ListScalar)
valueLen := 0
- a.resizeChildren(1)
+ a.ResizeChildren(1)
if sc.GetList() != nil {
a.Children[0].SetMembers(sc.GetList().Data())
@@ -364,7 +364,7 @@ func (a *ArraySpan) FillFromScalar(val scalar.Scalar) {
a.Buffers[1].Buf = nil
a.Buffers[1].Owner = nil
a.Buffers[1].SelfAlloc = false
- a.resizeChildren(len(sc.Value))
+ a.ResizeChildren(len(sc.Value))
for i, v := range sc.Value {
a.Children[i].FillFromScalar(v)
}
@@ -378,7 +378,7 @@ func (a *ArraySpan) FillFromScalar(val scalar.Scalar) {
a.Buffers[1].SelfAlloc = false
codes :=
unsafe.Slice((*arrow.UnionTypeCode)(unsafe.Pointer(&a.Buffers[1].Buf[0])), 1)
- a.resizeChildren(len(a.Type.(arrow.UnionType).Fields()))
+ a.ResizeChildren(len(a.Type.(arrow.UnionType).Fields()))
switch sc := val.(type) {
case *scalar.DenseUnion:
codes[0] = sc.TypeCode
@@ -421,7 +421,7 @@ func (a *ArraySpan) FillFromScalar(val scalar.Scalar) {
}
func (a *ArraySpan) SetDictionary(span *ArraySpan) {
- a.resizeChildren(1)
+ a.ResizeChildren(1)
a.Children[0].Release()
a.Children[0] = *span
}
@@ -468,13 +468,13 @@ func (a *ArraySpan) TakeOwnership(data arrow.ArrayData) {
}
if typeID == arrow.DICTIONARY {
- a.resizeChildren(1)
+ a.ResizeChildren(1)
dict := data.Dictionary()
if dict != (*array.Data)(nil) {
a.Children[0].TakeOwnership(dict)
}
} else {
- a.resizeChildren(len(data.Children()))
+ a.ResizeChildren(len(data.Children()))
for i, c := range data.Children() {
a.Children[i].TakeOwnership(c)
}
@@ -522,7 +522,7 @@ func (a *ArraySpan) SetMembers(data arrow.ArrayData) {
}
if typeID == arrow.DICTIONARY {
- a.resizeChildren(1)
+ a.ResizeChildren(1)
dict := data.Dictionary()
if dict != (*array.Data)(nil) {
a.Children[0].SetMembers(dict)
@@ -603,7 +603,7 @@ func FillZeroLength(dt arrow.DataType, span *ArraySpan) {
}
if dt.ID() == arrow.DICTIONARY {
- span.resizeChildren(1)
+ span.ResizeChildren(1)
FillZeroLength(dt.(*arrow.DictionaryType).ValueType,
&span.Children[0])
return
}
@@ -616,7 +616,7 @@ func FillZeroLength(dt arrow.DataType, span *ArraySpan) {
return
}
- span.resizeChildren(nt.NumFields())
+ span.ResizeChildren(nt.NumFields())
for i, f := range nt.Fields() {
FillZeroLength(f.Type, &span.Children[i])
}
diff --git a/arrow/compute/selection.go b/arrow/compute/selection.go
index 5c0a9759..a225febd 100644
--- a/arrow/compute/selection.go
+++ b/arrow/compute/selection.go
@@ -514,7 +514,7 @@ func structTake(ctx *exec.KernelCtx, batch *exec.ExecSpan,
out *exec.ExecResult)
defer values.Release()
// select from children without bounds checking
- out.Children = make([]exec.ArraySpan, values.NumField())
+ out.ResizeChildren(values.NumField())
eg, cctx := errgroup.WithContext(ctx.Ctx)
eg.SetLimit(GetExecCtx(ctx.Ctx).NumParallel)
diff --git a/arrow/compute/vector_selection_test.go
b/arrow/compute/vector_selection_test.go
index 45601f44..1b275f9d 100644
--- a/arrow/compute/vector_selection_test.go
+++ b/arrow/compute/vector_selection_test.go
@@ -1432,6 +1432,73 @@ func (tk *TakeKernelStruct) TestStruct() {
tk.assertNoValidityBitmapUnknownNullCountJSON(tk.dt, `[{"a": 1}, {"a":
2, "b": "hello"}]`, `[0, 1, 0]`)
}
+func (tk *TakeKernelStruct) TestNestedStruct() {
+ // Define nested struct type: struct<a: int32, b: struct<x: int32, y:
string>>
+ innerStruct := arrow.StructOf(
+ arrow.Field{Name: "x", Type: arrow.PrimitiveTypes.Int32,
Nullable: true},
+ arrow.Field{Name: "y", Type: arrow.BinaryTypes.String,
Nullable: true},
+ )
+ outerStruct := arrow.StructOf(
+ arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int32,
Nullable: true},
+ arrow.Field{Name: "b", Type: innerStruct, Nullable: true},
+ )
+
+ nestedJSON := `[
+ {"a": 1, "b": {"x": 10, "y": "hello"}},
+ {"a": 2, "b": {"x": 20, "y": "world"}},
+ null,
+ {"a": 4, "b": null}
+ ]`
+
+ // Test basic reordering
+ tk.checkTake(outerStruct, nestedJSON, `[1, 0, 3]`, `[
+ {"a": 2, "b": {"x": 20, "y": "world"}},
+ {"a": 1, "b": {"x": 10, "y": "hello"}},
+ {"a": 4, "b": null}
+ ]`)
+
+ // Test with nulls at different levels
+ tk.checkTake(outerStruct, nestedJSON, `[2, 3, 0]`, `[
+ null,
+ {"a": 4, "b": null},
+ {"a": 1, "b": {"x": 10, "y": "hello"}}
+ ]`)
+
+ // Test with duplicates
+ tk.checkTake(outerStruct, nestedJSON, `[0, 0, 1, 1]`, `[
+ {"a": 1, "b": {"x": 10, "y": "hello"}},
+ {"a": 1, "b": {"x": 10, "y": "hello"}},
+ {"a": 2, "b": {"x": 20, "y": "world"}},
+ {"a": 2, "b": {"x": 20, "y": "world"}}
+ ]`)
+}
+
+func (tk *TakeKernelStruct) TestDeeplyNestedStruct() {
+ // struct<a: int32, b: struct<x: int32, y: struct<z: string>>>
+ innermostStruct := arrow.StructOf(
+ arrow.Field{Name: "z", Type: arrow.BinaryTypes.String,
Nullable: true},
+ )
+ middleStruct := arrow.StructOf(
+ arrow.Field{Name: "x", Type: arrow.PrimitiveTypes.Int32,
Nullable: true},
+ arrow.Field{Name: "y", Type: innermostStruct, Nullable: true},
+ )
+ outerStruct := arrow.StructOf(
+ arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int32,
Nullable: true},
+ arrow.Field{Name: "b", Type: middleStruct, Nullable: true},
+ )
+
+ deeplyNestedJSON := `[
+ {"a": 1, "b": {"x": 10, "y": {"z": "deep"}}},
+ {"a": 2, "b": {"x": 20, "y": {"z": "deeper"}}}
+ ]`
+
+ tk.checkTake(outerStruct, deeplyNestedJSON, `[1, 0, 1]`, `[
+ {"a": 2, "b": {"x": 20, "y": {"z": "deeper"}}},
+ {"a": 1, "b": {"x": 10, "y": {"z": "deep"}}},
+ {"a": 2, "b": {"x": 20, "y": {"z": "deeper"}}}
+ ]`)
+}
+
type TakeKernelTestChunked struct {
TakeKernelTestTyped
}