This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new ca0aa6ac fix(arrow/compute): take on record/array with nested struct (#653)
ca0aa6ac is described below

commit ca0aa6ac00cbcddf826bdad7be24668dfc3e26ba
Author: Matt Topol <[email protected]>
AuthorDate: Fri Jan 30 04:18:03 2026 -0500

    fix(arrow/compute): take on record/array with nested struct (#653)
    
    ### Rationale for this change
    fixes #644
    
    ### What changes are included in this PR?
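    Fixes how the children of an output span are set up in the Selection
    (take) code and in `CastStruct`: `ArraySpan.resizeChildren` is exported
    as `ResizeChildren`, and `structTake` and `CastStruct` now call it
    instead of assigning a freshly allocated `Children` slice.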
    
    ### Are these changes tested?
    Tests are added to cover the issue
---
 arrow/compute/cast.go                  |  2 +-
 arrow/compute/exec/span.go             | 22 +++++------
 arrow/compute/selection.go             |  2 +-
 arrow/compute/vector_selection_test.go | 67 ++++++++++++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/arrow/compute/cast.go b/arrow/compute/cast.go
index bd239b58..95f2f8f1 100644
--- a/arrow/compute/cast.go
+++ b/arrow/compute/cast.go
@@ -301,7 +301,7 @@ func CastStruct(ctx *exec.KernelCtx, batch *exec.ExecSpan, 
out *exec.ExecResult)
                        out.Buffers[0].Buf, 0)
        }
 
-       out.Children = make([]exec.ArraySpan, outFieldCount)
+       out.ResizeChildren(outFieldCount)
        for outFieldIndex, idx := range fieldsToSelect {
                values := input.Children[idx].MakeArray()
                defer values.Release()
diff --git a/arrow/compute/exec/span.go b/arrow/compute/exec/span.go
index 2585d9a6..a235d42b 100644
--- a/arrow/compute/exec/span.go
+++ b/arrow/compute/exec/span.go
@@ -241,7 +241,7 @@ func (a *ArraySpan) GetBuffer(idx int) *memory.Buffer {
 // convenience function to resize the children slice if necessary,
 // or just shrink the slice without re-allocating if there's enough
 // capacity already.
-func (a *ArraySpan) resizeChildren(i int) {
+func (a *ArraySpan) ResizeChildren(i int) {
        if cap(a.Children) >= i {
                a.Children = a.Children[:i]
        } else {
@@ -295,7 +295,7 @@ func (a *ArraySpan) FillFromScalar(val scalar.Scalar) {
                a.Buffers[1].Buf = sc.Data()
                a.Buffers[1].Owner = nil
                a.Buffers[1].SelfAlloc = false
-               a.resizeChildren(1)
+               a.ResizeChildren(1)
                
a.Children[0].SetMembers(val.(*scalar.Dictionary).Value.Dict.Data())
        case arrow.IsBaseBinary(typeID):
                sc := val.(scalar.BinaryScalar)
@@ -334,7 +334,7 @@ func (a *ArraySpan) FillFromScalar(val scalar.Scalar) {
        case arrow.IsListLike(typeID):
                sc := val.(scalar.ListScalar)
                valueLen := 0
-               a.resizeChildren(1)
+               a.ResizeChildren(1)
 
                if sc.GetList() != nil {
                        a.Children[0].SetMembers(sc.GetList().Data())
@@ -364,7 +364,7 @@ func (a *ArraySpan) FillFromScalar(val scalar.Scalar) {
                a.Buffers[1].Buf = nil
                a.Buffers[1].Owner = nil
                a.Buffers[1].SelfAlloc = false
-               a.resizeChildren(len(sc.Value))
+               a.ResizeChildren(len(sc.Value))
                for i, v := range sc.Value {
                        a.Children[i].FillFromScalar(v)
                }
@@ -378,7 +378,7 @@ func (a *ArraySpan) FillFromScalar(val scalar.Scalar) {
                a.Buffers[1].SelfAlloc = false
                codes := 
unsafe.Slice((*arrow.UnionTypeCode)(unsafe.Pointer(&a.Buffers[1].Buf[0])), 1)
 
-               a.resizeChildren(len(a.Type.(arrow.UnionType).Fields()))
+               a.ResizeChildren(len(a.Type.(arrow.UnionType).Fields()))
                switch sc := val.(type) {
                case *scalar.DenseUnion:
                        codes[0] = sc.TypeCode
@@ -421,7 +421,7 @@ func (a *ArraySpan) FillFromScalar(val scalar.Scalar) {
 }
 
 func (a *ArraySpan) SetDictionary(span *ArraySpan) {
-       a.resizeChildren(1)
+       a.ResizeChildren(1)
        a.Children[0].Release()
        a.Children[0] = *span
 }
@@ -468,13 +468,13 @@ func (a *ArraySpan) TakeOwnership(data arrow.ArrayData) {
        }
 
        if typeID == arrow.DICTIONARY {
-               a.resizeChildren(1)
+               a.ResizeChildren(1)
                dict := data.Dictionary()
                if dict != (*array.Data)(nil) {
                        a.Children[0].TakeOwnership(dict)
                }
        } else {
-               a.resizeChildren(len(data.Children()))
+               a.ResizeChildren(len(data.Children()))
                for i, c := range data.Children() {
                        a.Children[i].TakeOwnership(c)
                }
@@ -522,7 +522,7 @@ func (a *ArraySpan) SetMembers(data arrow.ArrayData) {
        }
 
        if typeID == arrow.DICTIONARY {
-               a.resizeChildren(1)
+               a.ResizeChildren(1)
                dict := data.Dictionary()
                if dict != (*array.Data)(nil) {
                        a.Children[0].SetMembers(dict)
@@ -603,7 +603,7 @@ func FillZeroLength(dt arrow.DataType, span *ArraySpan) {
        }
 
        if dt.ID() == arrow.DICTIONARY {
-               span.resizeChildren(1)
+               span.ResizeChildren(1)
                FillZeroLength(dt.(*arrow.DictionaryType).ValueType, 
&span.Children[0])
                return
        }
@@ -616,7 +616,7 @@ func FillZeroLength(dt arrow.DataType, span *ArraySpan) {
                return
        }
 
-       span.resizeChildren(nt.NumFields())
+       span.ResizeChildren(nt.NumFields())
        for i, f := range nt.Fields() {
                FillZeroLength(f.Type, &span.Children[i])
        }
diff --git a/arrow/compute/selection.go b/arrow/compute/selection.go
index 5c0a9759..a225febd 100644
--- a/arrow/compute/selection.go
+++ b/arrow/compute/selection.go
@@ -514,7 +514,7 @@ func structTake(ctx *exec.KernelCtx, batch *exec.ExecSpan, 
out *exec.ExecResult)
        defer values.Release()
 
        // select from children without bounds checking
-       out.Children = make([]exec.ArraySpan, values.NumField())
+       out.ResizeChildren(values.NumField())
        eg, cctx := errgroup.WithContext(ctx.Ctx)
        eg.SetLimit(GetExecCtx(ctx.Ctx).NumParallel)
 
diff --git a/arrow/compute/vector_selection_test.go 
b/arrow/compute/vector_selection_test.go
index 45601f44..1b275f9d 100644
--- a/arrow/compute/vector_selection_test.go
+++ b/arrow/compute/vector_selection_test.go
@@ -1432,6 +1432,73 @@ func (tk *TakeKernelStruct) TestStruct() {
        tk.assertNoValidityBitmapUnknownNullCountJSON(tk.dt, `[{"a": 1}, {"a": 
2, "b": "hello"}]`, `[0, 1, 0]`)
 }
 
+func (tk *TakeKernelStruct) TestNestedStruct() {
+       // Define nested struct type: struct<a: int32, b: struct<x: int32, y: 
string>>
+       innerStruct := arrow.StructOf(
+               arrow.Field{Name: "x", Type: arrow.PrimitiveTypes.Int32, 
Nullable: true},
+               arrow.Field{Name: "y", Type: arrow.BinaryTypes.String, 
Nullable: true},
+       )
+       outerStruct := arrow.StructOf(
+               arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int32, 
Nullable: true},
+               arrow.Field{Name: "b", Type: innerStruct, Nullable: true},
+       )
+
+       nestedJSON := `[
+               {"a": 1, "b": {"x": 10, "y": "hello"}},
+               {"a": 2, "b": {"x": 20, "y": "world"}},
+               null,
+               {"a": 4, "b": null}
+       ]`
+
+       // Test basic reordering
+       tk.checkTake(outerStruct, nestedJSON, `[1, 0, 3]`, `[
+               {"a": 2, "b": {"x": 20, "y": "world"}},
+               {"a": 1, "b": {"x": 10, "y": "hello"}},
+               {"a": 4, "b": null}
+       ]`)
+
+       // Test with nulls at different levels
+       tk.checkTake(outerStruct, nestedJSON, `[2, 3, 0]`, `[
+               null,
+               {"a": 4, "b": null},
+               {"a": 1, "b": {"x": 10, "y": "hello"}}
+       ]`)
+
+       // Test with duplicates
+       tk.checkTake(outerStruct, nestedJSON, `[0, 0, 1, 1]`, `[
+               {"a": 1, "b": {"x": 10, "y": "hello"}},
+               {"a": 1, "b": {"x": 10, "y": "hello"}},
+               {"a": 2, "b": {"x": 20, "y": "world"}},
+               {"a": 2, "b": {"x": 20, "y": "world"}}
+       ]`)
+}
+
+func (tk *TakeKernelStruct) TestDeeplyNestedStruct() {
+       // struct<a: int32, b: struct<x: int32, y: struct<z: string>>>
+       innermostStruct := arrow.StructOf(
+               arrow.Field{Name: "z", Type: arrow.BinaryTypes.String, 
Nullable: true},
+       )
+       middleStruct := arrow.StructOf(
+               arrow.Field{Name: "x", Type: arrow.PrimitiveTypes.Int32, 
Nullable: true},
+               arrow.Field{Name: "y", Type: innermostStruct, Nullable: true},
+       )
+       outerStruct := arrow.StructOf(
+               arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int32, 
Nullable: true},
+               arrow.Field{Name: "b", Type: middleStruct, Nullable: true},
+       )
+
+       deeplyNestedJSON := `[
+               {"a": 1, "b": {"x": 10, "y": {"z": "deep"}}},
+               {"a": 2, "b": {"x": 20, "y": {"z": "deeper"}}}
+       ]`
+
+       tk.checkTake(outerStruct, deeplyNestedJSON, `[1, 0, 1]`, `[
+               {"a": 2, "b": {"x": 20, "y": {"z": "deeper"}}},
+               {"a": 1, "b": {"x": 10, "y": {"z": "deep"}}},
+               {"a": 2, "b": {"x": 20, "y": {"z": "deeper"}}}
+       ]`)
+}
+
 type TakeKernelTestChunked struct {
        TakeKernelTestTyped
 }

Reply via email to