This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new f26f59ec fix: correctly initialize SchemaField.ColIndex (#591)
f26f59ec is described below

commit f26f59ec289e488f8b4ddf0d64eafd7cdd8e322c
Author: James Guthrie <[email protected]>
AuthorDate: Tue Dec 2 15:52:48 2025 +0000

    fix: correctly initialize SchemaField.ColIndex (#591)
    
    ### Rationale for this change
    
    The documentation for SchemaField says:
    
    > ColIndex is only populated (not -1) when it is a leaf column.
    
    But the implementation never actually initializes the value to -1, so it
    is incorrectly set to 0 on non-leaf nodes.
    
    ### What changes are included in this PR?
    
    This commit adds a helper function to correctly initialize SchemaField,
    and comprehensive test cases to cover all existing branches.
    
    ### Are these changes tested?
    
    Yes, see test cases.
    
    ### Are there any user-facing changes?
---
 parquet/pqarrow/schema.go      |  24 ++--
 parquet/pqarrow/schema_test.go | 310 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 326 insertions(+), 8 deletions(-)

diff --git a/parquet/pqarrow/schema.go b/parquet/pqarrow/schema.go
index 7c56e333..aac64825 100644
--- a/parquet/pqarrow/schema.go
+++ b/parquet/pqarrow/schema.go
@@ -685,7 +685,7 @@ func listToSchemaField(n *schema.GroupNode, currentLevels 
file.LevelInfo, ctx *s
 
        currentLevels.Increment(n)
 
-       out.Children = make([]SchemaField, n.NumFields())
+       out.Children = makeSchemaFields(n.NumFields())
        ctx.LinkParent(out, parent)
        ctx.LinkParent(&out.Children[0], out)
 
@@ -774,7 +774,7 @@ func listToSchemaField(n *schema.GroupNode, currentLevels 
file.LevelInfo, ctx *s
 
 func groupToStructField(n *schema.GroupNode, currentLevels file.LevelInfo, ctx 
*schemaTree, out *SchemaField) error {
        arrowFields := make([]arrow.Field, 0, n.NumFields())
-       out.Children = make([]SchemaField, n.NumFields())
+       out.Children = makeSchemaFields(n.NumFields())
 
        for i := 0; i < n.NumFields(); i++ {
                if err := nodeToSchemaField(n.Field(i), currentLevels, ctx, 
out, &out.Children[i]); err != nil {
@@ -825,10 +825,10 @@ func mapToSchemaField(n *schema.GroupNode, currentLevels 
file.LevelInfo, ctx *sc
 
        currentLevels.Increment(n)
        repeatedAncestorDef := currentLevels.IncrementRepeated()
-       out.Children = make([]SchemaField, 1)
+       out.Children = makeSchemaFields(1)
 
        kvfield := &out.Children[0]
-       kvfield.Children = make([]SchemaField, 2)
+       kvfield.Children = makeSchemaFields(2)
 
        keyField := &kvfield.Children[0]
        valueField := &kvfield.Children[1]
@@ -876,7 +876,7 @@ func variantToSchemaField(n *schema.GroupNode, 
currentLevels file.LevelInfo, ctx
 
        if n.RepetitionType() == parquet.Repetitions.Repeated {
                // list of variants
-               out.Children = make([]SchemaField, 1)
+               out.Children = makeSchemaFields(1)
                repeatedAncestorDef := currentLevels.IncrementRepeated()
                if err := groupToStructField(n, currentLevels, ctx, 
&out.Children[0]); err != nil {
                        return err
@@ -925,7 +925,7 @@ func groupToSchemaField(n *schema.GroupNode, currentLevels 
file.LevelInfo, ctx *
                //   r/o TYPE[0] f0
                //   r/o TYPE[1] f1
                // }
-               out.Children = make([]SchemaField, 1)
+               out.Children = makeSchemaFields(1)
                repeatedAncestorDef := currentLevels.IncrementRepeated()
                if err := groupToStructField(n, currentLevels, ctx, 
&out.Children[0]); err != nil {
                        return err
@@ -987,7 +987,7 @@ func nodeToSchemaField(n schema.Node, currentLevels 
file.LevelInfo, ctx *schemaT
        if primitive.RepetitionType() == parquet.Repetitions.Repeated {
                // one-level list encoding e.g. a: repeated int32;
                repeatedAncestorDefLevel := currentLevels.IncrementRepeated()
-               out.Children = make([]SchemaField, 1)
+               out.Children = makeSchemaFields(1)
                child := arrow.Field{Name: primitive.Name(), Type: arrowType, 
Nullable: false}
                populateLeaf(colIndex, &child, currentLevels, ctx, out, 
&out.Children[0])
                out.Field = &arrow.Field{Name: primitive.Name(), Type: 
arrow.ListOf(child.Type), Nullable: false,
@@ -1206,7 +1206,7 @@ func NewSchemaManifest(sc *schema.Schema, meta 
metadata.KeyValueMetadata, props
                ColIndexToField: make(map[int]*SchemaField),
                ChildToParent:   make(map[*SchemaField]*SchemaField),
                descr:           sc,
-               Fields:          make([]SchemaField, sc.Root().NumFields()),
+               Fields:          makeSchemaFields(sc.Root().NumFields()),
        }
        ctx.props = props
        if ctx.props == nil {
@@ -1258,3 +1258,11 @@ func FromParquet(sc *schema.Schema, props 
*ArrowReadProperties, kv metadata.KeyV
        }
        return arrow.NewSchema(fields, manifest.SchemaMeta), nil
 }
+
+func makeSchemaFields(n int) []SchemaField {
+       fields := make([]SchemaField, n)
+       for i := range fields {
+               fields[i].ColIndex = -1
+       }
+       return fields
+}
diff --git a/parquet/pqarrow/schema_test.go b/parquet/pqarrow/schema_test.go
index 6f5d14c7..e6c7a5a3 100644
--- a/parquet/pqarrow/schema_test.go
+++ b/parquet/pqarrow/schema_test.go
@@ -426,6 +426,316 @@ func TestListStructBackwardCompatible(t *testing.T) {
        assert.True(t, arrowSchema.Equal(arrsc), arrowSchema.String(), 
arrsc.String())
 }
 
+func TestNestedFieldIndex(t *testing.T) {
+       cases := []struct {
+               name string
+               s    *schema.Schema
+       }{
+               {
+                       name: "RepeatedPrimitive",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       
schema.Must(schema.NewPrimitiveNodeLogical(
+                                               "tags",
+                                               parquet.Repetitions.Repeated,
+                                               schema.StringLogicalType{},
+                                               parquet.Types.ByteArray,
+                                               0,
+                                               -1,
+                                       )),
+                               }, -1),
+                       )),
+               }, {
+                       name: "SimpleList",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       schema.MustGroup(schema.ListOf(
+                                               
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                       "element",
+                                                       
parquet.Repetitions.Required,
+                                                       
schema.StringLogicalType{},
+                                                       parquet.Types.ByteArray,
+                                                       0,
+                                                       -1,
+                                               )),
+                                               parquet.Repetitions.Required,
+                                               -1,
+                                       )),
+                               }, -1),
+                       )),
+               },
+               {
+                       name: "ListOfOptionalElements",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       schema.MustGroup(schema.ListOf(
+                                               
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                       "element",
+                                                       
parquet.Repetitions.Optional,
+                                                       
schema.StringLogicalType{},
+                                                       parquet.Types.ByteArray,
+                                                       1,
+                                                       -1,
+                                               )),
+                                               parquet.Repetitions.Required,
+                                               -1,
+                                       )),
+                               }, -1),
+                       )),
+               },
+               {
+                       name: "NestedStruct",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       
schema.Must(schema.NewPrimitiveNodeLogical(
+                                               "name",
+                                               parquet.Repetitions.Required,
+                                               schema.StringLogicalType{},
+                                               parquet.Types.ByteArray,
+                                               0,
+                                               -1,
+                                       )),
+                                       
schema.MustGroup(schema.NewGroupNode("address", parquet.Repetitions.Required, 
schema.FieldList{
+                                               
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                       "city",
+                                                       
parquet.Repetitions.Required,
+                                                       
schema.StringLogicalType{},
+                                                       parquet.Types.ByteArray,
+                                                       1,
+                                                       -1,
+                                               )),
+                                               
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                       "street",
+                                                       
parquet.Repetitions.Optional,
+                                                       
schema.StringLogicalType{},
+                                                       parquet.Types.ByteArray,
+                                                       2,
+                                                       -1,
+                                               )),
+                                       }, 3)),
+                               }, -1),
+                       )),
+               },
+               {
+                       name: "ListOfStructs",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       schema.MustGroup(schema.ListOf(
+                                               
schema.MustGroup(schema.NewGroupNode("contact", parquet.Repetitions.Required, 
schema.FieldList{
+                                                       
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                               "email",
+                                                               
parquet.Repetitions.Required,
+                                                               
schema.StringLogicalType{},
+                                                               
parquet.Types.ByteArray,
+                                                               1,
+                                                               -1,
+                                                       )),
+                                               }, 2)),
+                                               parquet.Repetitions.Required,
+                                               -1,
+                                       )),
+                               }, -1),
+                       )),
+               },
+               {
+                       name: "MapRequiredValues",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       schema.MustGroup(schema.MapOf(
+                                               "attrs",
+                                               
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                       "key",
+                                                       
parquet.Repetitions.Required,
+                                                       
schema.StringLogicalType{},
+                                                       parquet.Types.ByteArray,
+                                                       1,
+                                                       -1,
+                                               )),
+                                               
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                       "value",
+                                                       
parquet.Repetitions.Required,
+                                                       
schema.StringLogicalType{},
+                                                       parquet.Types.ByteArray,
+                                                       2,
+                                                       -1,
+                                               )),
+                                               parquet.Repetitions.Required,
+                                               0,
+                                       )),
+                               }, -1),
+                       )),
+               },
+               {
+                       name: "RepeatedGroup",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       
schema.MustGroup(schema.NewGroupNode("items", parquet.Repetitions.Repeated, 
schema.FieldList{
+                                               
schema.Must(schema.NewPrimitiveNode(
+                                                       "id",
+                                                       
parquet.Repetitions.Required,
+                                                       parquet.Types.Int32,
+                                                       0,
+                                                       -1,
+                                               )),
+                                               
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                       "tag",
+                                                       
parquet.Repetitions.Optional,
+                                                       
schema.StringLogicalType{},
+                                                       parquet.Types.ByteArray,
+                                                       1,
+                                                       -1,
+                                               )),
+                                       }, 2)),
+                               }, -1),
+                       )),
+               },
+               {
+                       name: "DeeplyNested",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       
schema.MustGroup(schema.NewGroupNode("level1", parquet.Repetitions.Required, 
schema.FieldList{
+                                               
schema.MustGroup(schema.NewGroupNode("level2", parquet.Repetitions.Required, 
schema.FieldList{
+                                                       
schema.MustGroup(schema.ListOf(
+                                                               
schema.Must(schema.NewPrimitiveNode(
+                                                                       
"element",
+                                                                       
parquet.Repetitions.Required,
+                                                                       
parquet.Types.Int32,
+                                                                       2,
+                                                                       -1,
+                                                               )),
+                                                               
parquet.Repetitions.Required,
+                                                               -1,
+                                                       )),
+                                               }, 1)),
+                                       }, 0)),
+                               }, -1),
+                       )),
+               },
+               {
+                       name: "ListOfMaps",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       schema.MustGroup(schema.ListOf(
+                                               schema.MustGroup(schema.MapOf(
+                                                       "map_item",
+                                                       
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                               "key",
+                                                               
parquet.Repetitions.Required,
+                                                               
schema.StringLogicalType{},
+                                                               
parquet.Types.ByteArray,
+                                                               1,
+                                                               -1,
+                                                       )),
+                                                       
schema.Must(schema.NewPrimitiveNode(
+                                                               "value",
+                                                               
parquet.Repetitions.Required,
+                                                               
parquet.Types.Int32,
+                                                               2,
+                                                               -1,
+                                                       )),
+                                                       
parquet.Repetitions.Required,
+                                                       0,
+                                               )),
+                                               parquet.Repetitions.Required,
+                                               -1,
+                                       )),
+                               }, -1),
+                       )),
+               },
+               {
+                       name: "RepeatedVariant",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       
schema.MustGroup(schema.NewGroupNodeLogical(
+                                               "variant_list",
+                                               parquet.Repetitions.Repeated,
+                                               schema.FieldList{
+                                                       
schema.Must(schema.NewPrimitiveNode(
+                                                               "metadata",
+                                                               
parquet.Repetitions.Required,
+                                                               
parquet.Types.ByteArray,
+                                                               1,
+                                                               -1,
+                                                       )),
+                                                       
schema.Must(schema.NewPrimitiveNode(
+                                                               "value",
+                                                               
parquet.Repetitions.Optional,
+                                                               
parquet.Types.ByteArray,
+                                                               2,
+                                                               -1,
+                                                       )),
+                                               },
+                                               schema.VariantLogicalType{},
+                                               0,
+                                       )),
+                               }, -1),
+                       )),
+               },
+               {
+                       name: "MapNestedKeyAndValue",
+                       s: schema.NewSchema(schema.MustGroup(
+                               schema.NewGroupNode("schema", 
parquet.Repetitions.Required, schema.FieldList{
+                                       schema.MustGroup(schema.MapOf(
+                                               "attrs",
+                                               // Nested key: struct with id 
and name
+                                               
schema.MustGroup(schema.NewGroupNode("key_struct", 
parquet.Repetitions.Required, schema.FieldList{
+                                                       
schema.Must(schema.NewPrimitiveNode(
+                                                               "id",
+                                                               
parquet.Repetitions.Required,
+                                                               
parquet.Types.Int32,
+                                                               1,
+                                                               -1,
+                                                       )),
+                                                       
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                               "name",
+                                                               
parquet.Repetitions.Required,
+                                                               
schema.StringLogicalType{},
+                                                               
parquet.Types.ByteArray,
+                                                               2,
+                                                               -1,
+                                                       )),
+                                               }, 3)),
+                                               // Nested value: list of strings
+                                               schema.MustGroup(schema.ListOf(
+                                                       
schema.Must(schema.NewPrimitiveNodeLogical(
+                                                               "element",
+                                                               
parquet.Repetitions.Required,
+                                                               
schema.StringLogicalType{},
+                                                               
parquet.Types.ByteArray,
+                                                               5,
+                                                               -1,
+                                                       )),
+                                                       
parquet.Repetitions.Required,
+                                                       -1,
+                                               )),
+                                               parquet.Repetitions.Required,
+                                               0,
+                                       )),
+                               }, -1),
+                       )),
+               },
+       }
+
+       for _, tc := range cases {
+               t.Run(tc.name, func(t *testing.T) {
+                       manifest, err := pqarrow.NewSchemaManifest(tc.s, 
metadata.KeyValueMetadata{}, nil)
+                       assert.NoError(t, err)
+                       assertFieldColIndexCorrect(t, manifest.Fields)
+               })
+       }
+}
+
+func assertFieldColIndexCorrect(t *testing.T, fields []pqarrow.SchemaField) {
+       for _, field := range fields {
+               if field.Children != nil {
+                       assert.Equal(t, -1, field.ColIndex)
+                       assertFieldColIndexCorrect(t, field.Children)
+               } else {
+                       assert.NotEqual(t, -1, field.Children)
+               }
+       }
+}
+
 // TestUnsupportedTypes tests the error message for unsupported types. This 
test should be updated
 // when support for these types is added.
 func TestUnsupportedTypes(t *testing.T) {

Reply via email to