This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 8b0c895  fix(table): Handle nullable struct with Required Field (#408)
8b0c895 is described below

commit 8b0c895f91f32ea99e8ab3303ecb3f38dbfc8523
Author: Matt Topol <[email protected]>
AuthorDate: Wed Apr 30 13:29:50 2025 -0400

    fix(table): Handle nullable struct with Required Field (#408)
    
    fixes #398
    
    Have `toRequestedSchema` use the new `NewStructArrayWithFieldsAndNulls`
    function to correctly propagate null info and handle required fields
    within a nullable struct.
---
 go.mod               |  6 ++--
 go.sum               | 16 +++++-----
 table/arrow_utils.go | 31 ++++++++++++++++++-
 table/table_test.go  | 84 +++++++++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 118 insertions(+), 19 deletions(-)

diff --git a/go.mod b/go.mod
index a1e84b7..d096f04 100644
--- a/go.mod
+++ b/go.mod
@@ -23,7 +23,7 @@ toolchain go1.24.2
 
 require (
        github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.1
-       github.com/apache/arrow-go/v18 v18.2.1-0.20250325140533-276892c275de
+       github.com/apache/arrow-go/v18 v18.2.1-0.20250425153947-5ae8b27ab357
        github.com/aws/aws-sdk-go-v2 v1.36.3
        github.com/aws/aws-sdk-go-v2/config v1.29.14
        github.com/aws/aws-sdk-go-v2/credentials v1.17.67
@@ -297,7 +297,7 @@ require (
        go.opentelemetry.io/proto/otlp v1.3.1 // indirect
        go.uber.org/mock v0.5.0 // indirect
        golang.org/x/crypto v0.37.0 // indirect
-       golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 // indirect
+       golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect
        golang.org/x/mod v0.24.0 // indirect
        golang.org/x/net v0.39.0 // indirect
        golang.org/x/oauth2 v0.29.0 // indirect
@@ -305,7 +305,7 @@ require (
        golang.org/x/term v0.31.0 // indirect
        golang.org/x/text v0.24.0 // indirect
        golang.org/x/time v0.11.0 // indirect
-       golang.org/x/tools v0.31.0 // indirect
+       golang.org/x/tools v0.32.0 // indirect
        google.golang.org/genproto v0.0.0-20250324211829-b45e905df463 // indirect
        google.golang.org/genproto/googleapis/api v0.0.0-20250324211829-b45e905df463 // indirect
        google.golang.org/genproto/googleapis/rpc v0.0.0-20250414145226-207652e42e2e // indirect
diff --git a/go.sum b/go.sum
index 64f8652..969e51a 100644
--- a/go.sum
+++ b/go.sum
@@ -103,8 +103,8 @@ github.com/andybalholm/brotli v1.1.1 
h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7X
 github.com/andybalholm/brotli v1.1.1/go.mod 
h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
 github.com/antlr4-go/antlr/v4 v4.13.1 
h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ=
 github.com/antlr4-go/antlr/v4 v4.13.1/go.mod 
h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw=
-github.com/apache/arrow-go/v18 v18.2.1-0.20250325140533-276892c275de 
h1:wklDQWXjXSjWDoYXJvlPKii9nx6nXC8dEY/a4XRQOvI=
-github.com/apache/arrow-go/v18 v18.2.1-0.20250325140533-276892c275de/go.mod 
h1:EOkJNffq8UnNiioDTUIuynsY5mDoSojZHQZ5ELgGnWM=
+github.com/apache/arrow-go/v18 v18.2.1-0.20250425153947-5ae8b27ab357 
h1:Lm+F4evdybvTwpnILZTne33EE+iIdAxt5O1B4L6Irrk=
+github.com/apache/arrow-go/v18 v18.2.1-0.20250425153947-5ae8b27ab357/go.mod 
h1:726FKYtoaZ2qLvPq3SK3fbiQmWV7H+rqUS7oDs6PS1U=
 github.com/apache/thrift v0.21.0 
h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE=
 github.com/apache/thrift v0.21.0/go.mod 
h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw=
 github.com/apparentlymart/go-textseg/v15 v15.0.0 
h1:uYvfpb3DyLSCGWnctWKGj857c6ew1u1fNQOlOtuGxQY=
@@ -843,8 +843,8 @@ golang.org/x/crypto v0.18.0/go.mod 
h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1m
 golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
 golang.org/x/crypto v0.37.0/go.mod 
h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
-golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 
h1:nDVHiLt8aIbd/VzvPWN6kSOPE7+F/fNFDSXLVYkE/Iw=
-golang.org/x/exp v0.0.0-20250305212735-054e65f0b394/go.mod 
h1:sIifuuw/Yco/y6yb6+bDNfyeQ/MdPUy/hKEMYQV17cM=
+golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 
h1:R84qjqJb5nVJMxqWYb3np9L5ZsaDtB+a39EqjV0JSUM=
+golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0/go.mod 
h1:S9Xr4PYopiDyqSyp5NjCrhFrqg6A5zA2E/iPHPhqnS8=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod 
h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod 
h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
 golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod 
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
@@ -961,16 +961,16 @@ golang.org/x/tools v0.1.12/go.mod 
h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
 golang.org/x/tools v0.6.0/go.mod 
h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
 golang.org/x/tools v0.13.0/go.mod 
h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
 golang.org/x/tools v0.17.0/go.mod 
h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps=
-golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU=
-golang.org/x/tools v0.31.0/go.mod 
h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ=
+golang.org/x/tools v0.32.0 h1:Q7N1vhpkQv7ybVzLFtTjvQya2ewbwNDZzUgfXGqtMWU=
+golang.org/x/tools v0.32.0/go.mod 
h1:ZxrU41P/wAbZD8EDa6dDCa6XfpkhJ7HFMjHJXfBDu8s=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da 
h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
 golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod 
h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
-gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
-gonum.org/v1/gonum v0.15.1/go.mod 
h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
+gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
+gonum.org/v1/gonum v0.16.0/go.mod 
h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
 google.golang.org/api v0.230.0 h1:2u1hni3E+UXAXrONrrkfWpi/V6cyKVAbfGVeGtC3OxM=
 google.golang.org/api v0.230.0/go.mod 
h1:aqvtoMk7YkiXx+6U12arQFExiRV9D/ekvMCwCd/TksQ=
 google.golang.org/appengine v1.1.0/go.mod 
h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
diff --git a/table/arrow_utils.go b/table/arrow_utils.go
index 4743175..8085698 100644
--- a/table/arrow_utils.go
+++ b/table/arrow_utils.go
@@ -27,6 +27,7 @@ import (
 
        "github.com/apache/arrow-go/v18/arrow"
        "github.com/apache/arrow-go/v18/arrow/array"
+       "github.com/apache/arrow-go/v18/arrow/bitutil"
        "github.com/apache/arrow-go/v18/arrow/compute"
        "github.com/apache/arrow-go/v18/arrow/extensions"
        "github.com/apache/arrow-go/v18/arrow/memory"
@@ -417,6 +418,15 @@ func ArrowSchemaToIceberg(sc *arrow.Schema, downcastNsTimestamp bool, nameMappin
        }
 }
 
+func ArrowSchemaToIcebergWithFreshIDs(sc *arrow.Schema, downcastNsTimestamp bool) (*iceberg.Schema, error) {
+       schemaWithoutIDs, err := arrowToSchemaWithoutIDs(sc, downcastNsTimestamp)
+       if err != nil {
+               return nil, err
+       }
+
+       return iceberg.AssignFreshSchemaIDs(schemaWithoutIDs, nil)
+}
+
 func arrowToSchemaWithoutIDs(sc *arrow.Schema, downcastNsTimestamp bool) (*iceberg.Schema, error) {
        withoutIDs, err := VisitArrowSchema(sc, convertToIceberg{
                downcastTimestamp: downcastNsTimestamp,
@@ -813,7 +823,26 @@ func (a *arrowProjectionVisitor) Struct(st iceberg.StructType, structArr arrow.A
                }
        }
 
-       return retOrPanic(array.NewStructArrayWithFields(fieldArrs, fields))
+       var nullBitmap *memory.Buffer
+       if structArr.NullN() > 0 {
+               if structArr.Data().Offset() > 0 {
+                       // the children already accounted for any offset because we used the `Field` method
+                       // on the struct array in the FieldPartner accessor. So we just need to adjust the
+                       // bitmap to account for the offset.
+                       nullBitmap = memory.NewResizableBuffer(compute.GetAllocator(a.ctx))
+                       defer nullBitmap.Release()
+                       nullBitmap.Resize(int(bitutil.BytesForBits(int64(structArr.Len()))))
+
+                       bitutil.CopyBitmap(structArr.NullBitmapBytes(), structArr.Data().Offset(), structArr.Len(),
+                               nullBitmap.Bytes(), 0)
+
+               } else {
+                       nullBitmap = structArr.Data().Buffers()[0]
+               }
+       }
+
+       return retOrPanic(array.NewStructArrayWithFieldsAndNulls(fieldArrs, fields,
+               nullBitmap, structArr.NullN(), 0))
 }
 
 func (a *arrowProjectionVisitor) Field(_ iceberg.NestedField, _ arrow.Array, fieldArr arrow.Array) arrow.Array {
diff --git a/table/table_test.go b/table/table_test.go
index 7f5e66a..3a8338f 100644
--- a/table/table_test.go
+++ b/table/table_test.go
@@ -38,6 +38,7 @@ import (
        "github.com/apache/iceberg-go/table"
        "github.com/google/uuid"
        "github.com/pterm/pterm"
+       "github.com/stretchr/testify/require"
        "github.com/stretchr/testify/suite"
        "github.com/uptrace/bun/driver/sqliteshim"
 )
@@ -320,12 +321,12 @@ func (t *TableWritingTestSuite) TestAddFilesUnpartitioned() {
                        Operation: table.OpAppend,
                        Properties: iceberg.Properties{
                                "added-data-files":       "5",
-                               "added-files-size":       "3660",
+                               "added-files-size":       "3600",
                                "added-records":          "5",
                                "total-data-files":       "5",
                                "total-delete-files":     "0",
                                "total-equality-deletes": "0",
-                               "total-files-size":       "3660",
+                               "total-files-size":       "3600",
                                "total-position-deletes": "0",
                                "total-records":          "5",
                        },
@@ -460,13 +461,13 @@ func (t *TableWritingTestSuite) TestAddFilesPartitionedTable() {
                        Operation: table.OpAppend,
                        Properties: iceberg.Properties{
                                "added-data-files":        "5",
-                               "added-files-size":        "3660",
+                               "added-files-size":        "3600",
                                "added-records":           "5",
                                "changed-partition-count": "1",
                                "total-data-files":        "5",
                                "total-delete-files":      "0",
                                "total-equality-deletes":  "0",
-                               "total-files-size":        "3660",
+                               "total-files-size":        "3600",
                                "total-position-deletes":  "0",
                                "total-records":           "5",
                        },
@@ -825,15 +826,15 @@ func (t *TableWritingTestSuite) TestReplaceDataFiles() {
                Operation: table.OpOverwrite,
                Properties: iceberg.Properties{
                        "added-data-files":       "1",
-                       "added-files-size":       "1082",
+                       "added-files-size":       "1068",
                        "added-records":          "4",
                        "deleted-data-files":     "2",
                        "deleted-records":        "4",
-                       "removed-files-size":     "2164",
+                       "removed-files-size":     "2136",
                        "total-data-files":       "4",
                        "total-delete-files":     "0",
                        "total-equality-deletes": "0",
-                       "total-files-size":       "4328",
+                       "total-files-size":       "4272",
                        "total-position-deletes": "0",
                        "total-records":          "10",
                },
@@ -1135,3 +1136,72 @@ func TestTableWriting(t *testing.T) {
        suite.Run(t, &TableWritingTestSuite{formatVersion: 1})
        suite.Run(t, &TableWritingTestSuite{formatVersion: 2})
 }
+
+func TestNullableStructRequiredField(t *testing.T) {
+       loc := t.TempDir()
+
+       cat, err := catalog.Load(context.Background(), "default", iceberg.Properties{
+               "uri":          ":memory:",
+               "type":         "sql",
+               sql.DriverKey:  sqliteshim.ShimName,
+               sql.DialectKey: string(sql.SQLite),
+               "warehouse":    "file://" + loc,
+       })
+       require.NoError(t, err)
+
+       arrowSchema := arrow.NewSchema([]arrow.Field{
+               {
+                       Name: "analytic", Type: arrow.StructOf(
+                               arrow.Field{Name: "category", Type: arrow.BinaryTypes.String, Nullable: true},
+                               arrow.Field{Name: "desc", Type: arrow.BinaryTypes.String, Nullable: true},
+                               arrow.Field{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
+                               arrow.Field{Name: "related_analytics", Type: arrow.ListOf(
+                                       arrow.StructOf(
+                                               arrow.Field{Name: "category", Type: arrow.BinaryTypes.String, Nullable: true},
+                                               arrow.Field{Name: "desc", Type: arrow.BinaryTypes.String, Nullable: true},
+                                               arrow.Field{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
+                                               arrow.Field{Name: "type", Type: arrow.BinaryTypes.String, Nullable: true},
+                                               arrow.Field{Name: "type_id", Type: arrow.PrimitiveTypes.Int32, Nullable: false},
+                                               arrow.Field{Name: "uid", Type: arrow.BinaryTypes.String, Nullable: true},
+                                               arrow.Field{Name: "version", Type: arrow.BinaryTypes.String, Nullable: true},
+                                       ),
+                               ), Nullable: true},
+                               arrow.Field{Name: "type", Type: arrow.BinaryTypes.String, Nullable: true},
+                               arrow.Field{Name: "type_id", Type: arrow.PrimitiveTypes.Int32, Nullable: false},
+                               arrow.Field{Name: "uid", Type: arrow.BinaryTypes.String, Nullable: true},
+                               arrow.Field{Name: "version", Type: arrow.BinaryTypes.String, Nullable: true},
+                       ), Nullable: true,
+               },
+               {Name: "uid", Type: arrow.BinaryTypes.String, Nullable: true},
+       }, nil)
+
+       sc, err := table.ArrowSchemaToIcebergWithFreshIDs(arrowSchema, false)
+       require.NoError(t, err)
+
+       ctx := context.TODO()
+       require.NoError(t, cat.CreateNamespace(ctx, table.Identifier{"testing"}, nil))
+       tbl, err := cat.CreateTable(ctx, table.Identifier{"testing", "nullable_struct_required_field"}, sc,
+               catalog.WithProperties(iceberg.Properties{"format-version": "2"}),
+               catalog.WithLocation(loc))
+       require.NoError(t, err)
+       require.NotNil(t, tbl)
+
+       bldr := array.NewRecordBuilder(memory.DefaultAllocator, arrowSchema)
+       defer bldr.Release()
+
+       const N = 100
+       bldr.Field(0).AppendNulls(N)
+       bldr.Field(1).AppendNulls(N)
+
+       rec := bldr.NewRecord()
+       defer rec.Release()
+
+       arrTable := array.NewTableFromRecords(arrowSchema, []arrow.Record{rec})
+       defer arrTable.Release()
+
+       tx := tbl.NewTransaction()
+       require.NoError(t, tx.AppendTable(ctx, arrTable, N, nil))
+       stagedTbl, err := tx.StagedTable()
+       require.NoError(t, err)
+       require.NotNil(t, stagedTbl)
+}

Reply via email to