This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 8b0c895 fix(table): Handle nullable struct with Required Field (#408)
8b0c895 is described below
commit 8b0c895f91f32ea99e8ab3303ecb3f38dbfc8523
Author: Matt Topol <[email protected]>
AuthorDate: Wed Apr 30 13:29:50 2025 -0400
fix(table): Handle nullable struct with Required Field (#408)
Fixes #398.
Have `toRequestedSchema` use arrow-go's new `NewStructArrayWithFieldsAndNulls`
function so that the source struct's null bitmap is propagated to the projected
struct array, which correctly handles required fields within a nullable struct.
---
go.mod | 6 ++--
go.sum | 16 +++++-----
table/arrow_utils.go | 31 ++++++++++++++++++-
table/table_test.go | 84 +++++++++++++++++++++++++++++++++++++++++++++++-----
4 files changed, 118 insertions(+), 19 deletions(-)
diff --git a/go.mod b/go.mod
index a1e84b7..d096f04 100644
--- a/go.mod
+++ b/go.mod
@@ -23,7 +23,7 @@ toolchain go1.24.2
require (
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.1
- github.com/apache/arrow-go/v18 v18.2.1-0.20250325140533-276892c275de
+ github.com/apache/arrow-go/v18 v18.2.1-0.20250425153947-5ae8b27ab357
github.com/aws/aws-sdk-go-v2 v1.36.3
github.com/aws/aws-sdk-go-v2/config v1.29.14
github.com/aws/aws-sdk-go-v2/credentials v1.17.67
@@ -297,7 +297,7 @@ require (
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
go.uber.org/mock v0.5.0 // indirect
golang.org/x/crypto v0.37.0 // indirect
- golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 // indirect
+ golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect
golang.org/x/mod v0.24.0 // indirect
golang.org/x/net v0.39.0 // indirect
golang.org/x/oauth2 v0.29.0 // indirect
@@ -305,7 +305,7 @@ require (
golang.org/x/term v0.31.0 // indirect
golang.org/x/text v0.24.0 // indirect
golang.org/x/time v0.11.0 // indirect
- golang.org/x/tools v0.31.0 // indirect
+ golang.org/x/tools v0.32.0 // indirect
google.golang.org/genproto v0.0.0-20250324211829-b45e905df463 //
indirect
google.golang.org/genproto/googleapis/api
v0.0.0-20250324211829-b45e905df463 // indirect
google.golang.org/genproto/googleapis/rpc
v0.0.0-20250414145226-207652e42e2e // indirect
diff --git a/go.sum b/go.sum
index 64f8652..969e51a 100644
--- a/go.sum
+++ b/go.sum
@@ -103,8 +103,8 @@ github.com/andybalholm/brotli v1.1.1
h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7X
github.com/andybalholm/brotli v1.1.1/go.mod
h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/antlr4-go/antlr/v4 v4.13.1
h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ=
github.com/antlr4-go/antlr/v4 v4.13.1/go.mod
h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw=
-github.com/apache/arrow-go/v18 v18.2.1-0.20250325140533-276892c275de
h1:wklDQWXjXSjWDoYXJvlPKii9nx6nXC8dEY/a4XRQOvI=
-github.com/apache/arrow-go/v18 v18.2.1-0.20250325140533-276892c275de/go.mod
h1:EOkJNffq8UnNiioDTUIuynsY5mDoSojZHQZ5ELgGnWM=
+github.com/apache/arrow-go/v18 v18.2.1-0.20250425153947-5ae8b27ab357
h1:Lm+F4evdybvTwpnILZTne33EE+iIdAxt5O1B4L6Irrk=
+github.com/apache/arrow-go/v18 v18.2.1-0.20250425153947-5ae8b27ab357/go.mod
h1:726FKYtoaZ2qLvPq3SK3fbiQmWV7H+rqUS7oDs6PS1U=
github.com/apache/thrift v0.21.0
h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE=
github.com/apache/thrift v0.21.0/go.mod
h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw=
github.com/apparentlymart/go-textseg/v15 v15.0.0
h1:uYvfpb3DyLSCGWnctWKGj857c6ew1u1fNQOlOtuGxQY=
@@ -843,8 +843,8 @@ golang.org/x/crypto v0.18.0/go.mod
h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1m
golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE=
golang.org/x/crypto v0.37.0/go.mod
h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
-golang.org/x/exp v0.0.0-20250305212735-054e65f0b394
h1:nDVHiLt8aIbd/VzvPWN6kSOPE7+F/fNFDSXLVYkE/Iw=
-golang.org/x/exp v0.0.0-20250305212735-054e65f0b394/go.mod
h1:sIifuuw/Yco/y6yb6+bDNfyeQ/MdPUy/hKEMYQV17cM=
+golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0
h1:R84qjqJb5nVJMxqWYb3np9L5ZsaDtB+a39EqjV0JSUM=
+golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0/go.mod
h1:S9Xr4PYopiDyqSyp5NjCrhFrqg6A5zA2E/iPHPhqnS8=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod
h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod
h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod
h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
@@ -961,16 +961,16 @@ golang.org/x/tools v0.1.12/go.mod
h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
golang.org/x/tools v0.6.0/go.mod
h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.13.0/go.mod
h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/tools v0.17.0/go.mod
h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps=
-golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU=
-golang.org/x/tools v0.31.0/go.mod
h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ=
+golang.org/x/tools v0.32.0 h1:Q7N1vhpkQv7ybVzLFtTjvQya2ewbwNDZzUgfXGqtMWU=
+golang.org/x/tools v0.32.0/go.mod
h1:ZxrU41P/wAbZD8EDa6dDCa6XfpkhJ7HFMjHJXfBDu8s=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da
h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod
h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
-gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
-gonum.org/v1/gonum v0.15.1/go.mod
h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
+gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
+gonum.org/v1/gonum v0.16.0/go.mod
h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
google.golang.org/api v0.230.0 h1:2u1hni3E+UXAXrONrrkfWpi/V6cyKVAbfGVeGtC3OxM=
google.golang.org/api v0.230.0/go.mod
h1:aqvtoMk7YkiXx+6U12arQFExiRV9D/ekvMCwCd/TksQ=
google.golang.org/appengine v1.1.0/go.mod
h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
diff --git a/table/arrow_utils.go b/table/arrow_utils.go
index 4743175..8085698 100644
--- a/table/arrow_utils.go
+++ b/table/arrow_utils.go
@@ -27,6 +27,7 @@ import (
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
+ "github.com/apache/arrow-go/v18/arrow/bitutil"
"github.com/apache/arrow-go/v18/arrow/compute"
"github.com/apache/arrow-go/v18/arrow/extensions"
"github.com/apache/arrow-go/v18/arrow/memory"
@@ -417,6 +418,15 @@ func ArrowSchemaToIceberg(sc *arrow.Schema,
downcastNsTimestamp bool, nameMappin
}
}
+func ArrowSchemaToIcebergWithFreshIDs(sc *arrow.Schema, downcastNsTimestamp
bool) (*iceberg.Schema, error) {
+ schemaWithoutIDs, err := arrowToSchemaWithoutIDs(sc,
downcastNsTimestamp)
+ if err != nil {
+ return nil, err
+ }
+
+ return iceberg.AssignFreshSchemaIDs(schemaWithoutIDs, nil)
+}
+
func arrowToSchemaWithoutIDs(sc *arrow.Schema, downcastNsTimestamp bool)
(*iceberg.Schema, error) {
withoutIDs, err := VisitArrowSchema(sc, convertToIceberg{
downcastTimestamp: downcastNsTimestamp,
@@ -813,7 +823,26 @@ func (a *arrowProjectionVisitor) Struct(st
iceberg.StructType, structArr arrow.A
}
}
- return retOrPanic(array.NewStructArrayWithFields(fieldArrs, fields))
+ var nullBitmap *memory.Buffer
+ if structArr.NullN() > 0 {
+ if structArr.Data().Offset() > 0 {
+ // the children already accounted for any offset because we used the `Field` method
+ // on the struct array in the FieldPartner accessor. So we just need to adjust the
+ // bitmap to account for the offset.
+ nullBitmap =
memory.NewResizableBuffer(compute.GetAllocator(a.ctx))
+ defer nullBitmap.Release()
+
nullBitmap.Resize(int(bitutil.BytesForBits(int64(structArr.Len()))))
+
+ bitutil.CopyBitmap(structArr.NullBitmapBytes(),
structArr.Data().Offset(), structArr.Len(),
+ nullBitmap.Bytes(), 0)
+
+ } else {
+ nullBitmap = structArr.Data().Buffers()[0]
+ }
+ }
+
+ return retOrPanic(array.NewStructArrayWithFieldsAndNulls(fieldArrs,
fields,
+ nullBitmap, structArr.NullN(), 0))
}
func (a *arrowProjectionVisitor) Field(_ iceberg.NestedField, _ arrow.Array,
fieldArr arrow.Array) arrow.Array {
diff --git a/table/table_test.go b/table/table_test.go
index 7f5e66a..3a8338f 100644
--- a/table/table_test.go
+++ b/table/table_test.go
@@ -38,6 +38,7 @@ import (
"github.com/apache/iceberg-go/table"
"github.com/google/uuid"
"github.com/pterm/pterm"
+ "github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/uptrace/bun/driver/sqliteshim"
)
@@ -320,12 +321,12 @@ func (t *TableWritingTestSuite)
TestAddFilesUnpartitioned() {
Operation: table.OpAppend,
Properties: iceberg.Properties{
"added-data-files": "5",
- "added-files-size": "3660",
+ "added-files-size": "3600",
"added-records": "5",
"total-data-files": "5",
"total-delete-files": "0",
"total-equality-deletes": "0",
- "total-files-size": "3660",
+ "total-files-size": "3600",
"total-position-deletes": "0",
"total-records": "5",
},
@@ -460,13 +461,13 @@ func (t *TableWritingTestSuite)
TestAddFilesPartitionedTable() {
Operation: table.OpAppend,
Properties: iceberg.Properties{
"added-data-files": "5",
- "added-files-size": "3660",
+ "added-files-size": "3600",
"added-records": "5",
"changed-partition-count": "1",
"total-data-files": "5",
"total-delete-files": "0",
"total-equality-deletes": "0",
- "total-files-size": "3660",
+ "total-files-size": "3600",
"total-position-deletes": "0",
"total-records": "5",
},
@@ -825,15 +826,15 @@ func (t *TableWritingTestSuite) TestReplaceDataFiles() {
Operation: table.OpOverwrite,
Properties: iceberg.Properties{
"added-data-files": "1",
- "added-files-size": "1082",
+ "added-files-size": "1068",
"added-records": "4",
"deleted-data-files": "2",
"deleted-records": "4",
- "removed-files-size": "2164",
+ "removed-files-size": "2136",
"total-data-files": "4",
"total-delete-files": "0",
"total-equality-deletes": "0",
- "total-files-size": "4328",
+ "total-files-size": "4272",
"total-position-deletes": "0",
"total-records": "10",
},
@@ -1135,3 +1136,72 @@ func TestTableWriting(t *testing.T) {
suite.Run(t, &TableWritingTestSuite{formatVersion: 1})
suite.Run(t, &TableWritingTestSuite{formatVersion: 2})
}
+
+func TestNullableStructRequiredField(t *testing.T) {
+ loc := t.TempDir()
+
+ cat, err := catalog.Load(context.Background(), "default",
iceberg.Properties{
+ "uri": ":memory:",
+ "type": "sql",
+ sql.DriverKey: sqliteshim.ShimName,
+ sql.DialectKey: string(sql.SQLite),
+ "warehouse": "file://" + loc,
+ })
+ require.NoError(t, err)
+
+ arrowSchema := arrow.NewSchema([]arrow.Field{
+ {
+ Name: "analytic", Type: arrow.StructOf(
+ arrow.Field{Name: "category", Type:
arrow.BinaryTypes.String, Nullable: true},
+ arrow.Field{Name: "desc", Type:
arrow.BinaryTypes.String, Nullable: true},
+ arrow.Field{Name: "name", Type:
arrow.BinaryTypes.String, Nullable: true},
+ arrow.Field{Name: "related_analytics", Type:
arrow.ListOf(
+ arrow.StructOf(
+ arrow.Field{Name: "category",
Type: arrow.BinaryTypes.String, Nullable: true},
+ arrow.Field{Name: "desc", Type:
arrow.BinaryTypes.String, Nullable: true},
+ arrow.Field{Name: "name", Type:
arrow.BinaryTypes.String, Nullable: true},
+ arrow.Field{Name: "type", Type:
arrow.BinaryTypes.String, Nullable: true},
+ arrow.Field{Name: "type_id",
Type: arrow.PrimitiveTypes.Int32, Nullable: false},
+ arrow.Field{Name: "uid", Type:
arrow.BinaryTypes.String, Nullable: true},
+ arrow.Field{Name: "version",
Type: arrow.BinaryTypes.String, Nullable: true},
+ ),
+ ), Nullable: true},
+ arrow.Field{Name: "type", Type:
arrow.BinaryTypes.String, Nullable: true},
+ arrow.Field{Name: "type_id", Type:
arrow.PrimitiveTypes.Int32, Nullable: false},
+ arrow.Field{Name: "uid", Type:
arrow.BinaryTypes.String, Nullable: true},
+ arrow.Field{Name: "version", Type:
arrow.BinaryTypes.String, Nullable: true},
+ ), Nullable: true,
+ },
+ {Name: "uid", Type: arrow.BinaryTypes.String, Nullable: true},
+ }, nil)
+
+ sc, err := table.ArrowSchemaToIcebergWithFreshIDs(arrowSchema, false)
+ require.NoError(t, err)
+
+ ctx := context.TODO()
+ require.NoError(t, cat.CreateNamespace(ctx,
table.Identifier{"testing"}, nil))
+ tbl, err := cat.CreateTable(ctx, table.Identifier{"testing",
"nullable_struct_required_field"}, sc,
+ catalog.WithProperties(iceberg.Properties{"format-version":
"2"}),
+ catalog.WithLocation(loc))
+ require.NoError(t, err)
+ require.NotNil(t, tbl)
+
+ bldr := array.NewRecordBuilder(memory.DefaultAllocator, arrowSchema)
+ defer bldr.Release()
+
+ const N = 100
+ bldr.Field(0).AppendNulls(N)
+ bldr.Field(1).AppendNulls(N)
+
+ rec := bldr.NewRecord()
+ defer rec.Release()
+
+ arrTable := array.NewTableFromRecords(arrowSchema, []arrow.Record{rec})
+ defer arrTable.Release()
+
+ tx := tbl.NewTransaction()
+ require.NoError(t, tx.AppendTable(ctx, arrTable, N, nil))
+ stagedTbl, err := tx.StagedTable()
+ require.NoError(t, err)
+ require.NotNil(t, stagedTbl)
+}