This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new fc20f37d fix(avro): correctly set nullability for ListType (#709)
fc20f37d is described below
commit fc20f37d89be6b81ae9dd8567e00ac6943671aec
Author: Willem Jan <[email protected]>
AuthorDate: Fri Mar 13 17:56:12 2026 +0100
fix(avro): correctly set nullability for ListType (#709)
### Rationale for this change
The nullability of a list field itself is lost when converting from an Avro
schema to an Arrow schema; only the nullability of the list members is applied.
### What changes are included in this PR?
Use the buildArrowField function for list fields to reuse the shared Arrow
field creation, which applies nullability as expected. Also use
buildArrowField for the float, double, and boolean cases for consistency.
### Are these changes tested?
Added a test case for a nullable list.
### Are there any user-facing changes?
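Debatable: a nullable Avro array now maps to a nullable Arrow list field,
whereas previously the resulting field was marked non-nullable.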
---------
Co-authored-by: Willem Jan Noort <[email protected]>
---
arrow/avro/reader_test.go | 5 +++++
arrow/avro/schema.go | 10 ++++++----
arrow/avro/schema_test.go | 5 +++++
arrow/avro/testdata/alltypes.avsc | 10 ++++++++++
arrow/avro/testdata/testdata.go | 17 +++++++++--------
5 files changed, 35 insertions(+), 12 deletions(-)
diff --git a/arrow/avro/reader_test.go b/arrow/avro/reader_test.go
index 0eb513b3..4aaac675 100644
--- a/arrow/avro/reader_test.go
+++ b/arrow/avro/reader_test.go
@@ -80,6 +80,11 @@ func TestReader(t *testing.T) {
Type: arrow.BinaryTypes.Binary,
Nullable: true,
},
+ {
+ Name: "nullable_remote_ips",
+ Type:
arrow.ListOfNonNullable(arrow.BinaryTypes.Binary),
+ Nullable: true,
+ },
{
Name: "person",
Type: arrow.StructOf(
diff --git a/arrow/avro/schema.go b/arrow/avro/schema.go
index 91b1729b..4d9e7670 100644
--- a/arrow/avro/schema.go
+++ b/arrow/avro/schema.go
@@ -106,7 +106,7 @@ func arrowSchemafromAvro(n *schemaNode) {
k := strconv.FormatInt(int64(index), 10)
symbols[k] = symbol
}
- var dt = arrow.DictionaryType{IndexType:
arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered:
false}
+ dt := arrow.DictionaryType{IndexType:
arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered:
false}
sl := int64(len(symbols))
switch {
case sl <= math.MaxUint8:
@@ -125,12 +125,14 @@ func arrowSchemafromAvro(n *schemaNode) {
} else {
arrowSchemafromAvro(c)
}
+ var typ *arrow.ListType
switch c.arrowField.Nullable {
case true:
- n.arrowField = arrow.Field{Name: n.name, Type:
arrow.ListOfField(c.arrowField), Metadata: c.arrowField.Metadata}
+ typ = arrow.ListOfField(c.arrowField)
case false:
- n.arrowField = arrow.Field{Name: n.name, Type:
arrow.ListOfNonNullable(c.arrowField.Type), Metadata: c.arrowField.Metadata}
+ typ = arrow.ListOfNonNullable(c.arrowField.Type)
}
+ n.arrowField = buildArrowField(n, typ, c.arrowField.Metadata)
case "map":
n.schemaCache.Add(n.schema.(*avro.MapSchema).Values().(avro.NamedSchema).Name(),
n.schema.(*avro.MapSchema).Values())
c := n.newChild(n.name, n.schema.(*avro.MapSchema).Values())
@@ -160,7 +162,7 @@ func arrowSchemafromAvro(n *schemaNode) {
n.arrowField = buildArrowField(n,
avroPrimitiveToArrowType(string(st)), arrow.Metadata{})
}
case "float", "double", "boolean":
- n.arrowField = arrow.Field{Name: n.name, Type:
avroPrimitiveToArrowType(string(st)), Nullable: n.nullable}
+ n.arrowField = buildArrowField(n,
avroPrimitiveToArrowType(string(st)), arrow.Metadata{})
case "<ref>":
refSchema :=
n.schemaCache.Get(string(n.schema.(*avro.RefSchema).Schema().Name()))
if refSchema == nil {
diff --git a/arrow/avro/schema_test.go b/arrow/avro/schema_test.go
index 921e5e81..33b6d2a0 100644
--- a/arrow/avro/schema_test.go
+++ b/arrow/avro/schema_test.go
@@ -79,6 +79,11 @@ func TestSchemaStringEqual(t *testing.T) {
Type: arrow.BinaryTypes.Binary,
Nullable: true,
},
+ {
+ Name: "nullable_remote_ips",
+ Type:
arrow.ListOfNonNullable(arrow.BinaryTypes.Binary),
+ Nullable: true,
+ },
{
Name: "person",
Type: arrow.StructOf(
diff --git a/arrow/avro/testdata/alltypes.avsc
b/arrow/avro/testdata/alltypes.avsc
index a4e3037b..29a72e56 100644
--- a/arrow/avro/testdata/alltypes.avsc
+++ b/arrow/avro/testdata/alltypes.avsc
@@ -85,6 +85,16 @@
"bytes"
]
},
+ {
+ "name": "nullable_remote_ips",
+ "type": [
+ "null",
+ {
+ "type": "array",
+ "items": "bytes"
+ }
+ ]
+ },
{
"name": "person",
"type": {
diff --git a/arrow/avro/testdata/testdata.go b/arrow/avro/testdata/testdata.go
index 9770cfec..235231da 100644
--- a/arrow/avro/testdata/testdata.go
+++ b/arrow/avro/testdata/testdata.go
@@ -126,6 +126,7 @@ type Example struct {
Fraction *float64 `avro:"fraction" json:"fraction"`
IsEmergency bool `avro:"is_emergency"
json:"is_emergency"`
RemoteIP *ByteArray `avro:"remote_ip" json:"remote_ip"`
+ NullableRemoteIPS *[]ByteArray `avro:"nullable_remote_ips"
json:"nullable_remote_ips"`
Person PersonData `avro:"person" json:"person"`
DecimalField DecimalType `avro:"decimalField"
json:"decimalField"`
Decimal256Field DecimalType `avro:"decimal256Field"
json:"decimal256Field"`
@@ -215,12 +216,12 @@ func sampleData() Example {
InheritNamespace: "d",
Md5: MD5{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15},
},
- ID: 42,
- BigID: 42000000000,
- Temperature: func() *float32 { v := float32(36.6); return &v
}(),
- Fraction: func() *float64 { v := float64(0.75); return &v
}(),
- IsEmergency: true,
- RemoteIP: func() *ByteArray { v := ByteArray{192, 168, 1,
1}; return &v }(),
+ ID: 42,
+ BigID: 42000000000,
+ Temperature: func() *float32 { v := float32(36.6); return
&v }(),
+ Fraction: func() *float64 { v := float64(0.75); return
&v }(),
+ IsEmergency: true,
+ RemoteIP: func() *ByteArray { v := ByteArray{192, 168,
1, 1}; return &v }(),
Person: PersonData{
Lastname: "Doe",
Address: AddressUSRecord{
@@ -248,7 +249,7 @@ func sampleData() Example {
func writeOCFSampleData(td string, data Example) string {
path := filepath.Join(td, sampleAvroFileName)
- ocfFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC,
0644)
+ ocfFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC,
0o644)
if err != nil {
log.Fatal(err)
}
@@ -272,7 +273,7 @@ func writeOCFSampleData(td string, data Example) string {
func writeJSONSampleData(td string, data Example) string {
path := filepath.Join(td, sampleJSONFileName)
- jsonFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC,
0644)
+ jsonFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC,
0o644)
if err != nil {
log.Fatal(err)
}