This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new bf168276 fix(avro): error on complex (non-nullable) unions instead of
silently dropping (#777) (#808)
bf168276 is described below
commit bf1682769528153e9c192c622c7be76413ab9a05
Author: Sai Asish Y <[email protected]>
AuthorDate: Wed May 13 10:12:34 2026 -0700
fix(avro): error on complex (non-nullable) unions instead of silently
dropping (#777) (#808)
### Rationale for this change
Fixes #777. `ArrowSchemaFromAvro` previously silently dropped
non-nullable Avro unions (e.g. `["int","string"]`) in both
`arrowSchemafromAvro` and `iterateFields`. Output schemas were missing
fields with no warning to the caller.
### What changes are included in this PR?
Panic with a clear error message in the two `case "union"` / `case
*avro.UnionSchema` arms when the union is not a simple nullable union. The
package's existing `recover()` in `ArrowSchemaFromAvro` converts this
panic into a returned error.
### Are these changes tested?
Yes, `TestComplexUnionReportsError` exercises a record with
`["int","string"]` and asserts a non-nil error mentioning `union`.
### Are there any user-facing changes?
Callers that previously received an incomplete schema for a
complex-union field now get an explicit error.
Signed-off-by: SAY-5 <[email protected]>
Co-authored-by: SAY-5 <[email protected]>
---
arrow/avro/schema.go | 11 ++++++++---
arrow/avro/schema_test.go | 25 +++++++++++++++++++++++++
2 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/arrow/avro/schema.go b/arrow/avro/schema.go
index 4d9e7670..8eb4b91a 100644
--- a/arrow/avro/schema.go
+++ b/arrow/avro/schema.go
@@ -139,13 +139,16 @@ func arrowSchemafromAvro(n *schemaNode) {
arrowSchemafromAvro(c)
n.arrowField = buildArrowField(n,
arrow.MapOf(arrow.BinaryTypes.String, c.arrowField.Type), c.arrowField.Metadata)
case "union":
- if n.schema.(*avro.UnionSchema).Nullable() {
- if len(n.schema.(*avro.UnionSchema).Types()) > 1 {
- n.schema = n.schema.(*avro.UnionSchema).Types()[1]
+ us := n.schema.(*avro.UnionSchema)
+ if us.Nullable() {
+ if len(us.Types()) > 1 {
+ n.schema = us.Types()[1]
n.union = true
n.nullable = true
arrowSchemafromAvro(n)
}
+ } else {
+ panic(fmt.Errorf("complex (non-nullable) avro union at '%v' is not supported", n.schemaPath()))
}
// Avro "fixed" field type = Arrow FixedSize Primitive BinaryType
case "fixed":
@@ -245,6 +248,8 @@ func iterateFields(n *schemaNode) {
c.nullable = true
arrowSchemafromAvro(c)
}
+ } else {
+ panic(fmt.Errorf("complex (non-nullable) avro union in field '%v' is not supported", f.Name()))
}
default:
n.schemaCache.Add(f.Name(), f.Type())
diff --git a/arrow/avro/schema_test.go b/arrow/avro/schema_test.go
index 33b6d2a0..b4b91b85 100644
--- a/arrow/avro/schema_test.go
+++ b/arrow/avro/schema_test.go
@@ -18,10 +18,12 @@ package avro
import (
"fmt"
+ "strings"
"testing"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/avro/testdata"
+ avropkg "github.com/hamba/avro/v2"
)
func TestSchemaStringEqual(t *testing.T) {
@@ -174,3 +176,26 @@ func TestSchemaStringEqual(t *testing.T) {
})
}
}
+
+func TestComplexUnionReportsError(t *testing.T) {
+ // Non-nullable union (e.g. [int, string]) is not supported and should
+ // produce a clear error rather than being silently dropped.
+ const avroSchemaJSON = `{
+ "type": "record",
+ "name": "WithComplexUnion",
+ "fields": [
+ {"name": "value", "type": ["int", "string"]}
+ ]
+ }`
+ schema, err := avropkg.Parse(avroSchemaJSON)
+ if err != nil {
+ t.Fatalf("avro parse: %v", err)
+ }
+ got, err := ArrowSchemaFromAvro(schema)
+ if err == nil {
+ t.Fatalf("expected error for complex union, got schema=%v", got)
+ }
+ if !strings.Contains(err.Error(), "union") {
+ t.Fatalf("expected error to mention union, got: %v", err)
+ }
+}