joellubi commented on code in PR #43555:
URL: https://github.com/apache/arrow/pull/43555#discussion_r1705561902
##########
go/arrow/util/protobuf_reflect.go:
##########
@@ -401,24 +396,40 @@ func (pmr protobufMapReflection) generateKeyValuePairs()
chan protobufMapKeyValu
go func() {
defer close(out)
- for _, k := range pmr.rValue.MapKeys() {
+ if !pmr.rValue.IsValid() {
kvp := protobufMapKeyValuePairReflection{
k: ProtobufFieldReflection{
parent: pmr.parent,
descriptor: pmr.descriptor.MapKey(),
- prValue: getMapKey(k),
- rValue: k,
schemaOptions: pmr.schemaOptions,
},
v: ProtobufFieldReflection{
parent: pmr.parent,
descriptor:
pmr.descriptor.MapValue(),
- prValue:
pmr.prValue.Map().Get(protoreflect.MapKey(getMapKey(k))),
- rValue: pmr.rValue.MapIndex(k),
schemaOptions: pmr.schemaOptions,
},
}
out <- kvp
+ } else {
+ for _, k := range pmr.rValue.MapKeys() {
Review Comment:
May be a bit nicer instead to `return` early from the conditional and get
rid of the need for `else`, since this block already has considerable nesting.
```suggestion
return
}
for _, k := range pmr.rValue.MapKeys() {
```
##########
go/arrow/util/protobuf_reflect_test.go:
##########
@@ -30,14 +34,50 @@ import (
"google.golang.org/protobuf/types/known/anypb"
)
-func SetupTest() util_message.AllTheTypes {
- msg := util_message.ExampleMessage{
- Field1: "Example",
+type Fixture struct {
+ msg proto.Message
+ schema string
+ jsonStr string
+ nullJsonStr string
+}
+
+type J map[string]any
+
+func AllTheTypesFixture() Fixture {
+ e := J{"field1": "Example"}
+
+ m := J{
+ "str": "Hello",
+ "int32": 10,
+ "int64": 100,
+ "sint32": -10,
+ "sin64": -100,
+ "uint32": 10,
+ "uint64": 100,
+ "fixed32": 10,
+ "fixed64": 1000,
+ "sfixed32": 10,
+ "bool": false,
+ "bytes": "SGVsbG8sIHdvcmxkIQ==",
+ "double": 1.1,
+ "enum": "OPTION_1",
+ "message": e,
+ "oneof": []any{0, "World"},
+ "any": J{"field1": "Example"},
+ "simple_map": []J{{"key": 99, "value": "Hello"}},
+ "complex_map": []J{{"key": "complex", "value": e}},
+ "simple_list": []any{"Hello", "World"},
+ "complex_list": []J{e},
}
+ jm, _ := json.Marshal(m)
Review Comment:
Would be nice to catch this if it error occurs during testing.
```suggestion
jm, err := json.Marshal(m)
if err != nil {
panic(err)
}
```
##########
go/arrow/util/protobuf_reflect_test.go:
##########
@@ -169,123 +369,140 @@ func TestGetSchema(t *testing.T) {
- Double: type=float64, nullable
- Enum: type=dictionary<values=utf8, indices=int32, ordered=false>,
nullable
- Oneofstring: type=utf8, nullable`
-
- require.Equal(t, want, got, "got: %s\nwant: %s", got, want)
+ CheckSchema(t, pmr, want)
onlyEnum := func(pfr *ProtobufFieldReflection) bool {
return !pfr.isEnum()
}
- got = NewProtobufMessageReflection(
- &msg,
+ pmr = NewProtobufMessageReflection(
+ f.msg,
WithExclusionPolicy(onlyEnum),
WithEnumHandler(EnumNumber),
- ).Schema().String()
+ )
want = `schema:
fields: 1
- enum: type=int32, nullable`
+ CheckSchema(t, pmr, want)
- require.Equal(t, want, got, "got: %s\nwant: %s", got, want)
-
- got = NewProtobufMessageReflection(
- &msg,
+ pmr = NewProtobufMessageReflection(
+ f.msg,
WithExclusionPolicy(onlyEnum),
WithEnumHandler(EnumValue),
- ).Schema().String()
+ )
want = `schema:
fields: 1
- enum: type=utf8, nullable`
-
- require.Equal(t, want, got, "got: %s\nwant: %s", got, want)
+ CheckSchema(t, pmr, want)
}
func TestRecordFromProtobuf(t *testing.T) {
- msg := SetupTest()
+ f := AllTheTypesFixture()
- pmr := NewProtobufMessageReflection(&msg,
WithOneOfHandler(OneOfDenseUnion))
- schema := pmr.Schema()
- got := pmr.Record(nil)
- jsonStr := `[
- {
- "str":"Hello",
- "int32":10,
- "int64":100,
- "sint32":-10,
- "sin64":-100,
- "uint32":10,
- "uint64":100,
- "fixed32":10,
- "fixed64":1000,
- "sfixed32":10,
- "bool":false,
- "bytes":"SGVsbG8sIHdvcmxkIQ==",
- "double":1.1,
- "enum":"OPTION_1",
- "message":{"field1":"Example"},
- "oneof": [0, "World"],
- "any":{"field1":"Example"},
- "simple_map":[{"key":99,"value":"Hello"}],
-
"complex_map":[{"key":"complex","value":{"field1":"Example"}}],
- "simple_list":["Hello","World"],
- "complex_list":[{"field1":"Example"}]
- }
- ]`
- want, _, err := array.RecordFromJSON(memory.NewGoAllocator(), schema,
strings.NewReader(jsonStr))
-
- require.NoError(t, err)
- require.EqualExportedValues(t, got, want, "got: %s\nwant: %s", got,
want)
+ pmr := NewProtobufMessageReflection(f.msg,
WithOneOfHandler(OneOfDenseUnion))
+ CheckRecord(t, pmr, fmt.Sprintf(`[%s]`, f.jsonStr))
onlyEnum := func(pfr *ProtobufFieldReflection) bool { return
!pfr.isEnum() }
- pmr = NewProtobufMessageReflection(&msg, WithExclusionPolicy(onlyEnum),
WithEnumHandler(EnumValue))
- got = pmr.Record(nil)
- jsonStr = `[ { "enum":"OPTION_1" } ]`
- want, _, err = array.RecordFromJSON(memory.NewGoAllocator(),
pmr.Schema(), strings.NewReader(jsonStr))
- require.NoError(t, err)
- require.True(t, array.RecordEqual(got, want), "got: %s\nwant: %s", got,
want)
+ pmr = NewProtobufMessageReflection(f.msg,
WithExclusionPolicy(onlyEnum), WithEnumHandler(EnumValue))
+ jsonStr := `[ { "enum":"OPTION_1" } ]`
+ CheckRecord(t, pmr, jsonStr)
- pmr = NewProtobufMessageReflection(&msg, WithExclusionPolicy(onlyEnum),
WithEnumHandler(EnumNumber))
- got = pmr.Record(nil)
+ pmr = NewProtobufMessageReflection(f.msg,
WithExclusionPolicy(onlyEnum), WithEnumHandler(EnumNumber))
jsonStr = `[ { "enum":"1" } ]`
- want, _, err = array.RecordFromJSON(memory.NewGoAllocator(),
pmr.Schema(), strings.NewReader(jsonStr))
- require.NoError(t, err)
- require.True(t, array.RecordEqual(got, want), "got: %s\nwant: %s", got,
want)
+ CheckRecord(t, pmr, jsonStr)
}
func TestNullRecordFromProtobuf(t *testing.T) {
pmr := NewProtobufMessageReflection(&util_message.AllTheTypes{})
- schema := pmr.Schema()
- got := pmr.Record(nil)
- _, _ = got.MarshalJSON()
- jsonStr := `[
- {
- "str":"",
- "int32":0,
- "int64":0,
- "sint32":0,
- "sin64":0,
- "uint32":0,
- "uint64":0,
- "fixed32":0,
- "fixed64":0,
- "sfixed32":0,
- "bool":false,
- "bytes":"",
- "double":0,
- "enum":"OPTION_0",
- "message":null,
- "oneofmessage":{"field1":""},
- "oneofstring":"",
- "any":null,
- "simple_map":[],
- "complex_map":[],
- "simple_list":[],
- "complex_list":[]
- }
- ]`
+ f := AllTheTypesFixture()
+ CheckRecord(t, pmr, fmt.Sprintf(`[%s]`, f.nullJsonStr))
+}
- want, _, err := array.RecordFromJSON(memory.NewGoAllocator(), schema,
strings.NewReader(jsonStr))
+func TestExcludedNested(t *testing.T) {
+ msg := util_message.ExampleMessage{
+ Field1: "Example",
+ }
+ schema := `schema:
+ fields: 2
+ - simple_a: type=list<item: struct<field1: utf8>, nullable>, nullable
+ - simple_b: type=list<item: struct<field1: utf8>, nullable>, nullable`
- require.NoError(t, err)
- require.EqualExportedValues(t, got, want, "got: %s\nwant: %s", got,
want)
+ simpleNested := util_message.SimpleNested{
+ SimpleA: []*util_message.ExampleMessage{&msg},
+ SimpleB: []*util_message.ExampleMessage{&msg},
+ }
+ pmr := NewProtobufMessageReflection(&simpleNested)
+ jsonStr := `[{ "simple_a":[{"field1":"Example"}],
"simple_b":[{"field1":"Example"}] }]`
+ CheckSchema(t, pmr, schema)
+ CheckRecord(t, pmr, jsonStr)
+
+ //exclude one value
+ simpleNested = util_message.SimpleNested{
+ SimpleA: []*util_message.ExampleMessage{&msg},
+ }
+ jsonStr = `[{ "simple_a":[{"field1":"Example"}] }]`
+ CheckSchema(t, pmr, schema)
+ CheckRecord(t, pmr, jsonStr)
+
+ //exclude both values
+ simpleNested = util_message.SimpleNested{}
+ jsonStr = `[{ "simple_a":[{}] }]`
+ CheckSchema(t, pmr, schema)
+ CheckRecord(t, pmr, jsonStr)
+
+ f := AllTheTypesNoAnyFixture()
+ schema = `schema:
+ fields: 2
+ - all_the_types_no_any_a: type=list<item: struct<str: utf8, int32: int32,
int64: int64, sint32: int32, sin64: int64, uint32: uint32, uint64: uint64,
fixed32: uint32, fixed64: uint64, sfixed32: int32, bool: bool, bytes: binary,
double: float64, enum: dictionary<values=utf8, indices=int32, ordered=false>,
message: struct<field1: utf8>, oneofstring: utf8, oneofmessage: struct<field1:
utf8>, simple_map: map<int32, utf8, items_nullable>, complex_map: map<utf8,
struct<field1: utf8>, items_nullable>, simple_list: list<item: utf8, nullable>,
complex_list: list<item: struct<field1: utf8>, nullable>>, nullable>, nullable
+ - all_the_types_no_any_b: type=list<item: struct<str: utf8, int32: int32,
int64: int64, sint32: int32, sin64: int64, uint32: uint32, uint64: uint64,
fixed32: uint32, fixed64: uint64, sfixed32: int32, bool: bool, bytes: binary,
double: float64, enum: dictionary<values=utf8, indices=int32, ordered=false>,
message: struct<field1: utf8>, oneofstring: utf8, oneofmessage: struct<field1:
utf8>, simple_map: map<int32, utf8, items_nullable>, complex_map: map<utf8,
struct<field1: utf8>, items_nullable>, simple_list: list<item: utf8, nullable>,
complex_list: list<item: struct<field1: utf8>, nullable>>, nullable>, nullable`
+
+ complexNested := util_message.ComplexNested{
+ AllTheTypesNoAnyA:
[]*util_message.AllTheTypesNoAny{f.msg.(*util_message.AllTheTypesNoAny)},
+ AllTheTypesNoAnyB:
[]*util_message.AllTheTypesNoAny{f.msg.(*util_message.AllTheTypesNoAny)},
+ }
+ jsonStr = fmt.Sprintf(`[{ "all_the_types_a": [%s], "all_the_types_b":
[%s] }]`, f.jsonStr, f.jsonStr)
+ pmr = NewProtobufMessageReflection(&complexNested)
+ CheckSchema(t, pmr, schema)
+ CheckRecord(t, pmr, jsonStr)
+
+ // exclude one value
+ complexNested = util_message.ComplexNested{
+ AllTheTypesNoAnyB:
[]*util_message.AllTheTypesNoAny{f.msg.(*util_message.AllTheTypesNoAny)},
+ }
+ jsonStr = fmt.Sprintf(`[{ "all_the_types_a": [%s], "all_the_types_b":
[%s] }]`, f.nullJsonStr, f.jsonStr)
+ pmr = NewProtobufMessageReflection(&complexNested)
+ CheckSchema(t, pmr, schema)
+ CheckRecord(t, pmr, jsonStr)
+
+ // exclude both values
+ complexNested = util_message.ComplexNested{}
+ jsonStr = fmt.Sprintf(`[{ "all_the_types_a": [%s], "all_the_types_b":
[%s] }]`, f.nullJsonStr, f.nullJsonStr)
+ pmr = NewProtobufMessageReflection(&complexNested)
+ CheckSchema(t, pmr, schema)
+ CheckRecord(t, pmr, jsonStr)
+
+ schema = `schema:
+ fields: 2
+ - complex_nested: type=struct<all_the_types_no_any_a: list<item:
struct<str: utf8, int32: int32, int64: int64, sint32: int32, sin64: int64,
uint32: uint32, uint64: uint64, fixed32: uint32, fixed64: uint64, sfixed32:
int32, bool: bool, bytes: binary, double: float64, enum:
dictionary<values=utf8, indices=int32, ordered=false>, message: struct<field1:
utf8>, oneofstring: utf8, oneofmessage: struct<field1: utf8>, simple_map:
map<int32, utf8, items_nullable>, complex_map: map<utf8, struct<field1: utf8>,
items_nullable>, simple_list: list<item: utf8, nullable>, complex_list:
list<item: struct<field1: utf8>, nullable>>, nullable>, all_the_types_no_any_b:
list<item: struct<str: utf8, int32: int32, int64: int64, sint32: int32, sin64:
int64, uint32: uint32, uint64: uint64, fixed32: uint32, fixed64: uint64,
sfixed32: int32, bool: bool, bytes: binary, double: float64, enum:
dictionary<values=utf8, indices=int32, ordered=false>, message: struct<field1:
utf8>, oneofstring: utf8, oneofmessage
: struct<field1: utf8>, simple_map: map<int32, utf8, items_nullable>,
complex_map: map<utf8, struct<field1: utf8>, items_nullable>, simple_list:
list<item: utf8, nullable>, complex_list: list<item: struct<field1: utf8>,
nullable>>, nullable>>, nullable
+ - simple_nested: type=struct<simple_a: list<item: struct<field1: utf8>,
nullable>, simple_b: list<item: struct<field1: utf8>, nullable>>, nullable`
+
+ deepNested := util_message.DeepNested{
+ ComplexNested: &complexNested,
+ SimpleNested: &simpleNested,
+ }
+ pmr = NewProtobufMessageReflection(&deepNested)
+ CheckSchema(t, pmr, schema)
Review Comment:
Any reason not to `CheckRecord()` here too?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]