tscottcoombes1 commented on code in PR #40496:
URL: https://github.com/apache/arrow/pull/40496#discussion_r1531179977
##########
go/arrow/util/protobuf_reflect.go:
##########
@@ -0,0 +1,447 @@
+package util
+
+import (
+ "fmt"
+ "github.com/apache/arrow/go/v16/arrow"
+ "github.com/apache/arrow/go/v16/arrow/array"
+ "github.com/apache/arrow/go/v16/arrow/memory"
+ "github.com/huandu/xstrings"
+ "google.golang.org/protobuf/proto"
+ "google.golang.org/protobuf/reflect/protoreflect"
+ "google.golang.org/protobuf/types/known/anypb"
+ "reflect"
+)
+
+// SchemaOptions bundles the settings that control how a protobuf message is
+// translated into an Arrow schema.
+type SchemaOptions struct {
+	// exclusionPolicy reports whether a field is left out of the generated
+	// fields; returning true skips the field.
+	exclusionPolicy func(pfr ProtobufFieldReflection) bool
+	// fieldNameFormatter converts a protobuf field name into the Arrow field
+	// name.
+	fieldNameFormatter func(str string) string
+}
+
+// ProtobufStructReflection pairs a protobuf message with its descriptor and its
+// Go reflection value, together with the schema options in effect.
+type ProtobufStructReflection struct {
+	// descriptor lists the fields of the message type.
+	descriptor protoreflect.MessageDescriptor
+	// message is the protoreflect view used to read field values.
+	message protoreflect.Message
+	// rValue is the reflect view used to look up the Go struct fields.
+	rValue reflect.Value
+	SchemaOptions
+}
+
+// Option customises a ProtobufStructReflection; options are applied by
+// NewProtobufStructReflection after the defaults have been set.
+type Option func(*ProtobufStructReflection)
+
+// NewProtobufStructReflection wraps msg for Arrow schema generation.
+//
+// By default no field is excluded and field names are used unchanged. Each
+// entry of options is then applied, in order, on top of those defaults.
+func NewProtobufStructReflection(msg proto.Message, options ...Option) *ProtobufStructReflection {
+	// Strip every pointer level so rValue refers to the underlying value.
+	rv := reflect.ValueOf(msg)
+	for rv.Kind() == reflect.Ptr {
+		rv = rv.Elem()
+	}
+
+	defaults := SchemaOptions{
+		// Never exclude a field.
+		exclusionPolicy: func(ProtobufFieldReflection) bool { return false },
+		// Leave field names untouched.
+		fieldNameFormatter: func(name string) string { return name },
+	}
+
+	result := &ProtobufStructReflection{
+		descriptor:    msg.ProtoReflect().Descriptor(),
+		message:       msg.ProtoReflect(),
+		rValue:        rv,
+		SchemaOptions: defaults,
+	}
+	for i := range options {
+		options[i](result)
+	}
+	return result
+}
+
+// WithExclusionPolicy returns an Option that installs ex as the exclusion
+// policy. Fields for which ex returns true are skipped when the struct fields
+// are enumerated.
+//
+// A nil ex is ignored so the previously configured policy stays in place;
+// storing nil would make the later policy call panic.
+func WithExclusionPolicy(ex func(pfr ProtobufFieldReflection) bool) Option {
+	return func(psr *ProtobufStructReflection) {
+		if ex == nil {
+			return
+		}
+		psr.exclusionPolicy = ex
+	}
+}
+
+// WithFieldNameFormatter returns an Option that installs formatter as the
+// function used to derive Arrow field names from protobuf field names.
+//
+// A nil formatter is ignored so the previously configured formatter stays in
+// place; storing nil would make the later formatter call panic.
+func WithFieldNameFormatter(formatter func(str string) string) Option {
+	return func(psr *ProtobufStructReflection) {
+		if formatter == nil {
+			return
+		}
+		psr.fieldNameFormatter = formatter
+	}
+}
+
+// unmarshallAny resolves a google.protobuf.Any wrapper to the message packed
+// inside it, carrying the schema options over to the result.
+//
+// Any other message type is returned unchanged. psr is also returned unchanged
+// when the payload cannot be unpacked (the message is not an *anypb.Any, or
+// UnmarshalNew reports an error), so the caller keeps working with the Any
+// wrapper itself instead of dereferencing a nil message.
+func (psr ProtobufStructReflection) unmarshallAny() ProtobufStructReflection {
+	if psr.descriptor.FullName() != "google.protobuf.Any" || psr.message == nil {
+		return psr
+	}
+
+	// Use the pointer held by the protoreflect message rather than copying the
+	// anypb.Any struct out of rValue; this also works when rValue is invalid
+	// or still wrapped in an interface.
+	wrapped, ok := psr.message.Interface().(*anypb.Any)
+	if !ok {
+		return psr
+	}
+	msg, err := wrapped.UnmarshalNew()
+	if err != nil || msg == nil {
+		return psr
+	}
+
+	// Strip pointer levels so rValue refers to the unpacked struct itself.
+	v := reflect.ValueOf(msg)
+	for v.Kind() == reflect.Ptr {
+		v = reflect.Indirect(v)
+	}
+
+	return ProtobufStructReflection{
+		descriptor:    msg.ProtoReflect().Descriptor(),
+		message:       msg.ProtoReflect(),
+		rValue:        v,
+		SchemaOptions: psr.SchemaOptions,
+	}
+}
+
+// GetArrowFields returns one nullable arrow.Field per struct field that passes
+// the exclusion policy. Each field is named by applying the configured field
+// name formatter to the protobuf field name.
+func (psr ProtobufStructReflection) GetArrowFields() []arrow.Field {
+	var result []arrow.Field
+
+	structFields := psr.generateStructFields()
+	for field := range structFields {
+		name := psr.fieldNameFormatter(string(field.descriptor.Name()))
+		result = append(result, arrow.Field{
+			Name:     name,
+			Type:     field.getDataType(),
+			Nullable: true,
+		})
+	}
+
+	return result
+}
+
+// GetSchema builds an Arrow schema from GetArrowFields, passing nil metadata.
+func (psr ProtobufStructReflection) GetSchema() *arrow.Schema {
+	fields := psr.GetArrowFields()
+	return arrow.NewSchema(fields, nil)
+}
+
+// ProtobufListReflection is a ProtobufFieldReflection viewed as a list, adding
+// list-specific helpers on top of the embedded field reflection.
+type ProtobufListReflection struct {
+	ProtobufFieldReflection
+}
+
+// AsList wraps the field in a ProtobufListReflection. No check is made that the
+// field actually is a list.
+func (pfr ProtobufFieldReflection) AsList() ProtobufListReflection {
+	return ProtobufListReflection{ProtobufFieldReflection: pfr}
+}
+
+// getDataType returns the Arrow list type whose element type is derived from
+// the first item of the list, or nil when the list is empty.
+//
+// The first item is read directly rather than through generateListItems:
+// abandoning that channel after a single receive would leave its producer
+// goroutine blocked forever on the next send.
+func (plr ProtobufListReflection) getDataType() arrow.DataType {
+	items := plr.prValue.List()
+	if items.Len() == 0 {
+		return nil
+	}
+	first := ProtobufFieldReflection{
+		descriptor:    plr.descriptor,
+		prValue:       items.Get(0),
+		rValue:        plr.rValue.Index(0),
+		SchemaOptions: plr.SchemaOptions,
+	}
+	return arrow.ListOf(first.getDataType())
+}
+
+// AsMap wraps the field in a ProtobufMapReflection. No check is made that the
+// field actually is a map.
+func (pfr ProtobufFieldReflection) AsMap() ProtobufMapReflection {
+	return ProtobufMapReflection{ProtobufFieldReflection: pfr}
+}
+
+// ProtobufMapReflection is a ProtobufFieldReflection viewed as a map, adding
+// map-specific helpers on top of the embedded field reflection.
+type ProtobufMapReflection struct {
+	ProtobufFieldReflection
+}
+
+// getDataType returns the Arrow map type derived from the first key/value pair
+// produced for the map, or nil when the map has no entries.
+func (pmr ProtobufMapReflection) getDataType() arrow.DataType {
+	pairs := pmr.generateKeyValuePairs()
+	first, ok := <-pairs
+
+	// Drain whatever is left so the producer goroutine can run to completion
+	// and close the channel; walking away after one receive would leave it
+	// blocked forever on its next send.
+	for range pairs {
+	}
+
+	if !ok {
+		return nil
+	}
+	return first.getDataType()
+}
+
+// ProtobufMapKeyValuePairReflection holds the reflections of one map entry.
+type ProtobufMapKeyValuePairReflection struct {
+	// k reflects the entry's key.
+	k ProtobufFieldReflection
+	// v reflects the entry's value.
+	v ProtobufFieldReflection
+}
+
+// getDataType returns the Arrow map type built from the data types of the
+// entry's key and value.
+func (pmr ProtobufMapKeyValuePairReflection) getDataType() arrow.DataType {
+	keyType := pmr.k.getDataType()
+	valueType := pmr.v.getDataType()
+	return arrow.MapOf(keyType, valueType)
+}
+
+// generateKeyValuePairs streams one ProtobufMapKeyValuePairReflection per map
+// key over an unbuffered channel, which is closed once every key has been sent.
+//
+// The pairs are produced by a separate goroutine, so the caller has to receive
+// until the channel is closed for that goroutine to finish.
+func (pmr ProtobufMapReflection) generateKeyValuePairs() chan ProtobufMapKeyValuePairReflection {
+	pairs := make(chan ProtobufMapKeyValuePairReflection)
+
+	produce := func() {
+		defer close(pairs)
+		keys := pmr.rValue.MapKeys()
+		for idx := range keys {
+			key := keys[idx]
+			keyReflection := ProtobufFieldReflection{
+				descriptor:    pmr.descriptor.MapKey(),
+				prValue:       getMapKey(key),
+				rValue:        key,
+				SchemaOptions: pmr.SchemaOptions,
+			}
+			valueReflection := ProtobufFieldReflection{
+				descriptor:    pmr.descriptor.MapValue(),
+				prValue:       pmr.prValue.Map().Get(protoreflect.MapKey(getMapKey(key))),
+				rValue:        pmr.rValue.MapIndex(key),
+				SchemaOptions: pmr.SchemaOptions,
+			}
+			pairs <- ProtobufMapKeyValuePairReflection{k: keyReflection, v: valueReflection}
+		}
+	}
+	go produce()
+
+	return pairs
+}
+
+// generateStructFields streams the reflection of every message field that the
+// exclusion policy does not reject, in descriptor order, over an unbuffered
+// channel that is closed after the last field.
+//
+// The fields are produced by a separate goroutine, so the caller has to receive
+// until the channel is closed for that goroutine to finish.
+func (psr ProtobufStructReflection) generateStructFields() chan ProtobufFieldReflection {
+	fields := make(chan ProtobufFieldReflection)
+
+	go func() {
+		defer close(fields)
+		descriptors := psr.descriptor.Fields()
+		for idx := 0; idx < descriptors.Len(); idx++ {
+			name := string(descriptors.Get(idx).Name())
+			if field := psr.getFieldByName(name); !psr.exclusionPolicy(field) {
+				fields <- field
+			}
+		}
+	}()
+
+	return fields
+}
+
+// AsStruct views the field as a message: it builds a ProtobufStructReflection
+// from the field's message descriptor, message value and reflect value, then
+// passes it through unmarshallAny.
+func (pfr ProtobufFieldReflection) AsStruct() ProtobufStructReflection {
+	return ProtobufStructReflection{
+		descriptor:    pfr.descriptor.Message(),
+		message:       pfr.prValue.Message(),
+		rValue:        pfr.rValue,
+		SchemaOptions: pfr.SchemaOptions,
+	}.unmarshallAny()
+}
+
+// getDataType returns the Arrow struct type made of the fields reported by
+// GetArrowFields.
+func (psr ProtobufStructReflection) getDataType() arrow.DataType {
+	fields := psr.GetArrowFields()
+	return arrow.StructOf(fields...)
+}
+
+// getFieldByName returns the reflection of the message field called n.
+//
+// The descriptor is looked up by the exact name first. Only if that fails is n
+// converted to snake case and looked up by text name, which keeps Go-style
+// names working while no longer rewriting names that are already valid
+// protobuf field names.
+//
+// The Go struct field is located through the camel-cased name. For a member of
+// a oneof the name of the oneof is used instead, so rValue then refers to the
+// oneof field rather than to the member itself. rValue is left as psr.rValue
+// when that value is invalid or zero.
+func (psr ProtobufStructReflection) getFieldByName(n string) ProtobufFieldReflection {
+	fields := psr.descriptor.Fields()
+	fd := fields.ByName(protoreflect.Name(n))
+	if fd == nil {
+		fd = fields.ByTextName(xstrings.ToSnakeCase(n))
+	}
+
+	fv := psr.rValue
+	if fv.IsValid() && !fv.IsZero() {
+		// Unwrap pointers and interfaces to reach the struct itself.
+		for fv.Kind() == reflect.Ptr || fv.Kind() == reflect.Interface {
+			fv = fv.Elem()
+		}
+		goName := n
+		if oneof := fd.ContainingOneof(); oneof != nil {
+			goName = string(oneof.Name())
+		}
+		fv = fv.FieldByName(xstrings.ToCamelCase(goName))
+		for fv.Kind() == reflect.Ptr {
+			fv = fv.Elem()
+		}
+	}
+
+	return ProtobufFieldReflection{
+		descriptor:    fd,
+		prValue:       psr.message.Get(fd),
+		rValue:        fv,
+		SchemaOptions: psr.SchemaOptions,
+	}
+}
+
+// ProtobufFieldReflection pairs one protobuf field with its descriptor, its
+// protoreflect value and its Go reflection value, together with the schema
+// options in effect.
+type ProtobufFieldReflection struct {
+	// descriptor describes the field (name, kind, list/map-ness).
+	descriptor protoreflect.FieldDescriptor
+	// prValue is the field's value as seen through protoreflect.
+	prValue protoreflect.Value
+	// rValue is the field's value as seen through reflect.
+	rValue reflect.Value
+	SchemaOptions
+}
+
+// isStruct reports whether the field is a single message: its kind is
+// MessageKind, it is not a map, and its Go value is not a slice.
+func (pfr ProtobufFieldReflection) isStruct() bool {
+	if pfr.descriptor.Kind() != protoreflect.MessageKind {
+		return false
+	}
+	if pfr.descriptor.IsMap() {
+		return false
+	}
+	return pfr.rValue.Kind() != reflect.Slice
+}
+
+// isMap reports whether the field's kind is MessageKind and its descriptor
+// marks it as a map.
+func (pfr ProtobufFieldReflection) isMap() bool {
+	if pfr.descriptor.Kind() != protoreflect.MessageKind {
+		return false
+	}
+	return pfr.descriptor.IsMap()
+}
+
+// isList reports whether the descriptor marks the field as a list and its Go
+// value is a slice.
+func (pfr ProtobufFieldReflection) isList() bool {
+	if !pfr.descriptor.IsList() {
+		return false
+	}
+	return pfr.rValue.Kind() == reflect.Slice
+}
+
+// getListLength returns the number of items in the field's protoreflect list.
+func (pfr ProtobufFieldReflection) getListLength() int {
+	items := pfr.prValue.List()
+	return items.Len()
+}
+
+// getMapLength returns the number of entries in the field's protoreflect map.
+func (pfr ProtobufFieldReflection) getMapLength() int {
+	entries := pfr.prValue.Map()
+	return entries.Len()
+}
+
+// generateListItems streams the reflection of every list item, in index order,
+// over an unbuffered channel that is closed after the last item. Every item
+// shares the descriptor and schema options of the list field.
+//
+// The items are produced by a separate goroutine, so the caller has to receive
+// until the channel is closed for that goroutine to finish.
+func (plr ProtobufListReflection) generateListItems() chan ProtobufFieldReflection {
+	items := make(chan ProtobufFieldReflection)
+
+	produce := func() {
+		defer close(items)
+		for idx := 0; idx < plr.prValue.List().Len(); idx++ {
+			item := ProtobufFieldReflection{
+				descriptor:    plr.descriptor,
+				prValue:       plr.prValue.List().Get(idx),
+				rValue:        plr.rValue.Index(idx),
+				SchemaOptions: plr.SchemaOptions,
+			}
+			items <- item
+		}
+	}
+	go produce()
+
+	return items
+}
+
+func (pfr ProtobufFieldReflection) getDataType() arrow.DataType {
+ var dt arrow.DataType
+
+ typeMap := map[protoreflect.Kind]arrow.DataType{
+ //Numeric
+ protoreflect.Int32Kind: arrow.PrimitiveTypes.Int32,
+ protoreflect.Int64Kind: arrow.PrimitiveTypes.Int64,
+ protoreflect.Sint32Kind: arrow.PrimitiveTypes.Int32,
+ protoreflect.Sint64Kind: arrow.PrimitiveTypes.Int64,
+ protoreflect.Uint32Kind: arrow.PrimitiveTypes.Uint32,
+ protoreflect.Uint64Kind: arrow.PrimitiveTypes.Uint64,
+ protoreflect.Fixed32Kind: arrow.PrimitiveTypes.Uint32,
+ protoreflect.Fixed64Kind: arrow.PrimitiveTypes.Uint64,
+ protoreflect.Sfixed32Kind: arrow.PrimitiveTypes.Int32,
+ protoreflect.Sfixed64Kind: arrow.PrimitiveTypes.Int64,
+ protoreflect.FloatKind: arrow.PrimitiveTypes.Float32,
+ protoreflect.DoubleKind: arrow.PrimitiveTypes.Float64,
+ //Binary
+ protoreflect.StringKind: arrow.BinaryTypes.String,
+ protoreflect.BytesKind: arrow.BinaryTypes.Binary,
+ //Fixed Width
+ protoreflect.BoolKind: arrow.FixedWidthTypes.Boolean,
+ // Enum
+ protoreflect.EnumKind: arrow.PrimitiveTypes.Int32,
Review Comment:
To get the enum's string representation, I think you need to do a lookup
in the generated name map, e.g.
```
AllTheTypes_ExampleEnum_name[AllTheTypes_OPTION_0]
```
I am not sure how to do that with reflection (or whether it is possible at all).
If it is trivial for you, please let me know how; otherwise I am happy
to have access only to the enum int value.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]