twalthr commented on a change in pull request #10503: [FLINK-15137][avro] 
Improve schema derivation for Avro format
URL: https://github.com/apache/flink/pull/10503#discussion_r359216153
 
 

 ##########
 File path: 
flink-formats/flink-avro/src/main/java/org/apache/flink/formats/avro/typeutils/AvroSchemaConverter.java
 ##########
 @@ -157,4 +209,162 @@ private AvroSchemaConverter() {
                }
                throw new IllegalArgumentException("Unsupported Avro type '" + 
schema.getType() + "'.");
        }
+
+       private static LogicalType convertToLogicalType(Schema schema) {
+               return convertToDataType(schema).getLogicalType();
+       }
+
+       private static DataType convertToDataType(Schema schema) {
+               switch (schema.getType()) {
+                       case RECORD:
+                               final List<Schema.Field> fields = 
schema.getFields();
+                               final DataTypes.Field[] dataTypeFields = new 
DataTypes.Field[fields.size()];
+                               for (int i = 0; i < fields.size(); i++) {
+                                       final Schema.Field field = 
fields.get(i);
+                                       dataTypeFields[i] = DataTypes.FIELD(
+                                               field.name(),
+                                               
convertToDataType(field.schema()));
+                               }
+                               return DataTypes.ROW(dataTypeFields);
+                       case ENUM:
+                       case STRING:
+                               // convert Avro's Utf8/CharSequence to String
+                               return DataTypes.STRING();
+                       case ARRAY:
+                               // result type might either be 
ObjectArrayTypeInfo or BasicArrayTypeInfo for Strings
+                               return 
DataTypes.ARRAY(convertToDataType(schema.getElementType()));
+                       case MAP:
+                               return DataTypes.MAP(DataTypes.STRING(), 
convertToDataType(schema.getValueType()));
+                       case UNION:
+                               final Schema actualSchema;
+                               if (schema.getTypes().size() == 2 && 
schema.getTypes().get(0).getType() == Schema.Type.NULL) {
+                                       actualSchema = schema.getTypes().get(1);
+                               } else if (schema.getTypes().size() == 2 && 
schema.getTypes().get(1).getType() == Schema.Type.NULL) {
+                                       actualSchema = schema.getTypes().get(0);
+                               } else if (schema.getTypes().size() == 1) {
+                                       actualSchema = schema.getTypes().get(0);
+                               } else {
+                                       // use Kryo for serialization
+                                       return 
DataTypes.RAW(Types.GENERIC(Object.class));
+                               }
+                               return convertToDataType(actualSchema);
+                       case FIXED:
+                       case BYTES:
+                               // logical decimal type
+                               if (schema.getLogicalType() instanceof 
LogicalTypes.Decimal) {
+                                       LogicalTypes.Decimal decimalType = 
(LogicalTypes.Decimal) schema.getLogicalType();
+                                       return 
DataTypes.DECIMAL(decimalType.getPrecision(), decimalType.getScale());
+                               }
+                               // convert fixed size binary data to primitive 
byte arrays
+                               return DataTypes.BYTES();
+                       case INT:
+                               // logical date and time type
+                               final org.apache.avro.LogicalType logicalType = 
schema.getLogicalType();
+                               if (logicalType == LogicalTypes.date()) {
+                                       return DataTypes.DATE();
+                               } else if (logicalType == 
LogicalTypes.timeMillis()) {
+                                       return DataTypes.TIME(3);
+                               }
+                               return DataTypes.INT();
+                       case LONG:
+                               // logical timestamp type
+                               if (schema.getLogicalType() == 
LogicalTypes.timestampMillis()) {
 
 Review comment:
   Should we also add micros here (e.g. the `timestamp-micros` logical type)?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to