twalthr commented on a change in pull request #10503: [FLINK-15137][avro] 
Improve schema derivation for Avro format
URL: https://github.com/apache/flink/pull/10503#discussion_r359216053
 
 

 ##########
 File path: 
flink-formats/flink-avro/src/main/java/org/apache/flink/formats/avro/typeutils/AvroSchemaConverter.java
 ##########
 @@ -157,4 +209,162 @@ private AvroSchemaConverter() {
                }
                throw new IllegalArgumentException("Unsupported Avro type '" + 
schema.getType() + "'.");
        }
+
+       private static LogicalType convertToLogicalType(Schema schema) {
+               return convertToDataType(schema).getLogicalType();
+       }
+
+       private static DataType convertToDataType(Schema schema) {
+               switch (schema.getType()) {
+                       case RECORD:
+                               final List<Schema.Field> fields = 
schema.getFields();
+                               final DataTypes.Field[] dataTypeFields = new 
DataTypes.Field[fields.size()];
+                               for (int i = 0; i < fields.size(); i++) {
+                                       final Schema.Field field = 
fields.get(i);
+                                       dataTypeFields[i] = DataTypes.FIELD(
+                                               field.name(),
+                                               
convertToDataType(field.schema()));
+                               }
+                               return DataTypes.ROW(dataTypeFields);
+                       case ENUM:
+                       case STRING:
+                               // convert Avro's Utf8/CharSequence to String
+                               return DataTypes.STRING();
+                       case ARRAY:
+                               // result type might either be 
ObjectArrayTypeInfo or BasicArrayTypeInfo for Strings
+                               return 
DataTypes.ARRAY(convertToDataType(schema.getElementType()));
+                       case MAP:
+                               return DataTypes.MAP(DataTypes.STRING(), 
convertToDataType(schema.getValueType()));
+                       case UNION:
+                               final Schema actualSchema;
+                               if (schema.getTypes().size() == 2 && 
schema.getTypes().get(0).getType() == Schema.Type.NULL) {
+                                       actualSchema = schema.getTypes().get(1);
+                               } else if (schema.getTypes().size() == 2 && 
schema.getTypes().get(1).getType() == Schema.Type.NULL) {
+                                       actualSchema = schema.getTypes().get(0);
+                               } else if (schema.getTypes().size() == 1) {
+                                       actualSchema = schema.getTypes().get(0);
+                               } else {
+                                       // use Kryo for serialization
+                                       return 
DataTypes.RAW(Types.GENERIC(Object.class));
+                               }
+                               return convertToDataType(actualSchema);
+                       case FIXED:
+                       case BYTES:
+                               // logical decimal type
+                               if (schema.getLogicalType() instanceof 
LogicalTypes.Decimal) {
+                                       LogicalTypes.Decimal decimalType = 
(LogicalTypes.Decimal) schema.getLogicalType();
+                                       return 
DataTypes.DECIMAL(decimalType.getPrecision(), decimalType.getScale());
+                               }
+                               // convert fixed size binary data to primitive 
byte arrays
+                               return DataTypes.BYTES();
+                       case INT:
+                               // logical date and time type
+                               final org.apache.avro.LogicalType logicalType = 
schema.getLogicalType();
+                               if (logicalType == LogicalTypes.date()) {
+                                       return DataTypes.DATE();
+                               } else if (logicalType == 
LogicalTypes.timeMillis()) {
+                                       return DataTypes.TIME(3);
+                               }
+                               return DataTypes.INT();
+                       case LONG:
+                               // logical timestamp type
+                               if (schema.getLogicalType() == 
LogicalTypes.timestampMillis()) {
+                                       return DataTypes.TIMESTAMP(3);
+                               }
+                               return DataTypes.BIGINT();
+                       case FLOAT:
+                               return DataTypes.FLOAT();
+                       case DOUBLE:
+                               return DataTypes.DOUBLE();
+                       case BOOLEAN:
+                               return DataTypes.BOOLEAN();
+                       case NULL:
+                               return DataTypes.NULL();
+               }
+               throw new IllegalArgumentException("Unsupported Avro type '" + 
schema.getType() + "'.");
+       }
+
+       private static Schema convertToSchema(LogicalType logicalType, int 
rowTypeCounter) {
+               switch (logicalType.getTypeRoot()) {
+                       case NULL:
+                               return SchemaBuilder.builder().nullType();
+                       case BOOLEAN:
+                               return 
SchemaBuilder.builder().nullable().booleanType();
+                       case INTEGER:
+                               return 
SchemaBuilder.builder().nullable().intType();
+                       case BIGINT:
+                               return 
SchemaBuilder.builder().nullable().longType();
+                       case FLOAT:
+                               return 
SchemaBuilder.builder().nullable().floatType();
+                       case DOUBLE:
+                               return 
SchemaBuilder.builder().nullable().doubleType();
+                       case CHAR:
+                       case VARCHAR:
+                               return 
SchemaBuilder.builder().nullable().stringType();
+                       case BINARY:
+                       case VARBINARY:
+                               return 
SchemaBuilder.builder().nullable().bytesType();
+                       case TIMESTAMP_WITHOUT_TIME_ZONE:
+                               // use long to represents Timestamp
+                               return 
LogicalTypes.timestampMillis().addToSchema(SchemaBuilder.builder().longType());
+                       case DATE:
+                               // use int to represents Date
+                               return 
LogicalTypes.date().addToSchema(SchemaBuilder.builder().intType());
+                       case TIME_WITHOUT_TIME_ZONE:
+                               // use int to represents Time, we only support 
millisecond when deserialization
+                               return 
LogicalTypes.timeMillis().addToSchema(SchemaBuilder.builder().intType());
 
 Review comment:
   Can't we support more than millisecond precision now?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to