the-other-tim-brown commented on code in PR #17581:
URL: https://github.com/apache/hudi/pull/17581#discussion_r2623500053
##########
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java:
##########
@@ -339,48 +345,48 @@ protected GenericEnumSymbol makeInstance(Object seed,
Set<Object> seenSchemas) {
}
};
- private Object serializeEnum(PrimitiveObjectInspector fieldOI, Object
structFieldData, Schema schema) throws HoodieException {
+ private Object serializeEnum(PrimitiveObjectInspector fieldOI, Object
structFieldData, HoodieSchema schema) throws HoodieException {
try {
- return enums.retrieve(schema).retrieve(serializePrimitive(fieldOI,
structFieldData, schema));
+ return
enums.retrieve(schema.toAvroSchema()).retrieve(serializePrimitive(fieldOI,
structFieldData, schema));
} catch (Exception e) {
throw new HoodieException(e);
}
}
- private Object serializeStruct(StructTypeInfo typeInfo,
StructObjectInspector ssoi, Object o, Schema schema) {
+ private Object serializeStruct(StructTypeInfo typeInfo,
StructObjectInspector ssoi, Object o, HoodieSchema schema) {
int size = schema.getFields().size();
List<? extends StructField> allStructFieldRefs =
ssoi.getAllStructFieldRefs();
List<Object> structFieldsDataAsList = ssoi.getStructFieldsDataAsList(o);
- GenericData.Record record = new GenericData.Record(schema);
+ GenericData.Record record = new GenericData.Record(schema.toAvroSchema());
ArrayList<TypeInfo> allStructFieldTypeInfos =
typeInfo.getAllStructFieldTypeInfos();
for (int i = 0; i < size; i++) {
- Schema.Field field = schema.getFields().get(i);
+ HoodieSchemaField field = schema.getFields().get(i);
setUpRecordFieldFromWritable(allStructFieldTypeInfos.get(i),
structFieldsDataAsList.get(i),
allStructFieldRefs.get(i).getFieldObjectInspector(), record, field);
}
return record;
}
- private Object serializePrimitive(PrimitiveObjectInspector fieldOI, Object
structFieldData, Schema schema) throws HoodieException {
+ private Object serializePrimitive(PrimitiveObjectInspector fieldOI, Object
structFieldData, HoodieSchema schema) throws HoodieException {
switch (fieldOI.getPrimitiveCategory()) {
case BINARY:
- if (schema.getType() == Schema.Type.BYTES) {
+ if (schema.getType() == HoodieSchemaType.BYTES) {
return AvroSerdeUtils.getBufferFromBytes((byte[])
fieldOI.getPrimitiveJavaObject(structFieldData));
- } else if (schema.getType() == Schema.Type.FIXED) {
- GenericData.Fixed fixed = new GenericData.Fixed(schema, (byte[])
fieldOI.getPrimitiveJavaObject(structFieldData));
+ } else if (schema.getType() == HoodieSchemaType.FIXED) {
+ GenericData.Fixed fixed = new
GenericData.Fixed(schema.toAvroSchema(), (byte[])
fieldOI.getPrimitiveJavaObject(structFieldData));
return fixed;
} else {
throw new HoodieException("Unexpected Avro schema for Binary
TypeInfo: " + schema.getType());
}
case DECIMAL:
HiveDecimal dec = (HiveDecimal)
fieldOI.getPrimitiveJavaObject(structFieldData);
- LogicalTypes.Decimal decimal = (LogicalTypes.Decimal)
schema.getLogicalType();
+ HoodieSchema.Decimal decimal = (HoodieSchema.Decimal) schema;
BigDecimal bd = new
BigDecimal(dec.toString()).setScale(decimal.getScale());
- if (schema.getType() == Schema.Type.BYTES) {
- return HoodieAvroUtils.DECIMAL_CONVERSION.toBytes(bd, schema,
decimal);
+ if (schema.getType() == HoodieSchemaType.BYTES) {
Review Comment:
Should we also handle the case where the schema is a decimal?
##########
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java:
##########
@@ -396,7 +402,7 @@ private Object serializePrimitive(PrimitiveObjectInspector
fieldOI, Object struc
case TIMESTAMP:
return HoodieHiveUtils.getMills(structFieldData);
case INT:
- if (schema.getLogicalType() != null &&
schema.getLogicalType().getName().equals("date")) {
+ if (schema.getType() != null && schema.getType() ==
HoodieSchemaType.DATE) {
Review Comment:
We can simplify this; the type cannot be null, right?
##########
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java:
##########
@@ -493,12 +499,16 @@ private static void
copyOldValueOrSetDefault(GenericRecord oldRecord, GenericRec
Object newFieldValue;
if (fieldValue instanceof GenericRecord) {
GenericRecord record = (GenericRecord) fieldValue;
- newFieldValue = rewriteRecordIgnoreResultCheck(record,
AvroSchemaUtils.resolveUnionSchema(field.schema(),
record.getSchema().getFullName()));
+ HoodieSchema nonNullFieldSchema = field.schema().getNonNullType();
+ if (!Objects.equals(nonNullFieldSchema.getFullName(),
record.getSchema().getFullName())) {
Review Comment:
Can you add some details on the logic change here?
##########
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveAvroSerializer.java:
##########
@@ -419,20 +425,20 @@ private Object serializeUnion(UnionTypeInfo typeInfo,
UnionObjectInspector field
schema.getTypes().get(tag));
}
- private Object serializeList(ListTypeInfo typeInfo, ListObjectInspector
fieldOI, Object structFieldData, Schema schema) throws HoodieException {
+ private Object serializeList(ListTypeInfo typeInfo, ListObjectInspector
fieldOI, Object structFieldData, HoodieSchema schema) throws HoodieException {
List<?> list = fieldOI.getList(structFieldData);
- List<Object> deserialized = new GenericData.Array<Object>(list.size(),
schema);
+ List<Object> deserialized = new GenericData.Array<>(list.size(),
schema.toAvroSchema());
TypeInfo listElementTypeInfo = typeInfo.getListElementTypeInfo();
ObjectInspector listElementObjectInspector =
fieldOI.getListElementObjectInspector();
// NOTE: We have to resolve nullable schema, since Avro permits array
elements
// to be null
- Schema arrayNestedType =
AvroSchemaUtils.getNonNullTypeFromUnion(schema.getElementType());
- Schema elementType;
+ HoodieSchema arrayNestedType = schema.getElementType().getNonNullType();
+ HoodieSchema elementType;
if (listElementObjectInspector.getCategory() ==
ObjectInspector.Category.PRIMITIVE) {
elementType = arrayNestedType;
} else {
- elementType = arrayNestedType.getField("element") == null ?
arrayNestedType : arrayNestedType.getField("element").schema();
+ elementType = arrayNestedType.getField("element").isEmpty() ?
arrayNestedType : arrayNestedType.getField("element").get().schema();
Review Comment:
```suggestion
elementType =
arrayNestedType.getField("element").map(HoodieSchemaField::schema).orElse(arrayNestedType);
```
##########
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HiveTypeUtils.java:
##########
@@ -259,22 +260,22 @@ private static TypeInfo generateTypeInfoWorker(Schema
schema,
}
}
- private static TypeInfo generateRecordTypeInfo(Schema schema,
- Set<Schema> seenSchemas)
throws AvroSerdeException {
- assert schema.getType().equals(Schema.Type.RECORD);
+ private static TypeInfo generateRecordTypeInfo(HoodieSchema schema,
+ Set<HoodieSchema>
seenSchemas) throws AvroSerdeException {
+ ValidationUtils.checkArgument(schema.getType() == RECORD, schema + " is
not a RECORD");
Review Comment:
Use the `Supplier<String>` overload for the message so the error string is
only built when the check actually fails; apply this to all usages in this class.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]