voonhous commented on code in PR #17581:
URL: https://github.com/apache/hudi/pull/17581#discussion_r2644017066
##########
hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/utils/TestHiveAvroSerializer.java:
##########
@@ -409,4 +408,200 @@ public void testGetJavaInvalidFieldAccess() {
serializer.getValueAsJava(record, "properties.value");
});
}
+
+ @Test
+ public void testSerializeDecimalBackedByBytes() {
+ // Create schema with BYTES-backed decimal (not FIXED)
+ String schemaWithBytesDecimal =
"{\"type\":\"record\",\"name\":\"test_record\",\"fields\":["
+ + "{\"name\":\"id\",\"type\":\"int\"},"
+ +
"{\"name\":\"amount\",\"type\":[\"null\",{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":10,\"scale\":2}],\"default\":null}"
+ + "]}";
+
+ HoodieSchema schema = HoodieSchema.parse(schemaWithBytesDecimal);
+
+ // Create an Avro record with BYTES-backed decimal
+ GenericData.Record avroRecord = new
GenericData.Record(schema.toAvroSchema());
+ avroRecord.put("id", 42);
+
+ HoodieSchema.Decimal decimalType = (HoodieSchema.Decimal)
schema.getField("amount").get().schema().getTypes().get(1);
+ BigDecimal bd = new BigDecimal("1234.56").setScale(decimalType.getScale());
+ ByteBuffer decimalBytes = HoodieAvroUtils.DECIMAL_CONVERSION.toBytes(bd,
decimalType.toAvroSchema(), decimalType.toAvroSchema().getLogicalType());
+ avroRecord.put("amount", decimalBytes);
+
+ assertTrue(GenericData.get().validate(schema.toAvroSchema(), avroRecord));
+
+ // Convert to ArrayWritable
+ ArrayWritable writable = (ArrayWritable)
HoodieRealtimeRecordReaderUtils.avroToArrayWritable(avroRecord,
schema.toAvroSchema(), true);
+
+ // Set up Hive types and serializer
+ List<TypeInfo> columnTypeList =
createHiveTypeInfoFrom("int,decimal(10,2)");
+ List<String> columnNameList = createHiveColumnsFrom("id,amount");
+ StructTypeInfo rowTypeInfo = (StructTypeInfo)
TypeInfoFactory.getStructTypeInfo(columnNameList, columnTypeList);
+
+ // Serialize and verify
+ GenericRecord testRecord = new HiveAvroSerializer(new
ArrayWritableObjectInspector(rowTypeInfo), columnNameList,
columnTypeList).serialize(writable, schema);
+ assertTrue(GenericData.get().validate(schema.toAvroSchema(), testRecord));
+
+ // Verify the decimal value is correctly serialized
+ assertEquals(42, testRecord.get("id"));
+ ByteBuffer resultBytes = (ByteBuffer) testRecord.get("amount");
+ BigDecimal resultDecimal =
HoodieAvroUtils.DECIMAL_CONVERSION.fromBytes(resultBytes,
decimalType.toAvroSchema(), decimalType.toAvroSchema().getLogicalType());
+ assertEquals(bd, resultDecimal);
+ }
+
+ @Test
+ public void testSerializeDecimalBackedByFixed() {
+ // Create schema with FIXED-backed decimal (existing test covers this but
making it explicit)
+ String schemaWithFixedDecimal =
"{\"type\":\"record\",\"name\":\"test_record\",\"fields\":["
+ + "{\"name\":\"id\",\"type\":\"int\"},"
+ +
"{\"name\":\"amount\",\"type\":[\"null\",{\"type\":\"fixed\",\"name\":\"fixed_decimal\",\"size\":5,\"logicalType\":\"decimal\",\"precision\":10,\"scale\":2}],\"default\":null}"
+ + "]}";
+
+ HoodieSchema schema = HoodieSchema.parse(schemaWithFixedDecimal);
+
+ // Create an Avro record with FIXED-backed decimal
+ GenericData.Record avroRecord = new
GenericData.Record(schema.toAvroSchema());
+ avroRecord.put("id", 42);
+
+ HoodieSchema.Decimal decimalType = (HoodieSchema.Decimal)
schema.getField("amount").get().schema().getTypes().get(1);
+ BigDecimal bd = new BigDecimal("1234.56").setScale(decimalType.getScale());
+ avroRecord.put("amount", HoodieAvroUtils.DECIMAL_CONVERSION.toFixed(bd,
decimalType.toAvroSchema(), decimalType.toAvroSchema().getLogicalType()));
+
+ assertTrue(GenericData.get().validate(schema.toAvroSchema(), avroRecord));
+
+ // Convert to ArrayWritable
+ ArrayWritable writable = (ArrayWritable)
HoodieRealtimeRecordReaderUtils.avroToArrayWritable(avroRecord,
schema.toAvroSchema(), true);
+
+ // Set up Hive types and serializer
+ List<TypeInfo> columnTypeList =
createHiveTypeInfoFrom("int,decimal(10,2)");
+ List<String> columnNameList = createHiveColumnsFrom("id,amount");
+ StructTypeInfo rowTypeInfo = (StructTypeInfo)
TypeInfoFactory.getStructTypeInfo(columnNameList, columnTypeList);
+
+ // Serialize and verify
+ GenericRecord testRecord = new HiveAvroSerializer(new
ArrayWritableObjectInspector(rowTypeInfo), columnNameList,
columnTypeList).serialize(writable, schema);
+ assertTrue(GenericData.get().validate(schema.toAvroSchema(), testRecord));
+
+ // Verify the decimal value is correctly serialized
+ assertEquals(42, testRecord.get("id"));
+ GenericData.Fixed resultFixed = (GenericData.Fixed)
testRecord.get("amount");
+ BigDecimal resultDecimal =
HoodieAvroUtils.DECIMAL_CONVERSION.fromFixed(resultFixed,
decimalType.toAvroSchema(), decimalType.toAvroSchema().getLogicalType());
+ assertEquals(bd, resultDecimal);
+ }
+
+ @Test
+ public void testGenerateColumnTypesForDecimalBackedByBytes() throws
Exception {
+ // Test HiveTypeUtils.generateColumnTypes and convertToTypeInfo branch at
lines 152-162 for decimal backed by bytes
+ String schemaWithDecimalBytes =
"{\"type\":\"record\",\"name\":\"test_record\",\"fields\":["
+ + "{\"name\":\"id\",\"type\":\"int\"},"
+ +
"{\"name\":\"amount\",\"type\":[\"null\",{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":10,\"scale\":2}],\"default\":null}"
+ + "]}";
+
+ HoodieSchema schema = HoodieSchema.parse(schemaWithDecimalBytes);
+
+ // Test that HiveTypeUtils.generateColumnTypes correctly identifies
bytes-backed decimal as decimal type
+ List<TypeInfo> columnTypes = HiveTypeUtils.generateColumnTypes(schema);
+ assertEquals(2, columnTypes.size());
+ assertEquals(TypeInfoFactory.intTypeInfo, columnTypes.get(0));
+ // The second column should be decimal(10,2) type due to the decimal
logical type backed by bytes
+ assertEquals(TypeInfoFactory.getDecimalTypeInfo(10, 2),
columnTypes.get(1));
+ }
+
+ @Test
+ public void testGenerateColumnTypesForDecimalBackedByFixed() throws
Exception {
+ // Test HiveTypeUtils.generateColumnTypes and convertToTypeInfo branch at
lines 152-162 for decimal backed by fixed
+ String schemaWithDecimalFixed =
"{\"type\":\"record\",\"name\":\"test_record\",\"fields\":["
+ + "{\"name\":\"id\",\"type\":\"int\"},"
+ +
"{\"name\":\"amount\",\"type\":[\"null\",{\"type\":\"fixed\",\"name\":\"fixed_decimal\",\"size\":6,\"logicalType\":\"decimal\",\"precision\":12,\"scale\":4}],\"default\":null}"
+ + "]}";
+
+ HoodieSchema schema = HoodieSchema.parse(schemaWithDecimalFixed);
+ assertInstanceOf(HoodieSchema.Decimal.class,
schema.getField("amount").get().getNonNullSchema());
+
+ // Test that HiveTypeUtils.generateColumnTypes correctly identifies
fixed-backed decimal as decimal type
+ List<TypeInfo> columnTypes = HiveTypeUtils.generateColumnTypes(schema);
+ assertEquals(2, columnTypes.size());
+ assertEquals(TypeInfoFactory.intTypeInfo, columnTypes.get(0));
+ // The second column should be decimal(12,4) type due to the decimal
logical type backed by fixed
+ assertEquals(TypeInfoFactory.getDecimalTypeInfo(12, 4),
columnTypes.get(1));
+ }
+
+ @Test
+ public void testGenerateColumnTypesForDate() throws Exception {
+ // Test HiveTypeUtils.generateColumnTypes and convertToTypeInfo branch at
lines 187-189 for date
+ String schemaWithDate =
"{\"type\":\"record\",\"name\":\"test_record\",\"fields\":["
+ + "{\"name\":\"id\",\"type\":\"int\"},"
+ +
"{\"name\":\"birth_date\",\"type\":[\"null\",{\"type\":\"int\",\"logicalType\":\"date\"}],\"default\":null}"
+ + "]}";
+
+ HoodieSchema schema = HoodieSchema.parse(schemaWithDate);
+
+ // Test that HiveTypeUtils.generateColumnTypes correctly identifies date
as date type
+ List<TypeInfo> columnTypes = HiveTypeUtils.generateColumnTypes(schema);
+ assertEquals(2, columnTypes.size());
+ assertEquals(TypeInfoFactory.intTypeInfo, columnTypes.get(0));
+ // The second column should be date type due to the date logical type
+ assertEquals(TypeInfoFactory.dateTypeInfo, columnTypes.get(1));
+ }
+
+ @Test
+ public void testGenerateColumnTypesForTimestampMillis() throws Exception {
+ // Test HiveTypeUtils.generateColumnTypes and convertToTypeInfo branch at
lines 192-194 for timestamp-millis
+ String schemaWithTimestampMillis =
"{\"type\":\"record\",\"name\":\"test_record\",\"fields\":["
+ + "{\"name\":\"id\",\"type\":\"int\"},"
+ +
"{\"name\":\"created_at\",\"type\":[\"null\",{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}],\"default\":null}"
+ + "]}";
+
+ HoodieSchema schema = HoodieSchema.parse(schemaWithTimestampMillis);
+
+ // Test that HiveTypeUtils.generateColumnTypes correctly identifies
timestamp-millis as timestamp type
+ List<TypeInfo> columnTypes = HiveTypeUtils.generateColumnTypes(schema);
+ assertEquals(2, columnTypes.size());
+ assertEquals(TypeInfoFactory.intTypeInfo, columnTypes.get(0));
+ // The second column should be timestamp type due to the timestamp-millis
logical type
+ assertEquals(TypeInfoFactory.timestampTypeInfo, columnTypes.get(1));
+ }
+
+ @Test
+ public void testGenerateColumnTypesForTimestampMicros() {
+ // Test timestamp-micros - AvroSerDe.TIMESTAMP_TYPE_NAME is only
"timestamp-millis", NOT "timestamp-micros"
+ String schemaWithTimestampMicros =
"{\"type\":\"record\",\"name\":\"test_record\",\"fields\":["
+ + "{\"name\":\"id\",\"type\":\"int\"},"
+ +
"{\"name\":\"updated_at\",\"type\":[\"null\",{\"type\":\"long\",\"logicalType\":\"timestamp-micros\"}],\"default\":null}"
+ + "]}";
+
+ HoodieSchema schema = HoodieSchema.parse(schemaWithTimestampMicros);
+
+ // HiveTypeUtils.generateColumnTypes throws an exception for
timestamp-micros since it's not supported by AvroSerDe
+ assertThrows(Exception.class, () -> {
Review Comment:
```
testGenerateColumnTypesForTimeMicros()
testGenerateColumnTypesForTimeMillis()
testGenerateColumnTypesForTimestampMicros()
```
These three tests fail on master without the `HiveTypeUtils` migration.
Let me fix them.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]