voonhous commented on code in PR #14340:
URL: https://github.com/apache/hudi/pull/14340#discussion_r2593121120
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBufferedRecordMerger.java:
##########
@@ -800,54 +804,64 @@ private BufferedRecordMerger<InternalRow>
createPartialMerger(RecordMergeMode me
);
}
- private static Schema getSchema1() {
- Schema fullSchema = Schema.createRecord("TestRecord", null, null, false);
- List<Schema.Field> fields = Arrays.asList(
- new Schema.Field("id",
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL),
Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
- new Schema.Field("name",
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL),
Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
- new Schema.Field("age",
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.INT),
Schema.create(Schema.Type.NULL))), null, 0),
- new Schema.Field("city",
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL),
Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
- new Schema.Field("timestamp",
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.LONG),
Schema.create(Schema.Type.NULL))), null, 0L)
+ private static HoodieSchema getSchema1() {
+ HoodieSchema nullSchema = HoodieSchema.create(HoodieSchemaType.NULL);
+ HoodieSchema stringSchema = HoodieSchema.create(HoodieSchemaType.STRING);
+ HoodieSchema intSchema = HoodieSchema.create(HoodieSchemaType.INT);
+ HoodieSchema longSchema = HoodieSchema.create(HoodieSchemaType.LONG);
+
+ List<HoodieSchemaField> fields = Arrays.asList(
+ HoodieSchemaField.of("id", HoodieSchema.createUnion(nullSchema,
stringSchema), null, HoodieSchema.NULL_VALUE),
+ HoodieSchemaField.of("name", HoodieSchema.createUnion(nullSchema,
stringSchema), null, HoodieSchema.NULL_VALUE),
+ HoodieSchemaField.of("age", HoodieSchema.createUnion(intSchema,
nullSchema), null, 0),
+ HoodieSchemaField.of("city", HoodieSchema.createUnion(nullSchema,
stringSchema), null, HoodieSchema.NULL_VALUE),
+ HoodieSchemaField.of("timestamp", HoodieSchema.createUnion(longSchema,
nullSchema), null, 0L)
);
- fullSchema.setFields(fields);
- return fullSchema;
+ return HoodieSchema.createRecord("TestRecord", null, null, fields);
}
- private static Schema getSchema2() {
+ private static HoodieSchema getSchema2() {
// Create a partial schema with only some fields
- Schema partialSchema = Schema.createRecord("PartialRecord", null, null,
false);
- partialSchema.setFields(Arrays.asList(
- new Schema.Field("precombine",
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.LONG),
Schema.create(Schema.Type.NULL))), null, 0),
- new Schema.Field("id",
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL),
Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
- new Schema.Field("name",
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL),
Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE)
- ));
- return partialSchema;
+ HoodieSchema nullSchema = HoodieSchema.create(HoodieSchemaType.NULL);
+ HoodieSchema stringSchema = HoodieSchema.create(HoodieSchemaType.STRING);
+ HoodieSchema longSchema = HoodieSchema.create(HoodieSchemaType.LONG);
+
+ List<HoodieSchemaField> fields = Arrays.asList(
+ HoodieSchemaField.of("precombine",
HoodieSchema.createUnion(longSchema, nullSchema), null, 0),
Review Comment:
Just want to call this out: Avro has a strict validation rule that the
default value for a union field must match the type of the FIRST branch of the
union.
- ["null", "int"] with default null **(Valid)**
- ["null", "int"] with default 0 **(Invalid)**
- ["int", "null"] with default 0 **(Valid)**
- ["int", "null"] with default null **(Invalid)**
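If we use `HoodieSchema.createNullable` for int fields with default 0, the
resulting union is `["null", "int"]` (null first, as the error below shows), so
the default 0 no longer matches the first branch and the following error will
be thrown for the affected tests: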
```
Caused by: org.apache.avro.AvroTypeException: Invalid default for field age:
0 not a ["null","int"]
at org.apache.avro.Schema.validateDefault(Schema.java:1635)
at org.apache.avro.Schema.access$500(Schema.java:94)
at org.apache.avro.Schema$Field.<init>(Schema.java:561)
at org.apache.avro.Schema$Field.<init>(Schema.java:607)
at
org.apache.hudi.avro.HoodieAvroUtils.createNewSchemaField(HoodieAvroUtils.java:381)
at
org.apache.hudi.common.schema.HoodieSchemaField.of(HoodieSchemaField.java:113)
at
org.apache.hudi.common.schema.HoodieSchemaField.of(HoodieSchemaField.java:92)
at
org.apache.hudi.functional.TestBufferedRecordMerger.getSchema1(TestBufferedRecordMerger.java:815)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]