the-other-tim-brown commented on code in PR #14340:
URL: https://github.com/apache/hudi/pull/14340#discussion_r2591251262
##########
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HiveHoodieReaderContext.java:
##########
@@ -181,14 +200,14 @@ public Option<HoodieRecordMerger> getRecordMerger(RecordMergeMode mergeMode, Str
   @Override
   public ClosableIterator<ArrayWritable> mergeBootstrapReaders(ClosableIterator<ArrayWritable> skeletonFileIterator,
-                                                               Schema skeletonRequiredSchema,
+                                                               HoodieSchema skeletonRequiredSchema,
                                                                ClosableIterator<ArrayWritable> dataFileIterator,
-                                                               Schema dataRequiredSchema,
+                                                               HoodieSchema dataRequiredSchema,
                                                                List<Pair<String, Object>> partitionFieldsAndValues) {
     int skeletonLen = skeletonRequiredSchema.getFields().size();
     int dataLen = dataRequiredSchema.getFields().size();
     int[] partitionFieldPositions = partitionFieldsAndValues.stream()
-        .map(pair -> dataRequiredSchema.getField(pair.getKey()).pos()).mapToInt(Integer::intValue).toArray();
+        .map(pair -> dataRequiredSchema.getField(pair.getKey()).get().pos()).mapToInt(Integer::intValue).toArray();
Review Comment:
Instead of `.get` we should do an `orElseThrow` to throw a more meaningful
exception if the value is not present in the option.
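A rough sketch of what that could look like, assuming the option returned by `getField` exposes an `orElseThrow(Supplier)` overload like `java.util.Optional` (the exception type and message here are illustrative):

```java
int[] partitionFieldPositions = partitionFieldsAndValues.stream()
    .map(pair -> dataRequiredSchema.getField(pair.getKey())
        // fail fast with a descriptive message instead of a bare NoSuchElementException
        .orElseThrow(() -> new IllegalArgumentException(
            "Partition field '" + pair.getKey() + "' not found in the data schema"))
        .pos())
    .mapToInt(Integer::intValue)
    .toArray();
```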
##########
hudi-hadoop-common/src/test/java/org/apache/hudi/common/model/TestHoodieAvroIndexedRecord.java:
##########
@@ -60,24 +65,26 @@ void testIsBuiltInDelete() {
   @Test
   void testIsDeleteWithCustomField() {
-    Schema schema = SchemaBuilder.record("TestRecord")
-        .fields()
-        .optionalString("custom_is_deleted")
-        .requiredString("field2")
-        .endRecord();
-    GenericRecord record1 = new GenericRecordBuilder(schema)
+    HoodieSchema hoodieSchema = HoodieSchema.createRecord("TestRecord", null, null,
+        Arrays.asList(
+            HoodieSchemaField.of("custom_is_deleted",
+                HoodieSchema.createUnion(HoodieSchema.create(HoodieSchemaType.NULL), HoodieSchema.create(HoodieSchemaType.STRING)),
Review Comment:
Here we can also use the `HoodieSchema.createNullable` to simplify the test
setup
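For illustration, assuming `createNullable` wraps the given schema in a null union, the explicit union above could become:

```java
// as written in the diff
HoodieSchema.createUnion(HoodieSchema.create(HoodieSchemaType.NULL), HoodieSchema.create(HoodieSchemaType.STRING))
// with the helper (exact signature assumed here)
HoodieSchema.createNullable(HoodieSchema.create(HoodieSchemaType.STRING))
```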
##########
hudi-common/src/test/java/org/apache/hudi/common/table/read/TestUpdateProcessor.java:
##########
@@ -55,11 +57,11 @@ class TestUpdateProcessor {
   private final HoodieReaderContext<IndexedRecord> readerContext = mock(HoodieReaderContext.class, RETURNS_DEEP_STUBS);
   private final RecordContext<IndexedRecord> recordContext = mock(RecordContext.class);
   private static final String KEY = "key";
-  private static final Schema SCHEMA = SchemaBuilder.record("TestRecord")
-      .fields()
-      .name("key").type().stringType().noDefault()
-      .name("value").type().stringType().noDefault()
-      .endRecord();
+  private static final HoodieSchema SCHEMA = HoodieSchema.createRecord("TestRecord", null, null,
+      java.util.Arrays.asList(
Review Comment:
nitpick: Import `Arrays`
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBufferedRecordMerger.java:
##########
@@ -800,54 +804,64 @@ private BufferedRecordMerger<InternalRow> createPartialMerger(RecordMergeMode me
     );
   }
-  private static Schema getSchema1() {
-    Schema fullSchema = Schema.createRecord("TestRecord", null, null, false);
-    List<Schema.Field> fields = Arrays.asList(
-        new Schema.Field("id", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
-        new Schema.Field("name", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
-        new Schema.Field("age", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.INT), Schema.create(Schema.Type.NULL))), null, 0),
-        new Schema.Field("city", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
-        new Schema.Field("timestamp", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.LONG), Schema.create(Schema.Type.NULL))), null, 0L)
+  private static HoodieSchema getSchema1() {
+    HoodieSchema nullSchema = HoodieSchema.create(HoodieSchemaType.NULL);
+    HoodieSchema stringSchema = HoodieSchema.create(HoodieSchemaType.STRING);
+    HoodieSchema intSchema = HoodieSchema.create(HoodieSchemaType.INT);
+    HoodieSchema longSchema = HoodieSchema.create(HoodieSchemaType.LONG);
+
+    List<HoodieSchemaField> fields = Arrays.asList(
+        HoodieSchemaField.of("id", HoodieSchema.createUnion(nullSchema, stringSchema), null, HoodieSchema.NULL_VALUE),
+        HoodieSchemaField.of("name", HoodieSchema.createUnion(nullSchema, stringSchema), null, HoodieSchema.NULL_VALUE),
+        HoodieSchemaField.of("age", HoodieSchema.createUnion(intSchema, nullSchema), null, 0),
+        HoodieSchemaField.of("city", HoodieSchema.createUnion(nullSchema, stringSchema), null, HoodieSchema.NULL_VALUE),
+        HoodieSchemaField.of("timestamp", HoodieSchema.createUnion(longSchema, nullSchema), null, 0L)
     );
-    fullSchema.setFields(fields);
-    return fullSchema;
+    return HoodieSchema.createRecord("TestRecord", null, null, fields);
   }
-  private static Schema getSchema2() {
+  private static HoodieSchema getSchema2() {
     // Create a partial schema with only some fields
-    Schema partialSchema = Schema.createRecord("PartialRecord", null, null, false);
-    partialSchema.setFields(Arrays.asList(
-        new Schema.Field("precombine", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.LONG), Schema.create(Schema.Type.NULL))), null, 0),
-        new Schema.Field("id", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
-        new Schema.Field("name", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE)
-    ));
-    return partialSchema;
+    HoodieSchema nullSchema = HoodieSchema.create(HoodieSchemaType.NULL);
+    HoodieSchema stringSchema = HoodieSchema.create(HoodieSchemaType.STRING);
+    HoodieSchema longSchema = HoodieSchema.create(HoodieSchemaType.LONG);
+
+    List<HoodieSchemaField> fields = Arrays.asList(
+        HoodieSchemaField.of("precombine", HoodieSchema.createUnion(longSchema, nullSchema), null, 0),
Review Comment:
Instead of the `createUnion` we can use the `HoodieSchema.createNullable`
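A sketch of the simplification, assuming `HoodieSchema.createNullable(schema)` builds the same null union. Note that the explicit unions for fields with non-null defaults (e.g. `precombine`, `age`) put the non-null branch first, so this also assumes the helper handles such defaults correctly:

```java
// hypothetical rewrite of the nullable string fields
HoodieSchemaField.of("id", HoodieSchema.createNullable(stringSchema), null, HoodieSchema.NULL_VALUE),
HoodieSchemaField.of("name", HoodieSchema.createNullable(stringSchema), null, HoodieSchema.NULL_VALUE)
```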
##########
hudi-common/src/main/java/org/apache/hudi/common/table/read/BufferedRecord.java:
##########
@@ -90,7 +91,11 @@ public HoodieOperation getHoodieOperation() {
   public BufferedRecord<T> toBinary(RecordContext<T> recordContext) {
     if (record != null) {
-      record = recordContext.seal(recordContext.toBinaryRow(recordContext.getSchemaFromBufferRecord(this), record));
+      HoodieSchema schema = recordContext.getSchemaFromBufferRecord(this);
+      // Schema can be null in test scenarios where schemas are not registered in the RecordContext (e.g. in tests)
+      if (schema != null) {
+        record = recordContext.seal(recordContext.toBinaryRow(schema, record));
Review Comment:
Do we still want to `seal` in these cases?
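To make the question concrete, one possible reading (a sketch only, not something proposed in the PR) is to keep the defensive copy even when no schema is registered:

```java
HoodieSchema schema = recordContext.getSchemaFromBufferRecord(this);
if (schema != null) {
  record = recordContext.seal(recordContext.toBinaryRow(schema, record));
} else {
  // hypothetical: still seal the record even though it cannot be converted to a binary row
  record = recordContext.seal(record);
}
```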
##########
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/format/TestFlinkRowDataReaderContext.java:
##########
@@ -48,11 +49,12 @@
 class TestFlinkRowDataReaderContext {
   private final StorageConfiguration<?> storageConfig = mock(StorageConfiguration.class);
   private final HoodieTableConfig tableConfig = mock(HoodieTableConfig.class);
-  private static final Schema AVRO_SCHEMA = SchemaBuilder.record("TestRecord").fields()
-      .requiredInt("id")
-      .requiredString("name")
-      .requiredBoolean("active")
-      .endRecord();
+  private static final HoodieSchema SCHEMA = HoodieSchema.createRecord("TestRecord", null, null,
+      java.util.Arrays.asList(
Review Comment:
nitpick: let's import `Arrays`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]