the-other-tim-brown commented on code in PR #14340:
URL: https://github.com/apache/hudi/pull/14340#discussion_r2591251262
##########
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HiveHoodieReaderContext.java:
##########
@@ -181,14 +200,14 @@ public Option<HoodieRecordMerger> getRecordMerger(RecordMergeMode mergeMode, Str
   @Override
   public ClosableIterator<ArrayWritable> mergeBootstrapReaders(ClosableIterator<ArrayWritable> skeletonFileIterator,
-                                                               Schema skeletonRequiredSchema,
+                                                               HoodieSchema skeletonRequiredSchema,
                                                                ClosableIterator<ArrayWritable> dataFileIterator,
-                                                               Schema dataRequiredSchema,
+                                                               HoodieSchema dataRequiredSchema,
                                                                List<Pair<String, Object>> partitionFieldsAndValues) {
     int skeletonLen = skeletonRequiredSchema.getFields().size();
     int dataLen = dataRequiredSchema.getFields().size();
     int[] partitionFieldPositions = partitionFieldsAndValues.stream()
-        .map(pair -> dataRequiredSchema.getField(pair.getKey()).pos()).mapToInt(Integer::intValue).toArray();
+        .map(pair -> dataRequiredSchema.getField(pair.getKey()).get().pos()).mapToInt(Integer::intValue).toArray();
Review Comment:
Instead of `.get` we should do an `orElseThrow` to throw a more meaningful
exception if the value is not present in the option.
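A rough sketch of what that could look like, assuming the option returned by `getField` exposes an `orElseThrow(Supplier)` overload like `java.util.Optional` (the exception type and message here are illustrative):

```java
int[] partitionFieldPositions = partitionFieldsAndValues.stream()
    .map(pair -> dataRequiredSchema.getField(pair.getKey())
        // fail fast with a descriptive message instead of a bare NoSuchElementException
        .orElseThrow(() -> new IllegalArgumentException(
            "Partition field '" + pair.getKey() + "' not found in the data schema"))
        .pos())
    .mapToInt(Integer::intValue)
    .toArray();
```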
##########
hudi-hadoop-common/src/test/java/org/apache/hudi/common/model/TestHoodieAvroIndexedRecord.java:
##########
@@ -60,24 +65,26 @@ void testIsBuiltInDelete() {
   @Test
   void testIsDeleteWithCustomField() {
-    Schema schema = SchemaBuilder.record("TestRecord")
-        .fields()
-        .optionalString("custom_is_deleted")
-        .requiredString("field2")
-        .endRecord();
-    GenericRecord record1 = new GenericRecordBuilder(schema)
+    HoodieSchema hoodieSchema = HoodieSchema.createRecord("TestRecord", null, null,
+        Arrays.asList(
+            HoodieSchemaField.of("custom_is_deleted",
+                HoodieSchema.createUnion(HoodieSchema.create(HoodieSchemaType.NULL), HoodieSchema.create(HoodieSchemaType.STRING)),
Review Comment:
Here we can also use the `HoodieSchema.createNullable` to simplify the test
setup
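For illustration, assuming `createNullable` wraps the given schema in a null union, the explicit union above could become:

```java
// as written in the diff
HoodieSchema.createUnion(HoodieSchema.create(HoodieSchemaType.NULL), HoodieSchema.create(HoodieSchemaType.STRING))
// with the helper (exact signature assumed here)
HoodieSchema.createNullable(HoodieSchema.create(HoodieSchemaType.STRING))
```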
##########
hudi-common/src/test/java/org/apache/hudi/common/table/read/TestUpdateProcessor.java:
##########
@@ -55,11 +57,11 @@ class TestUpdateProcessor {
   private final HoodieReaderContext<IndexedRecord> readerContext = mock(HoodieReaderContext.class, RETURNS_DEEP_STUBS);
   private final RecordContext<IndexedRecord> recordContext = mock(RecordContext.class);
   private static final String KEY = "key";
-  private static final Schema SCHEMA = SchemaBuilder.record("TestRecord")
-      .fields()
-      .name("key").type().stringType().noDefault()
-      .name("value").type().stringType().noDefault()
-      .endRecord();
+  private static final HoodieSchema SCHEMA = HoodieSchema.createRecord("TestRecord", null, null,
+      java.util.Arrays.asList(
Review Comment:
nitpick: Import `Arrays`
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBufferedRecordMerger.java:
##########
@@ -800,54 +804,64 @@ private BufferedRecordMerger<InternalRow> createPartialMerger(RecordMergeMode me
     );
   }
-  private static Schema getSchema1() {
-    Schema fullSchema = Schema.createRecord("TestRecord", null, null, false);
-    List<Schema.Field> fields = Arrays.asList(
-        new Schema.Field("id", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
-        new Schema.Field("name", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
-        new Schema.Field("age", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.INT), Schema.create(Schema.Type.NULL))), null, 0),
-        new Schema.Field("city", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
-        new Schema.Field("timestamp", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.LONG), Schema.create(Schema.Type.NULL))), null, 0L)
+  private static HoodieSchema getSchema1() {
+    HoodieSchema nullSchema = HoodieSchema.create(HoodieSchemaType.NULL);
+    HoodieSchema stringSchema = HoodieSchema.create(HoodieSchemaType.STRING);
+    HoodieSchema intSchema = HoodieSchema.create(HoodieSchemaType.INT);
+    HoodieSchema longSchema = HoodieSchema.create(HoodieSchemaType.LONG);
+
+    List<HoodieSchemaField> fields = Arrays.asList(
+        HoodieSchemaField.of("id", HoodieSchema.createUnion(nullSchema, stringSchema), null, HoodieSchema.NULL_VALUE),
+        HoodieSchemaField.of("name", HoodieSchema.createUnion(nullSchema, stringSchema), null, HoodieSchema.NULL_VALUE),
+        HoodieSchemaField.of("age", HoodieSchema.createUnion(intSchema, nullSchema), null, 0),
+        HoodieSchemaField.of("city", HoodieSchema.createUnion(nullSchema, stringSchema), null, HoodieSchema.NULL_VALUE),
+        HoodieSchemaField.of("timestamp", HoodieSchema.createUnion(longSchema, nullSchema), null, 0L)
     );
-    fullSchema.setFields(fields);
-    return fullSchema;
+    return HoodieSchema.createRecord("TestRecord", null, null, fields);
   }
-  private static Schema getSchema2() {
+  private static HoodieSchema getSchema2() {
     // Create a partial schema with only some fields
-    Schema partialSchema = Schema.createRecord("PartialRecord", null, null, false);
-    partialSchema.setFields(Arrays.asList(
-        new Schema.Field("precombine", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.LONG), Schema.create(Schema.Type.NULL))), null, 0),
-        new Schema.Field("id", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE),
-        new Schema.Field("name", Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, Schema.NULL_VALUE)
-    ));
-    return partialSchema;
+    HoodieSchema nullSchema = HoodieSchema.create(HoodieSchemaType.NULL);
+    HoodieSchema stringSchema = HoodieSchema.create(HoodieSchemaType.STRING);
+    HoodieSchema longSchema = HoodieSchema.create(HoodieSchemaType.LONG);
+
+    List<HoodieSchemaField> fields = Arrays.asList(
+        HoodieSchemaField.of("precombine", HoodieSchema.createUnion(longSchema, nullSchema), null, 0),
Review Comment:
Instead of the `createUnion` we can use the `HoodieSchema.createNullable`
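A sketch of the simplification, assuming `HoodieSchema.createNullable(schema)` builds the same null union. Note that the explicit unions for fields with non-null defaults (e.g. `precombine`, `age`) put the non-null branch first, so this also assumes the helper handles such defaults correctly:

```java
// hypothetical rewrite of the nullable string fields
HoodieSchemaField.of("id", HoodieSchema.createNullable(stringSchema), null, HoodieSchema.NULL_VALUE),
HoodieSchemaField.of("name", HoodieSchema.createNullable(stringSchema), null, HoodieSchema.NULL_VALUE)
```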
##########
hudi-common/src/main/java/org/apache/hudi/common/table/read/BufferedRecord.java:
##########
@@ -90,7 +91,11 @@ public HoodieOperation getHoodieOperation() {
   public BufferedRecord<T> toBinary(RecordContext<T> recordContext) {
     if (record != null) {
-      record = recordContext.seal(recordContext.toBinaryRow(recordContext.getSchemaFromBufferRecord(this), record));
+      HoodieSchema schema = recordContext.getSchemaFromBufferRecord(this);
+      // Schema can be null in test scenarios where schemas are not registered in the RecordContext (e.g. in tests)
+      if (schema != null) {
+        record = recordContext.seal(recordContext.toBinaryRow(schema, record));
Review Comment:
Do we still want to `seal` in these cases?
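To make the question concrete, one possible reading (a sketch only, not something proposed in the PR) is to keep the defensive copy even when no schema is registered:

```java
HoodieSchema schema = recordContext.getSchemaFromBufferRecord(this);
if (schema != null) {
  record = recordContext.seal(recordContext.toBinaryRow(schema, record));
} else {
  // hypothetical: still seal the record even though it cannot be converted to a binary row
  record = recordContext.seal(record);
}
```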
##########
hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/format/TestFlinkRowDataReaderContext.java:
##########
@@ -48,11 +49,12 @@
 class TestFlinkRowDataReaderContext {
   private final StorageConfiguration<?> storageConfig = mock(StorageConfiguration.class);
   private final HoodieTableConfig tableConfig = mock(HoodieTableConfig.class);
-  private static final Schema AVRO_SCHEMA = SchemaBuilder.record("TestRecord").fields()
-      .requiredInt("id")
-      .requiredString("name")
-      .requiredBoolean("active")
-      .endRecord();
+  private static final HoodieSchema SCHEMA = HoodieSchema.createRecord("TestRecord", null, null,
+      java.util.Arrays.asList(
Review Comment:
nitpick: let's import `Arrays`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]