This is an automated email from the ASF dual-hosted git repository. vhs pushed a commit to branch phase-18-HoodieAvroUtils-removal-p2 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit 2a50c5ab0a903a16aeac7e36874511985dc255a4 Author: voon <[email protected]> AuthorDate: Sun Dec 14 21:10:37 2025 +0800 Additional compilation error fixes - AvroOrcUtils - HoodieBootstrapSchemaProvider - HoodieSparkBootstrapSchemaProvider - TestOrcBootstrap --- .../bootstrap/HoodieBootstrapSchemaProvider.java | 6 +- .../SchemaConflictResolutionStrategy.java | 2 - .../org/apache/hudi/index/HoodieIndexUtils.java | 3 - ...TestSimpleSchemaConflictResolutionStrategy.java | 1 - .../apache/hudi/index/TestHoodieIndexUtils.java | 4 +- .../HoodieSparkBootstrapSchemaProvider.java | 14 ++--- .../org/apache/hudi/common/util/AvroOrcUtils.java | 67 ++++++++++------------ .../org/apache/hudi/functional/TestBootstrap.java | 1 - .../apache/hudi/functional/TestOrcBootstrap.java | 27 ++++----- 9 files changed, 55 insertions(+), 70 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/bootstrap/HoodieBootstrapSchemaProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/bootstrap/HoodieBootstrapSchemaProvider.java index 99b2dbad1be7..a459e6b9f67f 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/bootstrap/HoodieBootstrapSchemaProvider.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/bootstrap/HoodieBootstrapSchemaProvider.java @@ -25,8 +25,6 @@ import org.apache.hudi.common.schema.HoodieSchemaType; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.avro.Schema; - import java.util.List; /** @@ -46,7 +44,7 @@ public abstract class HoodieBootstrapSchemaProvider { * @param partitions List of partitions with files within them * @return Avro Schema */ - public final Schema getBootstrapSchema(HoodieEngineContext context, List<Pair<String, List<HoodieFileStatus>>> partitions) { + public final HoodieSchema getBootstrapSchema(HoodieEngineContext context, List<Pair<String, List<HoodieFileStatus>>> partitions) { if (writeConfig.getSchema() != null) { // Use schema specified by user if set HoodieSchema userSchema = HoodieSchema.parse(writeConfig.getSchema()); @@ -64,7 +62,7 @@ public abstract class HoodieBootstrapSchemaProvider { * @param partitions List of partitions with files within them * @return Avro Schema */ - protected abstract Schema getBootstrapSourceSchema(HoodieEngineContext context, + protected abstract HoodieSchema getBootstrapSourceSchema(HoodieEngineContext context, List<Pair<String, List<HoodieFileStatus>>> partitions); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SchemaConflictResolutionStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SchemaConflictResolutionStrategy.java index 2dd39fbd0589..2eb09bd27d6b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SchemaConflictResolutionStrategy.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SchemaConflictResolutionStrategy.java @@ -29,8 +29,6 @@ import org.apache.hudi.exception.HoodieSchemaEvolutionConflictException; import org.apache.hudi.exception.HoodieWriteConflictException; import org.apache.hudi.table.HoodieTable; -import org.apache.avro.Schema; - /** * Strategy interface for schema conflict resolution with multiple writers. * Users can provide pluggable implementations for different kinds of strategies to resolve conflicts when multiple diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java index 55e2b605ba94..3ec13c4c00ee 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java @@ -79,8 +79,6 @@ import org.apache.hudi.storage.StoragePath; import org.apache.hudi.table.HoodieTable; import lombok.extern.slf4j.Slf4j; -import org.apache.avro.LogicalTypes; -import org.apache.avro.Schema; import java.io.IOException; import java.util.ArrayList; @@ -93,7 +91,6 @@ import java.util.Set; import java.util.stream.Collectors; import static java.util.stream.Collectors.toList; -import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldSchemaFromWriteSchema; import static org.apache.hudi.common.config.HoodieMetadataConfig.GLOBAL_RECORD_LEVEL_INDEX_ENABLE_PROP; import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER; import static org.apache.hudi.common.util.HoodieRecordUtils.getOrderingFieldNames; diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestSimpleSchemaConflictResolutionStrategy.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestSimpleSchemaConflictResolutionStrategy.java index 4873a608cb38..44759a5743dd 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestSimpleSchemaConflictResolutionStrategy.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestSimpleSchemaConflictResolutionStrategy.java @@ -44,7 +44,6 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieSchemaEvolutionConflictException; import org.apache.hudi.table.TestBaseHoodieTable; -import org.apache.avro.Schema; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import org.mockito.Mock; diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/index/TestHoodieIndexUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/index/TestHoodieIndexUtils.java index a2b233925d73..70a871089fea 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/index/TestHoodieIndexUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/index/TestHoodieIndexUtils.java @@ -525,8 +525,8 @@ public class TestHoodieIndexUtils { HoodieSchema nullableIntWithDefault = HoodieSchema.createUnion(HoodieSchema.create(HoodieSchemaType.INT), HoodieSchema.create(HoodieSchemaType.NULL)); // Given: A schema with nullable fields (union types) HoodieSchema schema = HoodieSchema.createRecord("TestRecord", null, null, Arrays.asList( - HoodieSchemaField.of("nullableStringField", HoodieSchema.create(HoodieSchemaType.STRING)), - HoodieSchemaField.of("nullableIntField", nullableIntWithDefault, null, 0) + HoodieSchemaField.of("nullableStringField", HoodieSchema.create(HoodieSchemaType.STRING)), + HoodieSchemaField.of("nullableIntField", nullableIntWithDefault, null, 0) )); // Mock the schema resolver try (MockedConstruction<TableSchemaResolver> mockedResolver = Mockito.mockConstruction(TableSchemaResolver.class, diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/bootstrap/HoodieSparkBootstrapSchemaProvider.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/bootstrap/HoodieSparkBootstrapSchemaProvider.java index b8aa55975a17..556866444ee5 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/bootstrap/HoodieSparkBootstrapSchemaProvider.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/bootstrap/HoodieSparkBootstrapSchemaProvider.java @@ -24,13 +24,13 @@ import org.apache.hudi.avro.model.HoodieFileStatus; import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.schema.HoodieSchema; import org.apache.hudi.common.util.AvroOrcUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; -import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.orc.OrcFile; @@ -52,8 +52,8 @@ public class HoodieSparkBootstrapSchemaProvider extends HoodieBootstrapSchemaPro } @Override - protected Schema getBootstrapSourceSchema(HoodieEngineContext context, List<Pair<String, List<HoodieFileStatus>>> partitions) { - Schema schema = partitions.stream().flatMap(p -> p.getValue().stream()).map(fs -> { + protected HoodieSchema getBootstrapSourceSchema(HoodieEngineContext context, List<Pair<String, List<HoodieFileStatus>>> partitions) { + HoodieSchema schema = partitions.stream().flatMap(p -> p.getValue().stream()).map(fs -> { Path filePath = HadoopFSUtils.toPath(fs.getPath()); String extension = FSUtils.getFileExtension(filePath.getName()); if (PARQUET.getFileExtension().equals(extension)) { @@ -69,7 +69,7 @@ public class HoodieSparkBootstrapSchemaProvider extends HoodieBootstrapSchemaPro return schema; } - private static Schema getBootstrapSourceSchemaParquet(HoodieWriteConfig writeConfig, HoodieEngineContext context, Path filePath) { + private static HoodieSchema getBootstrapSourceSchemaParquet(HoodieWriteConfig writeConfig, HoodieEngineContext context, Path filePath) { // NOTE: The type inference of partition column in the parquet table is turned off explicitly, // to be consistent with the existing bootstrap behavior, where the partition column is String // typed in Hudi table. @@ -85,10 +85,10 @@ public class HoodieSparkBootstrapSchemaProvider extends HoodieBootstrapSchemaPro String structName = tableName + "_record"; String recordNamespace = "hoodie." + tableName; - return AvroConversionUtils.convertStructTypeToAvroSchema(parquetSchema, structName, recordNamespace); + return HoodieSchema.fromAvroSchema(AvroConversionUtils.convertStructTypeToAvroSchema(parquetSchema, structName, recordNamespace)); } - private static Schema getBootstrapSourceSchemaOrc(HoodieWriteConfig writeConfig, HoodieEngineContext context, Path filePath) { + private static HoodieSchema getBootstrapSourceSchemaOrc(HoodieWriteConfig writeConfig, HoodieEngineContext context, Path filePath) { Reader orcReader = null; try { orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(context.getStorageConf().unwrapAs(Configuration.class))); @@ -99,7 +99,7 @@ public class HoodieSparkBootstrapSchemaProvider extends HoodieBootstrapSchemaPro String tableName = HoodieAvroUtils.sanitizeName(writeConfig.getTableName()); String structName = tableName + "_record"; String recordNamespace = "hoodie." + tableName; - return AvroOrcUtils.createAvroSchemaWithDefaultValue(orcSchema, structName, recordNamespace, true); + return AvroOrcUtils.createSchemaWithDefaultValue(orcSchema, structName, recordNamespace, true); } } diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java index 62a555a0f9d7..7c9780baf623 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java @@ -18,6 +18,9 @@ package org.apache.hudi.common.util; +import org.apache.hudi.common.schema.HoodieSchema; +import org.apache.hudi.common.schema.HoodieSchemaField; +import org.apache.hudi.common.schema.HoodieSchemaType; import org.apache.hudi.exception.HoodieIOException; import org.apache.avro.Conversions; @@ -55,7 +58,6 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import static org.apache.avro.JsonProperties.NULL_VALUE; import static org.apache.hudi.common.util.BinaryUtil.toBytes; import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; @@ -810,74 +812,65 @@ public class AvroOrcUtils { } } - public static Schema createAvroSchemaWithDefaultValue(TypeDescription orcSchema, String recordName, String namespace, boolean nullable) { - Schema avroSchema = createAvroSchemaWithNamespace(orcSchema,recordName,namespace); - List<Schema.Field> fields = new ArrayList<Schema.Field>(); - List<Field> fieldList = avroSchema.getFields(); - for (Field field : fieldList) { - Schema fieldSchema = field.schema(); - Schema nullableSchema = Schema.createUnion(Schema.create(Schema.Type.NULL),fieldSchema); + public static HoodieSchema createSchemaWithDefaultValue(TypeDescription orcSchema, String recordName, String namespace, boolean nullable) { + HoodieSchema hoodieSchema = createSchemaWithNamespace(orcSchema,recordName,namespace); + List<HoodieSchemaField> fields = new ArrayList<>(); + for (HoodieSchemaField field : hoodieSchema.getFields()) { + HoodieSchema fieldSchema = field.schema(); + HoodieSchema nullableSchema = HoodieSchema.createNullable(fieldSchema); if (nullable) { - fields.add(new Schema.Field(field.name(), nullableSchema, null, NULL_VALUE)); + fields.add(HoodieSchemaField.of(field.name(), nullableSchema, null, HoodieSchema.NULL_VALUE)); } else { - fields.add(new Schema.Field(field.name(), fieldSchema, null, null)); + fields.add(HoodieSchemaField.of(field.name(), fieldSchema, null, null)); } } - Schema schema = Schema.createRecord(recordName, null, null, false); - schema.setFields(fields); - return schema; + return HoodieSchema.createRecord(recordName, null, null, false, fields); } - private static Schema createAvroSchemaWithNamespace(TypeDescription orcSchema, String recordName, String namespace) { + private static HoodieSchema createSchemaWithNamespace(TypeDescription orcSchema, String recordName, String namespace) { switch (orcSchema.getCategory()) { case BOOLEAN: - return Schema.create(Schema.Type.BOOLEAN); + return HoodieSchema.create(HoodieSchemaType.BOOLEAN); case BYTE: // tinyint (8 bit), use int to hold it - return Schema.create(Schema.Type.INT); + return HoodieSchema.create(HoodieSchemaType.INT); case SHORT: // smallint (16 bit), use int to hold it - return Schema.create(Schema.Type.INT); + return HoodieSchema.create(HoodieSchemaType.INT); case INT: // the Avro logical type could be AvroTypeUtil.LOGICAL_TYPE_TIME_MILLIS, but there is no way to distinguish - return Schema.create(Schema.Type.INT); + return HoodieSchema.create(HoodieSchemaType.INT); case LONG: // the Avro logical type could be AvroTypeUtil.LOGICAL_TYPE_TIME_MICROS, but there is no way to distinguish - return Schema.create(Schema.Type.LONG); + return HoodieSchema.create(HoodieSchemaType.LONG); case FLOAT: - return Schema.create(Schema.Type.FLOAT); + return HoodieSchema.create(HoodieSchemaType.FLOAT); case DOUBLE: - return Schema.create(Schema.Type.DOUBLE); + return HoodieSchema.create(HoodieSchemaType.DOUBLE); case VARCHAR: case CHAR: case STRING: - return Schema.create(Schema.Type.STRING); + return HoodieSchema.create(HoodieSchemaType.STRING); case DATE: - Schema date = Schema.create(Schema.Type.INT); - LogicalTypes.date().addToSchema(date); - return date; + return HoodieSchema.createDate(); case TIMESTAMP: - Schema timestamp = Schema.create(Schema.Type.LONG); - LogicalTypes.timestampMillis().addToSchema(timestamp); - return timestamp; + return HoodieSchema.createTimestampMillis(); case BINARY: - return Schema.create(Schema.Type.BYTES); + return HoodieSchema.create(HoodieSchemaType.BYTES); case DECIMAL: - Schema decimal = Schema.create(Schema.Type.BYTES); - LogicalTypes.decimal(orcSchema.getPrecision(), orcSchema.getScale()).addToSchema(decimal); - return decimal; + return HoodieSchema.createDecimal(orcSchema.getPrecision(), orcSchema.getScale()); case LIST: - return Schema.createArray(createAvroSchemaWithNamespace(orcSchema.getChildren().get(0), recordName, "")); + return HoodieSchema.createArray(createSchemaWithNamespace(orcSchema.getChildren().get(0), recordName, "")); case MAP: - return Schema.createMap(createAvroSchemaWithNamespace(orcSchema.getChildren().get(1), recordName, "")); + return HoodieSchema.createMap(createSchemaWithNamespace(orcSchema.getChildren().get(1), recordName, "")); case STRUCT: - List<Field> childFields = new ArrayList<>(); + List<HoodieSchemaField> childFields = new ArrayList<>(); for (int i = 0; i < orcSchema.getChildren().size(); i++) { TypeDescription childType = orcSchema.getChildren().get(i); String childName = orcSchema.getFieldNames().get(i); - childFields.add(new Field(childName, createAvroSchemaWithNamespace(childType, childName, ""), null, null)); + childFields.add(HoodieSchemaField.of(childName, createSchemaWithNamespace(childType, childName, ""), null, null)); } - return Schema.createRecord(recordName, null, namespace, false, childFields); + return HoodieSchema.createRecord(recordName, null, namespace, false, childFields); default: throw new IllegalStateException(String.format("Unrecognized ORC type: %s", orcSchema.getCategory().getName())); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java index f529ecd50adb..0cbd34fd3ab2 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java @@ -63,7 +63,6 @@ import org.apache.hudi.table.action.bootstrap.BootstrapUtils; import org.apache.hudi.testutils.HoodieMergeOnReadTestUtils; import org.apache.hudi.testutils.HoodieSparkClientTestBase; -import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java index b8e2a1ea0ea4..ad495b870a1f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java @@ -35,6 +35,7 @@ import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.schema.HoodieSchema; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.testutils.FileCreateUtilsLegacy; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; @@ -57,7 +58,6 @@ import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.bootstrap.BootstrapUtils; import org.apache.hudi.testutils.HoodieSparkClientTestBase; -import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -87,6 +87,7 @@ import java.io.IOException; import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -143,7 +144,7 @@ public class TestOrcBootstrap extends HoodieSparkClientTestBase { } - public Schema generateNewDataSetAndReturnSchema(long timestamp, int numRecords, List<String> partitionPaths, + public HoodieSchema generateNewDataSetAndReturnSchema(long timestamp, int numRecords, List<String> partitionPaths, String srcPath) throws Exception { boolean isPartitioned = partitionPaths != null && !partitionPaths.isEmpty(); Dataset<Row> df = @@ -164,7 +165,7 @@ public class TestOrcBootstrap extends HoodieSparkClientTestBase { TypeDescription orcSchema = orcReader.getSchema(); - return AvroOrcUtils.createAvroSchemaWithDefaultValue(orcSchema, "test_orc_record", null, true); + return AvroOrcUtils.createSchemaWithDefaultValue(orcSchema, "test_orc_record", null, true); } @Test @@ -233,7 +234,7 @@ public class TestOrcBootstrap extends HoodieSparkClientTestBase { } List<String> partitions = Arrays.asList("2020/04/01", "2020/04/02", "2020/04/03"); long timestamp = Instant.now().toEpochMilli(); - Schema schema = generateNewDataSetAndReturnSchema(timestamp, totalRecords, partitions, bootstrapBasePath); + HoodieSchema schema = generateNewDataSetAndReturnSchema(timestamp, totalRecords, partitions, bootstrapBasePath); HoodieWriteConfig config = getConfigBuilder(schema.toString(), partitioned) .withSchema(schema.toString()) .withKeyGenerator(keyGeneratorClass) @@ -350,13 +351,13 @@ public class TestOrcBootstrap extends HoodieSparkClientTestBase { testBootstrapCommon(true, true, EffectiveMode.MIXED_BOOTSTRAP_MODE); } - private void checkBootstrapResults(int totalRecords, Schema schema, String maxInstant, boolean checkNumRawFiles, + private void checkBootstrapResults(int totalRecords, HoodieSchema schema, String maxInstant, boolean checkNumRawFiles, int expNumInstants, long expTimestamp, long expROTimestamp, boolean isDeltaCommit, boolean validateRecordsForCommitTime) throws Exception { checkBootstrapResults(totalRecords, schema, maxInstant, checkNumRawFiles, expNumInstants, expNumInstants, - expTimestamp, expROTimestamp, isDeltaCommit, Arrays.asList(maxInstant), validateRecordsForCommitTime); + expTimestamp, expROTimestamp, isDeltaCommit, Collections.singletonList(maxInstant), validateRecordsForCommitTime); } - private void checkBootstrapResults(int totalRecords, Schema schema, String instant, boolean checkNumRawFiles, + private void checkBootstrapResults(int totalRecords, HoodieSchema schema, String instant, boolean checkNumRawFiles, int expNumInstants, int numVersions, long expTimestamp, long expROTimestamp, boolean isDeltaCommit, List<String> instantsWithValidRecords, boolean validateCommitRecords) throws Exception { metaClient.reloadActiveTimeline(); @@ -419,8 +420,8 @@ public class TestOrcBootstrap extends HoodieSparkClientTestBase { new Path(filePath), new OrcFile.ReaderOptions(jsc.hadoopConfiguration())); TypeDescription orcSchema = orcReader.getSchema(); - Schema avroSchema = AvroOrcUtils.createAvroSchemaWithDefaultValue(orcSchema, "test_orc_record", null, true); - return generateInputBatch(jsc, partitionPaths, avroSchema); + HoodieSchema hoodieSchema = AvroOrcUtils.createSchemaWithDefaultValue(orcSchema, "test_orc_record", null, true); + return generateInputBatch(jsc, partitionPaths, hoodieSchema); } catch (IOException ioe) { throw new HoodieIOException(ioe.getMessage(), ioe); @@ -429,13 +430,13 @@ public class TestOrcBootstrap extends HoodieSparkClientTestBase { } private static JavaRDD<HoodieRecord> generateInputBatch(JavaSparkContext jsc, - List<Pair<String, List<HoodieFileStatus>>> partitionPaths, Schema writerSchema) { + List<Pair<String, List<HoodieFileStatus>>> partitionPaths, HoodieSchema writerSchema) { List<Pair<String, Path>> fullFilePathsWithPartition = partitionPaths.stream().flatMap(p -> p.getValue().stream() .map(x -> Pair.of(p.getKey(), HadoopFSUtils.toPath(x.getPath())))).collect(Collectors.toList()); return jsc.parallelize(fullFilePathsWithPartition.stream().flatMap(p -> { try { Configuration conf = jsc.hadoopConfiguration(); - AvroReadSupport.setAvroReadSchema(conf, writerSchema); + AvroReadSupport.setAvroReadSchema(conf, writerSchema.toAvroSchema()); Reader orcReader = OrcFile.createReader( p.getValue(), new OrcFile.ReaderOptions(jsc.hadoopConfiguration())); @@ -443,9 +444,9 @@ public class TestOrcBootstrap extends HoodieSparkClientTestBase { TypeDescription orcSchema = orcReader.getSchema(); - Schema avroSchema = AvroOrcUtils.createAvroSchemaWithDefaultValue(orcSchema, "test_orc_record", null, true); + HoodieSchema hoodieSchema = AvroOrcUtils.createSchemaWithDefaultValue(orcSchema, "test_orc_record", null, true); - Iterator<GenericRecord> recIterator = new OrcReaderIterator(recordReader, avroSchema, orcSchema); + Iterator<GenericRecord> recIterator = new OrcReaderIterator(recordReader, hoodieSchema.toAvroSchema(), orcSchema); return StreamSupport.stream(Spliterators.spliteratorUnknownSize(recIterator, 0), false).map(gr -> { String key = gr.get("_row_key").toString();
