yihua commented on code in PR #17573:
URL: https://github.com/apache/hudi/pull/17573#discussion_r2633304473
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestHoodieMergeHandleWithSparkMerger.java:
##########
@@ -171,19 +171,19 @@ public HoodieWriteConfig getWriteConfig(Schema
avroSchema, String recordMergerIm
return getConfigBuilder(true)
.withPath(basePath())
- .withSchema(avroSchema.toString())
+ .withSchema(schema.toString())
.withProps(properties)
.build();
}
- public HoodieWriteConfig buildDefaultWriteConfig(Schema avroSchema) {
- HoodieWriteConfig writeConfig = getWriteConfig(avroSchema,
DefaultMerger.class.getName(),
HoodieRecordMerger.EVENT_TIME_BASED_MERGE_STRATEGY_UUID,
RecordMergeMode.EVENT_TIME_ORDERING);
+ public HoodieWriteConfig buildDefaultWriteConfig(HoodieSchema hoodieSchema) {
+ HoodieWriteConfig writeConfig = getWriteConfig(hoodieSchema,
DefaultMerger.class.getName(),
HoodieRecordMerger.EVENT_TIME_BASED_MERGE_STRATEGY_UUID,
RecordMergeMode.EVENT_TIME_ORDERING);
metaClient = getHoodieMetaClient(storageConf(), basePath(),
HoodieTableType.MERGE_ON_READ, writeConfig.getProps());
return writeConfig;
}
- public HoodieWriteConfig buildCustomWriteConfig(Schema avroSchema) {
- HoodieWriteConfig writeConfig = getWriteConfig(avroSchema,
CustomMerger.class.getName(), HoodieRecordMerger.CUSTOM_MERGE_STRATEGY_UUID,
RecordMergeMode.CUSTOM);
+ public HoodieWriteConfig buildCustomWriteConfig(HoodieSchema hoodieSchema) {
+ HoodieWriteConfig writeConfig = getWriteConfig(hoodieSchema,
CustomMerger.class.getName(), HoodieRecordMerger.CUSTOM_MERGE_STRATEGY_UUID,
RecordMergeMode.CUSTOM);
Review Comment:
nit: `hoodieSchema` -> `schema`
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedFileFormat.scala:
##########
@@ -212,8 +211,8 @@ class HoodieFileGroupReaderBasedFileFormat(tablePath:
String,
exclusionFields.add("op")
partitionSchema.fields.foreach(f => exclusionFields.add(f.name))
val requestedSchema = StructType(requiredSchema.fields ++
partitionSchema.fields.filter(f => mandatoryFields.contains(f.name)))
- val requestedAvroSchema = AvroSchemaUtils.pruneDataSchema(avroTableSchema,
AvroConversionUtils.convertStructTypeToAvroSchema(requestedSchema,
sanitizedTableName), exclusionFields)
- val dataAvroSchema = AvroSchemaUtils.pruneDataSchema(avroTableSchema,
AvroConversionUtils.convertStructTypeToAvroSchema(dataSchema,
sanitizedTableName), exclusionFields)
+ val requestedHoodieSchema = HoodieSchemaUtils.pruneDataSchema(schema,
HoodieSchemaConversionUtils.convertStructTypeToHoodieSchema(requestedSchema,
sanitizedTableName), exclusionFields)
+ val dataHoodieSchema = HoodieSchemaUtils.pruneDataSchema(schema,
HoodieSchemaConversionUtils.convertStructTypeToHoodieSchema(dataSchema,
sanitizedTableName), exclusionFields)
Review Comment:
```suggestion
val requestedSchema = HoodieSchemaUtils.pruneDataSchema(schema,
HoodieSchemaConversionUtils.convertStructTypeToHoodieSchema(requestedSchema,
sanitizedTableName), exclusionFields)
val dataSchema = HoodieSchemaUtils.pruneDataSchema(schema,
HoodieSchemaConversionUtils.convertStructTypeToHoodieSchema(dataSchema,
sanitizedTableName), exclusionFields)
```
##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/SparkHelpers.scala:
##########
@@ -51,12 +51,12 @@ object SparkHelpers {
val sourceRecords = HoodieIOFactory.getIOFactory(storage)
.getFileFormatUtils(HoodieFileFormat.PARQUET)
.readAvroRecords(storage, sourceFile).asScala
- val schema: Schema = sourceRecords.head.getSchema
+ val schema: HoodieSchema =
HoodieSchema.fromAvroSchema(sourceRecords.head.getSchema)
val filter: BloomFilter = BloomFilterFactory.createBloomFilter(
BLOOM_FILTER_NUM_ENTRIES_VALUE.defaultValue.toInt,
BLOOM_FILTER_FPP_VALUE.defaultValue.toDouble,
BLOOM_FILTER_DYNAMIC_MAX_ENTRIES.defaultValue.toInt,
BLOOM_FILTER_TYPE.defaultValue);
- val writeSupport: HoodieAvroWriteSupport[_] = new
HoodieAvroWriteSupport(getAvroSchemaConverter(conf.unwrap()).convert(schema),
- schema, Option.of(filter), new Properties())
+ val writeSupport: HoodieAvroWriteSupport[_] = new
HoodieAvroWriteSupport(getAvroSchemaConverter(conf.unwrap()).convert(schema.getAvroSchema),
+ schema.getAvroSchema, Option.of(filter), new Properties())
Review Comment:
Seems there is no need to change this, as it is merely converting the Avro schema to
`HoodieSchema` and then back?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]