nsivabalan commented on code in PR #13950:
URL: https://github.com/apache/hudi/pull/13950#discussion_r2389506617
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java:
##########
@@ -753,6 +753,27 @@ private void validateVersion8Properties(HoodieTableConfig
tableConfig) {
}
private void validateVersion9Properties(HoodieTableMetaClient metaClient,
HoodieTableConfig tableConfig) {
+ Option<TimelineLayoutVersion> layoutVersion =
tableConfig.getTimelineLayoutVersion();
+ assertTrue(layoutVersion.isPresent(), "Timeline layout version should be
present for V8+");
Review Comment:
minor: the assertion message says `V8+` but this validates V9 properties — it should say `V9+`
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java:
##########
@@ -753,6 +753,27 @@ private void validateVersion8Properties(HoodieTableConfig
tableConfig) {
}
private void validateVersion9Properties(HoodieTableMetaClient metaClient,
HoodieTableConfig tableConfig) {
+ Option<TimelineLayoutVersion> layoutVersion =
tableConfig.getTimelineLayoutVersion();
+ assertTrue(layoutVersion.isPresent(), "Timeline layout version should be
present for V8+");
+ assertEquals(TimelineLayoutVersion.LAYOUT_VERSION_2, layoutVersion.get(),
+ "Timeline layout should be V2 for V8+");
+ assertTrue(tableConfig.contains(HoodieTableConfig.TIMELINE_PATH),
+ "Timeline path should be set for V9");
+ assertEquals(HoodieTableConfig.TIMELINE_PATH.defaultValue(),
+ tableConfig.getString(HoodieTableConfig.TIMELINE_PATH),
+ "Timeline path should have default value");
+ assertTrue(tableConfig.contains(HoodieTableConfig.RECORD_MERGE_MODE),
+ "Record merge mode should be set for V9");
+ RecordMergeMode mergeMode = tableConfig.getRecordMergeMode();
+ assertNotNull(mergeMode, "Merge mode should not be null");
+
+ assertTrue(tableConfig.contains(HoodieTableConfig.INITIAL_VERSION),
+ "Initial version should be set for V9");
+ if (tableConfig.contains(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME)) {
+ assertTrue(tableConfig.contains(HoodieTableConfig.KEY_GENERATOR_TYPE),
+ "Key generator type should be set when key generator class is
present");
+ }
Review Comment:
We could reuse code across the V8 and V9 validations.
##########
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java:
##########
@@ -920,7 +919,18 @@ public static Triple<RecordMergeMode, String, String>
inferMergingConfigsForWrit
String recordMergeStrategyId,
String orderingFieldNamesAsString,
HoodieTableVersion tableVersion) {
- return inferMergingConfigsForPreV9Table(recordMergeMode, payloadClassName,
recordMergeStrategyId, orderingFieldNamesAsString, tableVersion);
+ if (tableVersion.greaterThanOrEquals(HoodieTableVersion.NINE)) {
Review Comment:
Do we have unit tests (UTs) covering this branch?
##########
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java:
##########
@@ -966,7 +976,9 @@ public static Triple<RecordMergeMode, String, String>
inferMergingConfigsForPreV
inferredRecordMergeMode = modeBasedOnPayload != null ?
modeBasedOnPayload : modeBasedOnStrategyId;
}
}
- if (recordMergeMode != null) {
+
+ if (recordMergeMode != null &&
(!tableVersion.greaterThanOrEquals(HoodieTableVersion.NINE)
Review Comment:
Why don't we use `lesserThan` directly instead of negating `greaterThanOrEquals`?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala:
##########
@@ -314,6 +314,21 @@ object HoodieWriterUtils {
&& currentPartitionFields != tableConfigPartitionFields) {
diffConfigs.append(s"PartitionPath:\t$currentPartitionFields\t$tableConfigPartitionFields\n")
}
+ // The value of `HoodieTableConfig.RECORD_MERGE_STRATEGY_ID` can be
NULL or non-NULL.
+ // The non-NULL value has been validated above in the regular code
path.
+ // Here we check the NULL case since if the value is NULL, the check
is skipped above.
+ // So here we check if the write config contains non-null merge
strategy id. If so, throw.
+ // Here are two exclusions:
+ // CASE 1: For < v9 tables, we skip check completely for backward
compatibility.
+ // CASE 2: For >= v9 tables, merge-into queries.
Review Comment:
What's this special handling for merge-into (MIT) queries?
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkDataSource.scala:
##########
@@ -445,35 +447,51 @@ class TestSparkDataSource extends
SparkClientFunctionalTestHarness {
df1.write.format("hudi")
.option(HoodieWriteConfig.RECORD_MERGE_MODE.key, diffMergeMode.name)
.option(DataSourceWriteOptions.OPERATION.key,
DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL)
+ .option(HoodieWriteConfig.AUTO_UPGRADE_VERSION.key, "false")
.mode(SaveMode.Append)
.save(basePath)
val finalDf = spark.read.format("hudi")
.options(readOpts)
.load(basePath)
- assertEquals(399, finalDf.count())
- } else {
- Assertions.assertThrows(classOf[HoodieException], () => {
- df1.write.format("hudi")
- .option(HoodieWriteConfig.RECORD_MERGE_MODE.key, "any_other_payload")
- .option(DataSourceWriteOptions.OPERATION.key,
DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL)
- .mode(SaveMode.Append)
- .save(basePath)
- })
- }
- Assertions.assertThrows(classOf[HoodieException], () => {
df1.write.format("hudi")
.option(HoodieWriteConfig.WRITE_PAYLOAD_CLASS_NAME.key,
"any_other_payload")
.option(DataSourceWriteOptions.OPERATION.key,
DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL)
+ .option(HoodieWriteConfig.AUTO_UPGRADE_VERSION.key, "false")
.mode(SaveMode.Append)
.save(basePath)
- })
- Assertions.assertThrows(classOf[HoodieException], () => {
df1.write.format("hudi")
.option(HoodieWriteConfig.RECORD_MERGE_STRATEGY_ID.key,
"any_other_strategy_id")
.option(DataSourceWriteOptions.OPERATION.key,
DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL)
+ .option(HoodieWriteConfig.AUTO_UPGRADE_VERSION.key, "false")
.mode(SaveMode.Append)
.save(basePath)
- })
+ assertEquals(399, finalDf.count())
+ } else {
+ Assertions.assertThrows(classOf[HoodieException], () => {
+ df1.write.format("hudi")
+ .option(HoodieWriteConfig.RECORD_MERGE_MODE.key, "any_other_payload")
Review Comment:
should we try to change the merge mode to some other valid value?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]