nsivabalan commented on code in PR #12992:
URL: https://github.com/apache/hudi/pull/12992#discussion_r2003863469
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java:
##########
@@ -237,8 +247,31 @@ void testRollbackWithDeltaAndCompactionCommit(boolean
rollbackUsingMarkers, int
// Verify there are no errors
assertNoWriteErrors(statuses);
+ WriteMarkers markers =
WriteMarkersFactory.get(secondCfg.getMarkersType(), hoodieTable, commitTime1);
+ IOType logFileIOType = tableVersion >= 8 ? IOType.CREATE :
IOType.APPEND;
+ assertTrue(markers.allMarkerFilePaths().stream().anyMatch(name ->
name.contains("log") && name.contains(logFileIOType.name())));
+
// Test failed delta commit rollback
secondClient.rollback(commitTime1);
+ // After rollback for table version 6, there should be new
+ metaClient = HoodieTableMetaClient.reload(metaClient);
+ HoodieInstant rollbackInstant =
metaClient.getActiveTimeline().getRollbackTimeline().firstInstant().get();
+ HoodieRollbackMetadata rollbackMetadata =
metaClient.getActiveTimeline().readInstantContent(rollbackInstant,
HoodieRollbackMetadata.class);
+ List<StoragePathInfo> logFiles = listAllLogFilesInPath(hoodieTable);
+ if (tableVersion < HoodieTableVersion.EIGHT.versionCode()) {
+ // Ensure rollback metadata contains rollback log files for table
version 6
+ assertTrue(rollbackMetadata.getPartitionMetadata().values().stream()
+ .noneMatch(rollbackPartitionMetadata ->
rollbackPartitionMetadata.getRollbackLogFiles().isEmpty()));
Review Comment:
or incase of completed rollback, it should be part of
`logFilesFromFailedCommit `
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java:
##########
@@ -237,8 +247,31 @@ void testRollbackWithDeltaAndCompactionCommit(boolean
rollbackUsingMarkers, int
// Verify there are no errors
assertNoWriteErrors(statuses);
+ WriteMarkers markers =
WriteMarkersFactory.get(secondCfg.getMarkersType(), hoodieTable, commitTime1);
+ IOType logFileIOType = tableVersion >= 8 ? IOType.CREATE :
IOType.APPEND;
+ assertTrue(markers.allMarkerFilePaths().stream().anyMatch(name ->
name.contains("log") && name.contains(logFileIOType.name())));
+
// Test failed delta commit rollback
secondClient.rollback(commitTime1);
+ // After rollback for table version 6, there should be new
+ metaClient = HoodieTableMetaClient.reload(metaClient);
+ HoodieInstant rollbackInstant =
metaClient.getActiveTimeline().getRollbackTimeline().firstInstant().get();
+ HoodieRollbackMetadata rollbackMetadata =
metaClient.getActiveTimeline().readInstantContent(rollbackInstant,
HoodieRollbackMetadata.class);
+ List<StoragePathInfo> logFiles = listAllLogFilesInPath(hoodieTable);
+ if (tableVersion < HoodieTableVersion.EIGHT.versionCode()) {
+ // Ensure rollback metadata contains rollback log files for table
version 6
+ assertTrue(rollbackMetadata.getPartitionMetadata().values().stream()
+ .noneMatch(rollbackPartitionMetadata ->
rollbackPartitionMetadata.getRollbackLogFiles().isEmpty()));
+ // Total 3 partitions would contain 6 log files - one rollback log
file and one log file with data block
+ assertEquals(6, logFiles.size());
Review Comment:
can we also validate logFilesFromFailedCommit from completed rollback commit
metadata. this should refer to actual data file added by the commit being
rolledback
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java:
##########
@@ -237,8 +247,31 @@ void testRollbackWithDeltaAndCompactionCommit(boolean
rollbackUsingMarkers, int
// Verify there are no errors
assertNoWriteErrors(statuses);
+ WriteMarkers markers =
WriteMarkersFactory.get(secondCfg.getMarkersType(), hoodieTable, commitTime1);
+ IOType logFileIOType = tableVersion >= 8 ? IOType.CREATE :
IOType.APPEND;
+ assertTrue(markers.allMarkerFilePaths().stream().anyMatch(name ->
name.contains("log") && name.contains(logFileIOType.name())));
+
// Test failed delta commit rollback
secondClient.rollback(commitTime1);
+ // After rollback for table version 6, there should be new
+ metaClient = HoodieTableMetaClient.reload(metaClient);
+ HoodieInstant rollbackInstant =
metaClient.getActiveTimeline().getRollbackTimeline().firstInstant().get();
+ HoodieRollbackMetadata rollbackMetadata =
metaClient.getActiveTimeline().readInstantContent(rollbackInstant,
HoodieRollbackMetadata.class);
+ List<StoragePathInfo> logFiles = listAllLogFilesInPath(hoodieTable);
+ if (tableVersion < HoodieTableVersion.EIGHT.versionCode()) {
+ // Ensure rollback metadata contains rollback log files for table
version 6
+ assertTrue(rollbackMetadata.getPartitionMetadata().values().stream()
+ .noneMatch(rollbackPartitionMetadata ->
rollbackPartitionMetadata.getRollbackLogFiles().isEmpty()));
Review Comment:
rollbackLogFiles should only refer to the log files added due to rollback
command block.
original log files added with the commit being rolledback should be part of
`logBlocksToBeDeleted`
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java:
##########
@@ -237,8 +247,31 @@ void testRollbackWithDeltaAndCompactionCommit(boolean
rollbackUsingMarkers, int
// Verify there are no errors
assertNoWriteErrors(statuses);
+ WriteMarkers markers =
WriteMarkersFactory.get(secondCfg.getMarkersType(), hoodieTable, commitTime1);
+ IOType logFileIOType = tableVersion >= 8 ? IOType.CREATE :
IOType.APPEND;
+ assertTrue(markers.allMarkerFilePaths().stream().anyMatch(name ->
name.contains("log") && name.contains(logFileIOType.name())));
+
// Test failed delta commit rollback
secondClient.rollback(commitTime1);
+ // After rollback for table version 6, there should be new
+ metaClient = HoodieTableMetaClient.reload(metaClient);
+ HoodieInstant rollbackInstant =
metaClient.getActiveTimeline().getRollbackTimeline().firstInstant().get();
+ HoodieRollbackMetadata rollbackMetadata =
metaClient.getActiveTimeline().readInstantContent(rollbackInstant,
HoodieRollbackMetadata.class);
+ List<StoragePathInfo> logFiles = listAllLogFilesInPath(hoodieTable);
+ if (tableVersion < HoodieTableVersion.EIGHT.versionCode()) {
+ // Ensure rollback metadata contains rollback log files for table
version 6
+ assertTrue(rollbackMetadata.getPartitionMetadata().values().stream()
+ .noneMatch(rollbackPartitionMetadata ->
rollbackPartitionMetadata.getRollbackLogFiles().isEmpty()));
Review Comment:
synced up f2f. we are good here.
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java:
##########
@@ -237,8 +247,31 @@ void testRollbackWithDeltaAndCompactionCommit(boolean
rollbackUsingMarkers, int
// Verify there are no errors
assertNoWriteErrors(statuses);
+ WriteMarkers markers =
WriteMarkersFactory.get(secondCfg.getMarkersType(), hoodieTable, commitTime1);
+ IOType logFileIOType = tableVersion >= 8 ? IOType.CREATE :
IOType.APPEND;
+ assertTrue(markers.allMarkerFilePaths().stream().anyMatch(name ->
name.contains("log") && name.contains(logFileIOType.name())));
+
// Test failed delta commit rollback
secondClient.rollback(commitTime1);
+ // After rollback for table version 6, there should be new
+ metaClient = HoodieTableMetaClient.reload(metaClient);
+ HoodieInstant rollbackInstant =
metaClient.getActiveTimeline().getRollbackTimeline().firstInstant().get();
+ HoodieRollbackMetadata rollbackMetadata =
metaClient.getActiveTimeline().readInstantContent(rollbackInstant,
HoodieRollbackMetadata.class);
+ List<StoragePathInfo> logFiles = listAllLogFilesInPath(hoodieTable);
+ if (tableVersion < HoodieTableVersion.EIGHT.versionCode()) {
+ // Ensure rollback metadata contains rollback log files for table
version 6
+ assertTrue(rollbackMetadata.getPartitionMetadata().values().stream()
+ .noneMatch(rollbackPartitionMetadata ->
rollbackPartitionMetadata.getRollbackLogFiles().isEmpty()));
Review Comment:
`logBlocksToBeDeleted` will be the case for rollback plan
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]