danny0405 commented on code in PR #8837:
URL: https://github.com/apache/hudi/pull/8837#discussion_r1245993652
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java:
##########
@@ -900,10 +903,75 @@ public void update(HoodieCleanMetadata cleanMetadata,
String instantTime) {
*/
@Override
public void update(HoodieRestoreMetadata restoreMetadata, String
instantTime) {
- processAndCommit(instantTime, () ->
HoodieTableMetadataUtil.convertMetadataToRecords(engineContext,
- metadataMetaClient.getActiveTimeline(), restoreMetadata,
getRecordsGenerationParams(), instantTime,
- metadata.getSyncedInstantTime()));
- closeInternal();
+ dataMetaClient.reloadActiveTimeline();
+
+ // Since the restore has completed on the dataset, the latest write
timeline instant is the one to which the
+ // restore was performed. This should be always present.
+ final String restoreToInstantTime =
dataMetaClient.getActiveTimeline().getWriteTimeline()
+ .lastInstant().get().getTimestamp();
+
+ // We cannot restore to before the oldest compaction on MDT as we don't
have the basefiles before that time.
+ Option<HoodieInstant> oldestCompaction =
metadataMetaClient.getCommitTimeline().filterCompletedInstants().firstInstant();
+ if (oldestCompaction.isPresent()) {
+ if (HoodieTimeline.LESSER_THAN_OR_EQUALS.test(restoreToInstantTime,
oldestCompaction.get().getTimestamp())) {
+ String msg = String.format("Cannot restore MDT to %s because it is
before the oldest compaction at %s", restoreToInstantTime,
+ oldestCompaction.get().getTimestamp()) + ". Please delete MDT and
restore again";
+ LOG.error(msg);
+ throw new HoodieMetadataException(msg);
+ }
+ }
+
+ // Restore requires the existing pipelines to be shutdown. So we can
safely scan the dataset to find the current
+ // list of files in the filesystem.
+ List<DirectoryInfo> dirInfoList =
listAllPartitionsFromFilesystem(instantTime);
+ Map<String, DirectoryInfo> dirInfoMap =
dirInfoList.stream().collect(Collectors.toMap(DirectoryInfo::getRelativePath,
Function.identity()));
+ dirInfoList.clear();
+
+ LOG.info("Restoring MDT to " + restoreToInstantTime + " at " +
instantTime);
+ getWriteClient().restoreToInstant(restoreToInstantTime, false);
+
+ // At this point we have also reverted the cleans which have occurred
after the restoreToInstantTime. Hence, a sync
+ // is required to bring back those cleans.
+ try {
+ initMetadataReader();
+ HoodieCleanMetadata cleanMetadata = new HoodieCleanMetadata();
+ Map<String, HoodieCleanPartitionMetadata> partitionMetadata = new
HashMap<>();
+ for (String partition : metadata.fetchAllPartitionPaths()) {
+ FileStatus[] metadataFiles = metadata.getAllFilesInPartition(new
Path(dataWriteConfig.getBasePath(), partition));
+ if (!dirInfoMap.containsKey(partition)) {
+ // Entire partition has been deleted
+ List<String> filePaths = Arrays.stream(metadataFiles).map(f ->
f.getPath().getName()).collect(Collectors.toList());
+ HoodieCleanPartitionMetadata cleanPartitionMetadata = new
HoodieCleanPartitionMetadata(partition, "", filePaths, filePaths,
+ Collections.emptyList(), true);
+ partitionMetadata.put(partition, cleanPartitionMetadata);
+ } else {
+ // Some files cleaned in the partition
+ Map<String, Long> fsFiles =
dirInfoMap.get(partition).getFileNameToSizeMap();
+ List<String> filesDeleted = Arrays.stream(metadataFiles).map(f ->
f.getPath().getName())
+ .filter(n ->
!fsFiles.containsKey(n)).collect(Collectors.toList());
+ if (!filesDeleted.isEmpty()) {
+ LOG.info("Found deleted files in partition " + partition + ": " +
filesDeleted);
+ HoodieCleanPartitionMetadata cleanPartitionMetadata = new
HoodieCleanPartitionMetadata(partition, "", filesDeleted, filesDeleted,
+ Collections.EMPTY_LIST, false);
+ partitionMetadata.put(partition, cleanPartitionMetadata);
Review Comment:
Use `Collections.emptyList()` here instead of the raw-typed `Collections.EMPTY_LIST`, for consistency with the other branch above and to avoid the unchecked raw-type usage.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]