lokeshj1703 commented on code in PR #13007:
URL: https://github.com/apache/hudi/pull/13007#discussion_r2013575672
##########
hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java:
##########
@@ -110,6 +115,10 @@ public boolean isAtomicCreationSupported() {
return supportAtomicCreation != null && supportAtomicCreation;
}
+ public boolean getListStatusFriendly() {
+ return listStatusFriendly != null && listStatusFriendly;
Review Comment:
Addressed
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java:
##########
@@ -96,6 +96,21 @@ static HoodieRollbackStat
mergeRollbackStat(HoodieRollbackStat stat1, HoodieRoll
return new HoodieRollbackStat(stat1.getPartitionPath(),
successDeleteFiles, failedDeleteFiles, commandBlocksCount,
logFilesFromFailedCommit);
}
+ static HoodieRollbackRequest mergeRollbackRequest(HoodieRollbackRequest
rollbackRequest1, HoodieRollbackRequest rollbackRequest2) {
Review Comment:
Addressed. Removed the tests around this method, since it has no production use.
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/marker/DirectWriteMarkers.java:
##########
@@ -115,15 +108,64 @@ public Set<String>
createdAndMergedDataPaths(HoodieEngineContext context, int pa
HoodieStorage storage = HoodieStorageUtils.getStorage(path,
storageConf);
return storage.listFiles(path).stream()
.map(pathInfo -> pathInfo.getPath().toString())
- .filter(pathStr ->
pathStr.contains(HoodieTableMetaClient.MARKER_EXTN)
- && !pathStr.endsWith(IOType.APPEND.name()))
+ .filter(pathStr -> NOT_APPEND_MARKER_PREDICATE.test(pathStr))
.map(this::translateMarkerToDataPath);
}, parallelism));
}
return dataFiles;
}
+ public Set<String> getAppendedLogPaths(HoodieEngineContext context, int
parallelism) throws IOException {
+ Set<String> logFiles = new HashSet<>();
+ List<String> subDirectories =
getSubDirectoriesByMarkerCondition(storage.listDirectEntries(markerDirPath),
logFiles, APPEND_MARKER_PREDICATE);
+
+ if (subDirectories.size() > 0) {
+ parallelism = Math.min(subDirectories.size(), parallelism);
+ StorageConfiguration<?> storageConf = storage.getConf();
+ context.setJobStatus(this.getClass().getSimpleName(), "Obtaining marker
files for all created, merged paths");
+ logFiles.addAll(context.flatMap(subDirectories, directory -> {
+ Queue<StoragePath> candidatesDirs = new LinkedList<>();
+ candidatesDirs.add(new StoragePath(directory));
+ List<String> result = new ArrayList<>();
+ while (!candidatesDirs.isEmpty()) {
+ StoragePath path = candidatesDirs.remove();
+ HoodieStorage storage = HoodieStorageUtils.getStorage(path,
storageConf);
+ List<StoragePathInfo> storagePathInfos =
storage.listDirectEntries(path);
+ for (StoragePathInfo pathInfo : storagePathInfos) {
+ if (pathInfo.isDirectory()) {
+ candidatesDirs.add(pathInfo.getPath());
+ } else {
+ String pathStr = pathInfo.getPath().toString();
+ if (APPEND_MARKER_PREDICATE.test(pathStr)) {
+ result.add(translateMarkerToDataPath(pathStr));
+ }
+ }
+ }
+ }
+ return result.stream();
+ }, parallelism));
+ }
+
+ return logFiles;
+ }
+
+ private List<String>
getSubDirectoriesByMarkerCondition(List<StoragePathInfo> topLevelInfoList,
Set<String> dataFiles, Predicate<String> pathCondition) {
+ List<String> subDirectories = new ArrayList<>();
+ for (StoragePathInfo topLevelInfo: topLevelInfoList) {
+ if (topLevelInfo.isFile()) {
+ String pathStr = topLevelInfo.getPath().toString();
+ if (pathCondition.test(pathStr)) {
+ dataFiles.add(translateMarkerToDataPath(pathStr));
Review Comment:
Addressed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]