voonhous commented on code in PR #18963:
URL: https://github.com/apache/hudi/pull/18963#discussion_r3390008832
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java:
##########
@@ -69,37 +69,74 @@ public CleanActionExecutor(HoodieEngineContext context,
HoodieWriteConfig config
}
private static boolean deleteFileAndGetResult(HoodieStorage storage, String
deletePathStr) {
+ StoragePath deletePath = new StoragePath(deletePathStr);
+ log.debug("Working on delete path: {}", deletePath);
+ try {
+ // Plan entries handled here are always base/log/bootstrap file paths
(never directories),
+ // so delete directly instead of paying an extra getPathInfo RPC per
file.
+ boolean deleteResult = storage.deleteFile(deletePath);
+ if (deleteResult) {
+ log.debug("Cleaned file at path: {}", deletePath);
+ return true;
+ }
+ if (storage.exists(deletePath)) {
+ throw new HoodieIOException("Failed to delete path during clean
execution " + deletePath);
+ }
+ // Hadoop file systems report a missing path by returning false from
delete instead of
+ // throwing FileNotFoundException, so this is the regular retried-clean
case below.
+ log.debug("Already cleaned up file at path: {}", deletePath);
+ return true;
+ } catch (FileNotFoundException fio) {
+ // With cleanPlan being used for retried cleaning operations, its
possible to clean a file twice if a file to be
+ // deleted is not found, treat it as a success. In other words, there
is nothing else to be cleaned up on the
+ // FileSystem, except for updating the MDT. By returning success, we
would remove the entry from MDT.
+ return true;
+ } catch (IOException e) {
+ try {
+ if (storage.exists(deletePath)) {
+ log.error("Delete file failed: {} and file still exists",
deletePath, e);
+ throw new HoodieIOException(e.getMessage(), e);
+ }
+ log.warn("Delete file failed: {} but file does not exist", deletePath,
e);
+ return false;
+ } catch (IOException ex) {
+ log.error("Delete file failed: {} with exception: {} and existence
check also failed", deletePath, e, ex);
+ throw new HoodieIOException(ex.getMessage(), ex);
+ }
+ }
+ }
+
+ private static boolean deletePathAndGetResult(HoodieStorage storage, String
deletePathStr) {
StoragePath deletePath = new StoragePath(deletePathStr);
log.debug("Working on delete path: {}", deletePath);
try {
boolean deleteResult = storage.getPathInfo(deletePath).isDirectory()
? storage.deleteDirectory(deletePath)
: storage.deleteFile(deletePath);
if (deleteResult) {
- log.debug("Cleaned file at path: {}", deletePath);
+ log.debug("Cleaned path: {}", deletePath);
} else {
if (storage.exists(deletePath)) {
throw new HoodieIOException("Failed to delete path during clean
execution " + deletePath);
} else {
- log.debug("Already cleaned up file at path: {}", deletePath);
+ log.debug("Already cleaned up path: {}", deletePath);
}
}
return deleteResult;
} catch (FileNotFoundException fio) {
- // With cleanPlan being used for retried cleaning operations, its
possible to clean a file twice if a file to be
- // deleted is not found, treat it as a success. In other words, there
is nothing else to be cleaned up on the
- // FileSystem, except for updating the MDT. By returning success, we
would remove the entry from MDT.
+ // With cleanPlan being used for retried cleaning operations, its
possible to clean a path twice if a path to be
+ // deleted is not found, treat it as a success.
return true;
} catch (IOException e) {
try {
if (storage.exists(deletePath)) {
- log.error("Delete file failed: {} and file still exists",
deletePath, e);
+ log.error("Delete path failed: {} and path still exists",
deletePath, e);
throw new HoodieIOException(e.getMessage(), e);
}
- log.warn("Delete file failed: {} but file does not exist", deletePath,
e);
+ log.warn("Delete path failed: {} but path does not exist", deletePath,
e);
return false;
} catch (IOException ex) {
- log.error("Delete file failed: {} with exception: {} and existence
check also failed", deletePath, e, ex);
+ log.error("Delete path failed: {} with exception: {} and existence
check also failed", deletePath, e, ex);
Review Comment:
Renamed to `deleteDirAndGetResult` with the log messages updated
accordingly.
On the `isDirectory` check: it is unnecessary -- only partition directories
reach this method, so it now calls `deleteDirectory` directly, which also saves
the `getPathInfo` round trip per deleted partition.
A missing directory on a retried clean surfaces as a false return from
delete (Hadoop file systems do not throw `FileNotFoundException` here) and is
handled the same way as in the file variant: verify non-existence and treat it
as already cleaned.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]