danielhumanmod commented on code in PR #312:
URL: https://github.com/apache/polaris/pull/312#discussion_r1827226532
##########
polaris-service/src/main/java/org/apache/polaris/service/task/ManifestFileCleanupTaskHandler.java:
##########
@@ -68,58 +70,107 @@ public boolean canHandleTask(TaskEntity task) {
@Override
public boolean handleTask(TaskEntity task) {
ManifestCleanupTask cleanupTask = task.readData(ManifestCleanupTask.class);
- ManifestFile manifestFile =
decodeManifestData(cleanupTask.getManifestFileData());
TableIdentifier tableId = cleanupTask.getTableId();
try (FileIO authorizedFileIO = fileIOSupplier.apply(task)) {
-
- // if the file doesn't exist, we assume that another task execution was
successful, but failed
- // to drop the task entity. Log a warning and return success
- if (!TaskUtils.exists(manifestFile.path(), authorizedFileIO)) {
+ if (cleanupTask.getManifestFileData() != null) {
+ ManifestFile manifestFile =
decodeManifestData(cleanupTask.getManifestFileData());
+ return cleanUpManifestFile(manifestFile, authorizedFileIO, tableId);
+ } else if (cleanupTask.getContentFileBatch() != null) {
+ return cleanUpContentFiles(cleanupTask.getContentFileBatch(),
authorizedFileIO, tableId);
+ } else {
LOGGER
.atWarn()
- .addKeyValue("manifestFile", manifestFile.path())
.addKeyValue("tableId", tableId)
- .log("Manifest cleanup task scheduled, but manifest file doesn't
exist");
+ .log("Cleanup task scheduled, but input file doesn't exist");
return true;
}
+ }
+ }
- ManifestReader<DataFile> dataFiles = ManifestFiles.read(manifestFile,
authorizedFileIO);
- List<CompletableFuture<Void>> dataFileDeletes =
- StreamSupport.stream(
- Spliterators.spliteratorUnknownSize(dataFiles.iterator(),
Spliterator.IMMUTABLE),
- false)
- .map(
- file ->
- tryDelete(
- tableId, authorizedFileIO, manifestFile,
file.path().toString(), null, 1))
- .toList();
- LOGGER.debug(
- "Scheduled {} data files to be deleted from manifest {}",
- dataFileDeletes.size(),
- manifestFile.path());
- try {
- // wait for all data files to be deleted, then wait for the manifest
itself to be deleted
-
CompletableFuture.allOf(dataFileDeletes.toArray(CompletableFuture[]::new))
- .thenCompose(
- (v) -> {
- LOGGER
- .atInfo()
- .addKeyValue("manifestFile", manifestFile.path())
- .log("All data files in manifest deleted - deleting
manifest");
- return tryDelete(
- tableId, authorizedFileIO, manifestFile,
manifestFile.path(), null, 1);
- })
- .get();
- return true;
- } catch (InterruptedException e) {
- LOGGER.error(
- "Interrupted exception deleting data files from manifest {}",
manifestFile.path(), e);
- throw new RuntimeException(e);
- } catch (ExecutionException e) {
- LOGGER.error("Unable to delete data files from manifest {}",
manifestFile.path(), e);
- return false;
- }
+ private boolean cleanUpManifestFile(
Review Comment:
Sorry for the lots of changes here, but don’t worry—it’s mainly because I
refactored the deletion logic for the manifest and all its data into a new
method; no other changes were made in lines 91-135.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]