abdullah alamoudi has submitted this change and it was merged. Change subject: [NO ISSUE][STO] Cleanup corrupted resources on failed creation ......................................................................
[NO ISSUE][STO] Cleanup corrupted resources on failed creation - user model changes: no - storage format changes: no - interface changes: no Details: - In some cases, when a rebalance is cancelled by the user, we abort the process of creating local resources on nodes. When that happens, it can leave corrupted resources that cause subsequent drop and/or create to fail until the node is restarted. - To avoid this, we make sure that the operation that creates the resource is atomic. Change-Id: I095a8eb0f1be2a9aa0fc269770978691746c3cec Reviewed-on: https://asterix-gerrit.ics.uci.edu/2800 Sonar-Qube: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Contrib: Jenkins <[email protected]> Integration-Tests: Jenkins <[email protected]> Reviewed-by: abdullah alamoudi <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> --- M asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java M hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java 2 files changed, 25 insertions(+), 4 deletions(-) Approvals: Anon. E. 
Moose #1000171: abdullah alamoudi: Looks good to me, but someone else must approve Jenkins: Verified; No violations found; ; Verified Murtadha Hubail: Looks good to me, approved diff --git a/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java b/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java index 7cd31bb..93d9414 100644 --- a/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java +++ b/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java @@ -73,6 +73,7 @@ import org.apache.hyracks.storage.am.lsm.common.impls.AbstractLSMIndexFileManager; import org.apache.hyracks.storage.common.ILocalResourceRepository; import org.apache.hyracks.storage.common.LocalResource; +import org.apache.hyracks.util.ExitUtil; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -182,6 +183,7 @@ return resource; } + @SuppressWarnings("squid:S1181") @Override public synchronized void insert(LocalResource resource) throws HyracksDataException { String relativePath = getFileName(resource.getPath()); @@ -194,16 +196,21 @@ if (!parent.exists() && !parent.mkdirs()) { throw HyracksDataException.create(CANNOT_CREATE_FILE, parent.getAbsolutePath()); } - createResourceFileMask(resourceFile); + // The next block should be all or nothing try { + createResourceFileMask(resourceFile); byte[] bytes = OBJECT_MAPPER.writeValueAsBytes(resource.toJson(persistedResourceRegistry)); final Path path = Paths.get(resourceFile.getAbsolutePath()); Files.write(path, bytes); - } catch (IOException e) { + indexCheckpointManagerProvider.get(DatasetResourceReference.of(resource)).init(null, 0); + deleteResourceFileMask(resourceFile); + } catch (Exception e) { + 
cleanup(resourceFile); throw HyracksDataException.create(e); + } catch (Throwable th) { + LOGGER.error("Error creating resource {}", resourceFile, th); + ExitUtil.halt(ExitUtil.EC_ERROR_CREATING_RESOURCES); } - indexCheckpointManagerProvider.get(DatasetResourceReference.of(resource)).init(null, 0); - deleteResourceFileMask(resourceFile); resourceCache.put(resource.getPath(), resource); //if replication enabled, send resource metadata info to remote nodes if (isReplicationEnabled) { @@ -211,6 +218,18 @@ } } + @SuppressWarnings("squid:S1181") + private void cleanup(FileReference resourceFile) { + if (resourceFile.getFile().exists()) { + try { + IoUtil.delete(resourceFile); + } catch (Throwable th) { + LOGGER.error("Error cleaning up corrupted resource {}", resourceFile, th); + ExitUtil.halt(ExitUtil.EC_FAILED_TO_DELETE_CORRUPTED_RESOURCES); + } + } + } + @Override public synchronized void delete(String relativePath) throws HyracksDataException { FileReference resourceFile = getLocalResourceFileByName(ioManager, relativePath); diff --git a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java index 14cfc59..f7c401a 100644 --- a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java +++ b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java @@ -40,6 +40,8 @@ public static final int EC_INCONSISTENT_METADATA = 8; public static final int EC_UNCAUGHT_THROWABLE = 9; public static final int EC_UNHANDLED_EXCEPTION = 11; + public static final int EC_FAILED_TO_DELETE_CORRUPTED_RESOURCES = 12; + public static final int EC_ERROR_CREATING_RESOURCES = 13; public static final int EC_FAILED_TO_CANCEL_ACTIVE_START_STOP = 22; public static final int EC_IMMEDIATE_HALT = 33; public static final int EC_HALT_ABNORMAL_RESERVED_44 = 44; -- To view, visit 
https://asterix-gerrit.ics.uci.edu/2800 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: I095a8eb0f1be2a9aa0fc269770978691746c3cec Gerrit-PatchSet: 2 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: abdullah alamoudi <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Murtadha Hubail <[email protected]> Gerrit-Reviewer: Till Westmann <[email protected]> Gerrit-Reviewer: abdullah alamoudi <[email protected]>
