abdullah alamoudi has uploaded a new change for review.
https://asterix-gerrit.ics.uci.edu/2800
Change subject: [NO ISSUE][STO] Cleanup corrupted resources on failed creation
......................................................................
[NO ISSUE][STO] Cleanup corrupted resources on failed creation
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- In some cases, when a rebalance is cancelled by the user,
we abort the process of creating local resources on
nodes. When that happens, it can leave corrupted
resources that cause subsequent drop and/or create
operations to fail until the node is restarted.
- To avoid this, we make sure that the operation that
creates the resource is atomic.
Change-Id: I095a8eb0f1be2a9aa0fc269770978691746c3cec
---
M
asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java
M
hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
2 files changed, 25 insertions(+), 4 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/00/2800/1
diff --git
a/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java
b/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java
index 7cd31bb..93d9414 100644
---
a/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java
+++
b/asterixdb/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/PersistentLocalResourceRepository.java
@@ -73,6 +73,7 @@
import
org.apache.hyracks.storage.am.lsm.common.impls.AbstractLSMIndexFileManager;
import org.apache.hyracks.storage.common.ILocalResourceRepository;
import org.apache.hyracks.storage.common.LocalResource;
+import org.apache.hyracks.util.ExitUtil;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -182,6 +183,7 @@
return resource;
}
+ @SuppressWarnings("squid:S1181")
@Override
public synchronized void insert(LocalResource resource) throws
HyracksDataException {
String relativePath = getFileName(resource.getPath());
@@ -194,16 +196,21 @@
if (!parent.exists() && !parent.mkdirs()) {
throw HyracksDataException.create(CANNOT_CREATE_FILE,
parent.getAbsolutePath());
}
- createResourceFileMask(resourceFile);
+ // The next block should be all or nothing
try {
+ createResourceFileMask(resourceFile);
byte[] bytes =
OBJECT_MAPPER.writeValueAsBytes(resource.toJson(persistedResourceRegistry));
final Path path = Paths.get(resourceFile.getAbsolutePath());
Files.write(path, bytes);
- } catch (IOException e) {
+
indexCheckpointManagerProvider.get(DatasetResourceReference.of(resource)).init(null,
0);
+ deleteResourceFileMask(resourceFile);
+ } catch (Exception e) {
+ cleanup(resourceFile);
throw HyracksDataException.create(e);
+ } catch (Throwable th) {
+ LOGGER.error("Error creating resource {}", resourceFile, th);
+ ExitUtil.halt(ExitUtil.EC_ERROR_CREATING_RESOURCES);
}
-
indexCheckpointManagerProvider.get(DatasetResourceReference.of(resource)).init(null,
0);
- deleteResourceFileMask(resourceFile);
resourceCache.put(resource.getPath(), resource);
//if replication enabled, send resource metadata info to remote nodes
if (isReplicationEnabled) {
@@ -211,6 +218,18 @@
}
}
+ @SuppressWarnings("squid:S1181")
+ private void cleanup(FileReference resourceFile) {
+ if (resourceFile.getFile().exists()) {
+ try {
+ IoUtil.delete(resourceFile);
+ } catch (Throwable th) {
+ LOGGER.error("Error cleaning up corrupted resource {}",
resourceFile, th);
+
ExitUtil.halt(ExitUtil.EC_FAILED_TO_DELETE_CORRUPTED_RESOURCES);
+ }
+ }
+ }
+
@Override
public synchronized void delete(String relativePath) throws
HyracksDataException {
FileReference resourceFile = getLocalResourceFileByName(ioManager,
relativePath);
diff --git
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
index 14cfc59..f7c401a 100644
---
a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
+++
b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
@@ -40,6 +40,8 @@
public static final int EC_INCONSISTENT_METADATA = 8;
public static final int EC_UNCAUGHT_THROWABLE = 9;
public static final int EC_UNHANDLED_EXCEPTION = 11;
+ public static final int EC_FAILED_TO_DELETE_CORRUPTED_RESOURCES = 12;
+ public static final int EC_ERROR_CREATING_RESOURCES = 13;
public static final int EC_FAILED_TO_CANCEL_ACTIVE_START_STOP = 22;
public static final int EC_IMMEDIATE_HALT = 33;
public static final int EC_HALT_ABNORMAL_RESERVED_44 = 44;
--
To view, visit https://asterix-gerrit.ics.uci.edu/2800
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I095a8eb0f1be2a9aa0fc269770978691746c3cec
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: abdullah alamoudi <[email protected]>