[asterixdb] branch master updated: [NO ISSUE][ACTIVE] Account for force stop while suspending
This is an automated email from the ASF dual-hosted git repository. mhubail pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git The following commit(s) were added to refs/heads/master by this push: new 5e11053 [NO ISSUE][ACTIVE] Account for force stop while suspending new bc8aaf6 Merge branch 'cheshire-cat' into master 5e11053 is described below commit 5e110538870c9385f316e0395b80a76f0e6d8e7c Author: Murtadha Hubail AuthorDate: Fri Sep 10 01:36:34 2021 +0300 [NO ISSUE][ACTIVE] Account for force stop while suspending - user model changes: no - storage format changes: no - interface changes: no Details: - When a failure happens while trying to suspend ingestion, we will force stop the active job. If the job completes ungracefully, we set the listener state to TEMPORARILY_FAILED. However, since the force stop only waits for the STOPPED state, the thread waiting for ingestion to be suspended will wait forever. This change accounts for such a case and makes the force stop wait for TEMPORARILY_FAILED too. 
Change-Id: Ib33f191be2b84d97a08e3bc6d607b0edbf35bed1 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/13144 Tested-by: Jenkins Integration-Tests: Jenkins Reviewed-by: Murtadha Hubail Reviewed-by: Ali Alsuliman (cherry picked from commit eed8714ae56bd61656750bf543181e7dd68c26c1) Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/13203 --- .../org/apache/asterix/app/active/ActiveEntityEventsListener.java | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java index 0242ecd..ddd3d64 100644 --- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java +++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java @@ -195,7 +195,7 @@ public abstract class ActiveEntityEventsListener implements IActiveEntityControl @SuppressWarnings("unchecked") protected void finish(ActiveEvent event) throws HyracksDataException { if (LOGGER.isEnabled(level)) { -LOGGER.log(level, "the job " + jobId + " finished"); +LOGGER.log(level, "the job {} finished", jobId); } JobId lastJobId = jobId; if (numRegistered != numDeRegistered) { @@ -208,7 +208,7 @@ public abstract class ActiveEntityEventsListener implements IActiveEntityControl JobStatus jobStatus = status.getLeft(); List exceptions = status.getRight(); if (LOGGER.isEnabled(level)) { -LOGGER.log(level, "The job finished with status: " + jobStatus); +LOGGER.log(level, "The job finished with status: {}", jobStatus); } if (!jobSuccessfullyTerminated(jobStatus)) { jobFailure = exceptions.isEmpty() ? 
new RuntimeDataException(ErrorCode.UNREPORTED_TASK_FAILURE_EXCEPTION) @@ -440,8 +440,9 @@ public abstract class ActiveEntityEventsListener implements IActiveEntityControl private void cancelJob(Throwable th) { cancelJobSafely(metadataProvider, th); +// we can come here due to a failure while in suspending state final WaitForStateSubscriber cancelSubscriber = -new WaitForStateSubscriber(this, EnumSet.of(ActivityState.STOPPED)); +new WaitForStateSubscriber(this, EnumSet.of(ActivityState.STOPPED, ActivityState.TEMPORARILY_FAILED)); final Span span = Span.start(2, TimeUnit.MINUTES); InvokeUtil.doUninterruptibly(() -> { if (!cancelSubscriber.sync(span)) { @@ -491,6 +492,7 @@ public abstract class ActiveEntityEventsListener implements IActiveEntityControl forceStop(subscriber, ie); Thread.currentThread().interrupt(); } catch (Throwable e) { +LOGGER.error("forcing active job stop due to", e); forceStop(subscriber, e); } finally { Thread.currentThread().setName(nameBefore);
[asterixdb] branch master updated: [NO ISSUE][ACTIVE] Account for force stop while suspending
This is an automated email from the ASF dual-hosted git repository. mhubail pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git The following commit(s) were added to refs/heads/master by this push: new eed8714 [NO ISSUE][ACTIVE] Account for force stop while suspending eed8714 is described below commit eed8714ae56bd61656750bf543181e7dd68c26c1 Author: Murtadha Hubail AuthorDate: Fri Sep 10 01:36:34 2021 +0300 [NO ISSUE][ACTIVE] Account for force stop while suspending - user model changes: no - storage format changes: no - interface changes: no Details: - When a failure happens while trying to suspend ingestion, we will force stop the active job. If the job completes ungracefully, we set the listener state to TEMPORARILY_FAILED. However, since the force stop only waits for the STOPPED state, the thread waiting for ingestion to be suspended will wait forever. This change accounts for such a case and makes the force stop wait for TEMPORARILY_FAILED too. 
Change-Id: Ib33f191be2b84d97a08e3bc6d607b0edbf35bed1 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/13144 Tested-by: Jenkins Integration-Tests: Jenkins Reviewed-by: Murtadha Hubail Reviewed-by: Ali Alsuliman --- .../org/apache/asterix/app/active/ActiveEntityEventsListener.java | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java index 0242ecd..ddd3d64 100644 --- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java +++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java @@ -195,7 +195,7 @@ public abstract class ActiveEntityEventsListener implements IActiveEntityControl @SuppressWarnings("unchecked") protected void finish(ActiveEvent event) throws HyracksDataException { if (LOGGER.isEnabled(level)) { -LOGGER.log(level, "the job " + jobId + " finished"); +LOGGER.log(level, "the job {} finished", jobId); } JobId lastJobId = jobId; if (numRegistered != numDeRegistered) { @@ -208,7 +208,7 @@ public abstract class ActiveEntityEventsListener implements IActiveEntityControl JobStatus jobStatus = status.getLeft(); List exceptions = status.getRight(); if (LOGGER.isEnabled(level)) { -LOGGER.log(level, "The job finished with status: " + jobStatus); +LOGGER.log(level, "The job finished with status: {}", jobStatus); } if (!jobSuccessfullyTerminated(jobStatus)) { jobFailure = exceptions.isEmpty() ? 
new RuntimeDataException(ErrorCode.UNREPORTED_TASK_FAILURE_EXCEPTION) @@ -440,8 +440,9 @@ public abstract class ActiveEntityEventsListener implements IActiveEntityControl private void cancelJob(Throwable th) { cancelJobSafely(metadataProvider, th); +// we can come here due to a failure while in suspending state final WaitForStateSubscriber cancelSubscriber = -new WaitForStateSubscriber(this, EnumSet.of(ActivityState.STOPPED)); +new WaitForStateSubscriber(this, EnumSet.of(ActivityState.STOPPED, ActivityState.TEMPORARILY_FAILED)); final Span span = Span.start(2, TimeUnit.MINUTES); InvokeUtil.doUninterruptibly(() -> { if (!cancelSubscriber.sync(span)) { @@ -491,6 +492,7 @@ public abstract class ActiveEntityEventsListener implements IActiveEntityControl forceStop(subscriber, ie); Thread.currentThread().interrupt(); } catch (Throwable e) { +LOGGER.error("forcing active job stop due to", e); forceStop(subscriber, e); } finally { Thread.currentThread().setName(nameBefore);