[asterixdb] branch master updated: [NO ISSUE][ACTIVE] Account for force stop while suspending

2021-09-14 Thread mhubail
This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
 new 5e11053  [NO ISSUE][ACTIVE] Account for force stop while suspending
 new bc8aaf6  Merge branch 'cheshire-cat' into master
5e11053 is described below

commit 5e110538870c9385f316e0395b80a76f0e6d8e7c
Author: Murtadha Hubail 
AuthorDate: Fri Sep 10 01:36:34 2021 +0300

[NO ISSUE][ACTIVE] Account for force stop while suspending

- user model changes: no
- storage format changes: no
- interface changes: no

Details:

- When a failure happens while trying to suspend ingestion,
  we will force stop the active job. If the job completes
  ungracefully, we set the listener state to TEMPORARILY_FAILED.
  However, since force stop only waits for the STOPPED state,
  the thread waiting for ingestion to be suspended will wait
  forever. This change accounts for such a case and makes
  the force stop wait for TEMPORARILY_FAILED too.

Change-Id: Ib33f191be2b84d97a08e3bc6d607b0edbf35bed1
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/13144
Tested-by: Jenkins 
Integration-Tests: Jenkins 
Reviewed-by: Murtadha Hubail 
Reviewed-by: Ali Alsuliman 
(cherry picked from commit eed8714ae56bd61656750bf543181e7dd68c26c1)
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/13203
---
 .../org/apache/asterix/app/active/ActiveEntityEventsListener.java | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java
index 0242ecd..ddd3d64 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java
@@ -195,7 +195,7 @@ public abstract class ActiveEntityEventsListener implements 
IActiveEntityControl
 @SuppressWarnings("unchecked")
 protected void finish(ActiveEvent event) throws HyracksDataException {
 if (LOGGER.isEnabled(level)) {
-LOGGER.log(level, "the job " + jobId + " finished");
+LOGGER.log(level, "the job {} finished", jobId);
 }
 JobId lastJobId = jobId;
 if (numRegistered != numDeRegistered) {
@@ -208,7 +208,7 @@ public abstract class ActiveEntityEventsListener implements 
IActiveEntityControl
 JobStatus jobStatus = status.getLeft();
 List exceptions = status.getRight();
 if (LOGGER.isEnabled(level)) {
-LOGGER.log(level, "The job finished with status: " + jobStatus);
+LOGGER.log(level, "The job finished with status: {}", jobStatus);
 }
 if (!jobSuccessfullyTerminated(jobStatus)) {
 jobFailure = exceptions.isEmpty() ? new 
RuntimeDataException(ErrorCode.UNREPORTED_TASK_FAILURE_EXCEPTION)
@@ -440,8 +440,9 @@ public abstract class ActiveEntityEventsListener implements 
IActiveEntityControl
 
 private void cancelJob(Throwable th) {
 cancelJobSafely(metadataProvider, th);
+// we can come here due to a failure while in suspending state
 final WaitForStateSubscriber cancelSubscriber =
-new WaitForStateSubscriber(this, 
EnumSet.of(ActivityState.STOPPED));
+new WaitForStateSubscriber(this, 
EnumSet.of(ActivityState.STOPPED, ActivityState.TEMPORARILY_FAILED));
 final Span span = Span.start(2, TimeUnit.MINUTES);
 InvokeUtil.doUninterruptibly(() -> {
 if (!cancelSubscriber.sync(span)) {
@@ -491,6 +492,7 @@ public abstract class ActiveEntityEventsListener implements 
IActiveEntityControl
 forceStop(subscriber, ie);
 Thread.currentThread().interrupt();
 } catch (Throwable e) {
+LOGGER.error("forcing active job stop due to", e);
 forceStop(subscriber, e);
 } finally {
 Thread.currentThread().setName(nameBefore);


[asterixdb] branch master updated: [NO ISSUE][ACTIVE] Account for force stop while suspending

2021-09-10 Thread mhubail
This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
 new eed8714  [NO ISSUE][ACTIVE] Account for force stop while suspending
eed8714 is described below

commit eed8714ae56bd61656750bf543181e7dd68c26c1
Author: Murtadha Hubail 
AuthorDate: Fri Sep 10 01:36:34 2021 +0300

[NO ISSUE][ACTIVE] Account for force stop while suspending

- user model changes: no
- storage format changes: no
- interface changes: no

Details:

- When a failure happens while trying to suspend ingestion,
  we will force stop the active job. If the job completes
  ungracefully, we set the listener state to TEMPORARILY_FAILED.
  However, since force stop only waits for the STOPPED state,
  the thread waiting for ingestion to be suspended will wait
  forever. This change accounts for such a case and makes
  the force stop wait for TEMPORARILY_FAILED too.

Change-Id: Ib33f191be2b84d97a08e3bc6d607b0edbf35bed1
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/13144
Tested-by: Jenkins 
Integration-Tests: Jenkins 
Reviewed-by: Murtadha Hubail 
Reviewed-by: Ali Alsuliman 
---
 .../org/apache/asterix/app/active/ActiveEntityEventsListener.java | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java
index 0242ecd..ddd3d64 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveEntityEventsListener.java
@@ -195,7 +195,7 @@ public abstract class ActiveEntityEventsListener implements 
IActiveEntityControl
 @SuppressWarnings("unchecked")
 protected void finish(ActiveEvent event) throws HyracksDataException {
 if (LOGGER.isEnabled(level)) {
-LOGGER.log(level, "the job " + jobId + " finished");
+LOGGER.log(level, "the job {} finished", jobId);
 }
 JobId lastJobId = jobId;
 if (numRegistered != numDeRegistered) {
@@ -208,7 +208,7 @@ public abstract class ActiveEntityEventsListener implements 
IActiveEntityControl
 JobStatus jobStatus = status.getLeft();
 List exceptions = status.getRight();
 if (LOGGER.isEnabled(level)) {
-LOGGER.log(level, "The job finished with status: " + jobStatus);
+LOGGER.log(level, "The job finished with status: {}", jobStatus);
 }
 if (!jobSuccessfullyTerminated(jobStatus)) {
 jobFailure = exceptions.isEmpty() ? new 
RuntimeDataException(ErrorCode.UNREPORTED_TASK_FAILURE_EXCEPTION)
@@ -440,8 +440,9 @@ public abstract class ActiveEntityEventsListener implements 
IActiveEntityControl
 
 private void cancelJob(Throwable th) {
 cancelJobSafely(metadataProvider, th);
+// we can come here due to a failure while in suspending state
 final WaitForStateSubscriber cancelSubscriber =
-new WaitForStateSubscriber(this, 
EnumSet.of(ActivityState.STOPPED));
+new WaitForStateSubscriber(this, 
EnumSet.of(ActivityState.STOPPED, ActivityState.TEMPORARILY_FAILED));
 final Span span = Span.start(2, TimeUnit.MINUTES);
 InvokeUtil.doUninterruptibly(() -> {
 if (!cancelSubscriber.sync(span)) {
@@ -491,6 +492,7 @@ public abstract class ActiveEntityEventsListener implements 
IActiveEntityControl
 forceStop(subscriber, ie);
 Thread.currentThread().interrupt();
 } catch (Throwable e) {
+LOGGER.error("forcing active job stop due to", e);
 forceStop(subscriber, e);
 } finally {
 Thread.currentThread().setName(nameBefore);