Repository: ambari Updated Branches: refs/heads/trunk 05de8adfc -> 64f4f66b7
AMBARI-10331. BE issue: install repo version is stuck in INSTALLING after sudden server restart (dlysnichenko) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/64f4f66b Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/64f4f66b Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/64f4f66b Branch: refs/heads/trunk Commit: 64f4f66b72cfdee330230963b73049533e4212c0 Parents: 05de8ad Author: Lisnichenko Dmitro <[email protected]> Authored: Thu Apr 2 22:04:30 2015 +0300 Committer: Lisnichenko Dmitro <[email protected]> Committed: Thu Apr 2 22:05:06 2015 +0300 ---------------------------------------------------------------------- .../server/actionmanager/ActionScheduler.java | 53 ++++++++++++++------ 1 file changed, 39 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/64f4f66b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java index 1a49289..9c16b10 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java @@ -579,6 +579,9 @@ class ActionScheduler implements Runnable { // Abort the command itself // We don't need to send CANCEL_COMMANDs in this case db.abortHostRole(host, s.getRequestId(), s.getStageId(), c.getRole(), message); + if (c.getRoleCommand().equals(RoleCommand.ACTIONEXECUTE)) { + processActionDeath(cluster.getClusterName(), c.getHostname(), roleStr); + } status = HostRoleStatus.ABORTED; } else if (timeOutActionNeeded(status, s, hostObj, roleStr, now, commandTimeout)) { // Process command timeouts @@ -591,6 +594,9 @@ class ActionScheduler implements Runnable { if (null != cluster) { transitionToFailedState(cluster.getClusterName(), c.getServiceName(), roleStr, host, now, false); + if (c.getRoleCommand().equals(RoleCommand.ACTIONEXECUTE)) { + processActionDeath(cluster.getClusterName(), c.getHostname(), roleStr); + } } // Dequeue command @@ -628,7 +634,13 @@ class ActionScheduler implements Runnable { for(ExecutionCommandWrapper wrapper : commandWrappers) { ExecutionCommand c = wrapper.getExecutionCommand(); transitionToFailedState(stage.getClusterName(), c.getServiceName(), - c.getRole(), hostName, now, true); + c.getRole(), hostName, now, true); + if (c.getRoleCommand().equals(RoleCommand.ACTIONEXECUTE)) { + String clusterName = c.getClusterName(); + processActionDeath(clusterName, + c.getHostname(), + c.getRole()); + } } } db.abortOperation(stage.getRequestId()); @@ -889,23 +901,36 @@ class ActionScheduler implements Runnable { // If host role is an Action, we have to send an event if (hostRoleCommand.getRoleCommand().equals(RoleCommand.ACTIONEXECUTE)) { String clusterName = hostRoleCommand.getExecutionCommandWrapper().getExecutionCommand().getClusterName(); - try { - // Usually clusterId is defined (except the awkward case when - // "Distribute repositories/install packages" action has been issued - // against a concrete host without binding to a cluster) - Long clusterId = clusterName != null ? - fsmObject.getCluster(clusterName).getClusterId() : null; - ActionFinalReportReceivedEvent event = new ActionFinalReportReceivedEvent( - clusterId, hostRoleCommand.getHostName(), null, - hostRoleCommand.getRole().name()); - ambariEventPublisher.publish(event); - } catch (AmbariException e) { - LOG.error(String.format("Can not get cluster %s", clusterName), e); - } + processActionDeath(clusterName, + hostRoleCommand.getHostName(), + hostRoleCommand.getRole().name()); } } } + + /** + * Attempts to process kill/timeout/abort of action and send + * appropriate event to all listeners + */ + private void processActionDeath(String clusterName, + String hostname, + String role) { + try { + // Usually clusterId is defined (except the awkward case when + // "Distribute repositories/install packages" action has been issued + // against a concrete host without binding to a cluster) + Long clusterId = clusterName != null ? + fsmObject.getCluster(clusterName).getClusterId() : null; + ActionFinalReportReceivedEvent event = new ActionFinalReportReceivedEvent( + clusterId, hostname, null, + role); + ambariEventPublisher.publish(event); + } catch (AmbariException e) { + LOG.error(String.format("Can not get cluster %s", clusterName), e); + } + } + private void updateRoleStats(HostRoleStatus status, RoleStats rs) { switch (status) { case COMPLETED:
