Repository: ambari
Updated Branches:
  refs/heads/branch-2.1 b7d74258f -> 2eaac094c
AMBARI-6690 - Hosts emit : Host Role Invalid State Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/277b9311 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/277b9311 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/277b9311 Branch: refs/heads/branch-2.1 Commit: 277b93114ad592c905a7149b85bfa76dfdd90902 Parents: b7d7425 Author: Artem Baranchuk <[email protected]> Authored: Wed Jun 17 01:37:25 2015 +0300 Committer: Artem Baranchuk <[email protected]> Committed: Wed Jun 17 01:37:25 2015 +0300 ---------------------------------------------------------------------- .../server/actionmanager/ActionDBAccessor.java | 2 +- .../actionmanager/ActionDBAccessorImpl.java | 13 ++++---- .../server/actionmanager/ActionScheduler.java | 11 +++---- .../org/apache/ambari/server/state/Cluster.java | 4 +-- .../server/state/cluster/ClusterImpl.java | 32 +++++++++++--------- 5 files changed, 33 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/277b9311/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java index 907c90a..873261f 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java @@ -144,7 +144,7 @@ public interface ActionDBAccessor { /** * Bulk abort commands */ - void bulkAbortHostRole(Stage s, List<ExecutionCommand> commands); + void bulkAbortHostRole(Stage s, Map<ExecutionCommand, String> commands); /** * Updates scheduled stage. 
http://git-wip-us.apache.org/repos/asf/ambari/blob/277b9311/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java index 959ed2d..51b2f09 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java @@ -458,13 +458,12 @@ public class ActionDBAccessorImpl implements ActionDBAccessor { @Override public void abortHostRole(String host, long requestId, long stageId, String role) { - String reason = "Host Role in invalid state"; + String reason = String.format("On host %s role %s in invalid state.", host, role); abortHostRole(host, requestId, stageId, role, reason); } @Override - public void abortHostRole(String host, long requestId, long stageId, - String role, String reason) { + public void abortHostRole(String host, long requestId, long stageId, String role, String reason) { CommandReport report = new CommandReport(); report.setExitCode(999); report.setStdErr(reason); @@ -489,9 +488,11 @@ public class ActionDBAccessorImpl implements ActionDBAccessor { @Override @Transactional - public void bulkAbortHostRole(Stage s, List<ExecutionCommand> commands) { - for (ExecutionCommand command : commands) { - abortHostRole(command.getHostname(), s.getRequestId(), s.getStageId(), command.getRole()); + public void bulkAbortHostRole(Stage s, Map<ExecutionCommand, String> commands) { + for (ExecutionCommand command : commands.keySet()) { + String reason = String.format("On host %s role %s in invalid state.\n%s", + command.getHostname(), command.getRole(), commands.get(command)); + abortHostRole(command.getHostname(), s.getRequestId(), 
s.getStageId(), command.getRole(), reason); } } http://git-wip-us.apache.org/repos/asf/ambari/blob/277b9311/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java index 8ccf622..562a5ca 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java @@ -335,13 +335,12 @@ class ActionScheduler implements Runnable { //Multimap is analog of Map<Object, List<Object>> but allows to avoid nested loop ListMultimap<String, ServiceComponentHostEvent> eventMap = formEventMap(stage, commandsToStart); - List<ExecutionCommand> commandsToAbort = new ArrayList<ExecutionCommand>(); + Map<ExecutionCommand, String> commandsToAbort = new HashMap<ExecutionCommand, String>(); if (!eventMap.isEmpty()) { LOG.debug("==> processing {} serviceComponentHostEvents...", eventMap.size()); Cluster cluster = fsmObject.getCluster(stage.getClusterName()); if (cluster != null) { - List<ServiceComponentHostEvent> failedEvents = - cluster.processServiceComponentHostEvents(eventMap); + Map<ServiceComponentHostEvent, String> failedEvents = cluster.processServiceComponentHostEvents(eventMap); if (failedEvents.size() > 0) { LOG.error("==> {} events failed.", failedEvents.size()); @@ -349,11 +348,11 @@ class ActionScheduler implements Runnable { for (Iterator<ExecutionCommand> iterator = commandsToUpdate.iterator(); iterator.hasNext(); ) { ExecutionCommand cmd = iterator.next(); - for (ServiceComponentHostEvent event : failedEvents) { + for (ServiceComponentHostEvent event : failedEvents.keySet()) { if (StringUtils.equals(event.getHostName(), cmd.getHostname()) && 
StringUtils.equals(event.getServiceComponentName(), cmd.getRole())) { iterator.remove(); - commandsToAbort.add(cmd); + commandsToAbort.put(cmd, failedEvents.get(event)); break; } } @@ -370,7 +369,7 @@ class ActionScheduler implements Runnable { LOG.debug("==> Aborting {} tasks...", commandsToAbort.size()); // Build a list of HostRoleCommands List<Long> taskIds = new ArrayList<Long>(); - for (ExecutionCommand command : commandsToAbort) { + for (ExecutionCommand command : commandsToAbort.keySet()) { taskIds.add(command.getTaskId()); } Collection<HostRoleCommand> hostRoleCommands = db.getTasks(taskIds); http://git-wip-us.apache.org/repos/asf/ambari/blob/277b9311/ambari-server/src/main/java/org/apache/ambari/server/state/Cluster.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/Cluster.java b/ambari-server/src/main/java/org/apache/ambari/server/state/Cluster.java index 2e2cc29..12f96c4 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/state/Cluster.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/state/Cluster.java @@ -501,9 +501,9 @@ public interface Cluster { * Bulk handle service component host events * * @param eventMap serviceName - event mapping - * @return list of failed events + * @return map of failed events where key is event and value is short message */ - List<ServiceComponentHostEvent> processServiceComponentHostEvents(ListMultimap<String, ServiceComponentHostEvent> eventMap); + Map<ServiceComponentHostEvent, String> processServiceComponentHostEvents(ListMultimap<String, ServiceComponentHostEvent> eventMap); /** * Determine whether or not access to this cluster resource should be allowed based http://git-wip-us.apache.org/repos/asf/ambari/blob/277b9311/ambari-server/src/main/java/org/apache/ambari/server/state/cluster/ClusterImpl.java ---------------------------------------------------------------------- diff --git 
a/ambari-server/src/main/java/org/apache/ambari/server/state/cluster/ClusterImpl.java b/ambari-server/src/main/java/org/apache/ambari/server/state/cluster/ClusterImpl.java index a49e87c..353b84d 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/state/cluster/ClusterImpl.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/state/cluster/ClusterImpl.java @@ -2480,8 +2480,8 @@ public class ClusterImpl implements Cluster { @Transactional @Override - public List<ServiceComponentHostEvent> processServiceComponentHostEvents(ListMultimap<String, ServiceComponentHostEvent> eventMap) { - List<ServiceComponentHostEvent> failedEvents = new ArrayList<ServiceComponentHostEvent>(); + public Map<ServiceComponentHostEvent, String> processServiceComponentHostEvents(ListMultimap<String, ServiceComponentHostEvent> eventMap) { + Map<ServiceComponentHostEvent, String> failedEvents = new HashMap<ServiceComponentHostEvent, String>(); clusterGlobalLock.readLock().lock(); try { @@ -2494,24 +2494,28 @@ public class ClusterImpl implements Cluster { ServiceComponentHost serviceComponentHost = serviceComponent.getServiceComponentHost(event.getHostName()); serviceComponentHost.handleEvent(event); } catch (ServiceNotFoundException e) { - LOG.error(String.format("ServiceComponentHost lookup exception. Service not found for Service: %s. Error: %s", - serviceName, e.getMessage())); + String message = String.format("ServiceComponentHost lookup exception. Service not found for Service: %s. Error: %s", + serviceName, e.getMessage()); + LOG.error(message); e.printStackTrace(); - failedEvents.add(event); + failedEvents.put(event, message); } catch (ServiceComponentNotFoundException e) { - LOG.error(String.format("ServiceComponentHost lookup exception. Service Component not found for Service: %s, Component: %s. Error: %s", - serviceName, event.getServiceComponentName(), e.getMessage())); + String message = String.format("ServiceComponentHost lookup exception. 
Service Component not found for Service: %s, Component: %s. Error: %s", + serviceName, event.getServiceComponentName(), e.getMessage()); + LOG.error(message); e.printStackTrace(); - failedEvents.add(event); + failedEvents.put(event, message); } catch (ServiceComponentHostNotFoundException e) { - LOG.error(String.format("ServiceComponentHost lookup exception. Service Component Host not found for Service: %s, Component: %s, Host: %s. Error: %s", - serviceName, event.getServiceComponentName(), event.getHostName(), e.getMessage())); + String message = String.format("ServiceComponentHost lookup exception. Service Component Host not found for Service: %s, Component: %s, Host: %s. Error: %s", + serviceName, event.getServiceComponentName(), event.getHostName(), e.getMessage()); + LOG.error(message); e.printStackTrace(); - failedEvents.add(event); + failedEvents.put(event, message); } catch (AmbariException e) { - LOG.error("ServiceComponentHost lookup exception ", e.getMessage()); + String message = String.format("ServiceComponentHost lookup exception %s", e.getMessage()); + LOG.error(message); e.printStackTrace(); - failedEvents.add(event); + failedEvents.put(event, message); } catch (InvalidStateTransitionException e) { LOG.error("Invalid transition ", e); if ((e.getEvent() == ServiceComponentHostEventType.HOST_SVCCOMP_START) && @@ -2519,7 +2523,7 @@ public class ClusterImpl implements Cluster { LOG.warn("Component request for component = " + event.getServiceComponentName() + " to start is invalid, since component is already started. Ignoring this request."); // skip adding this as a failed event, to work around stack ordering issues with Hive } else { - failedEvents.add(event); + failedEvents.put(event, String.format("Invalid transition. %s", e.getMessage())); } } }
