HIVE-15827: LLAP: status tool breaks out of watch mode when live instances is 0 (Prasanth Jayachandran reviewed by Sergey Shelukhin, Siddharth Seth)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/95d0ce72 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/95d0ce72 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/95d0ce72 Branch: refs/heads/hive-14535 Commit: 95d0ce722457eca996d3736bd06f5d95d16bc471 Parents: b978c07 Author: Prasanth Jayachandran <[email protected]> Authored: Mon Feb 6 19:31:58 2017 -0800 Committer: Prasanth Jayachandran <[email protected]> Committed: Mon Feb 6 19:31:58 2017 -0800 ---------------------------------------------------------------------- .../hive/llap/cli/LlapStatusServiceDriver.java | 41 ++++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/95d0ce72/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java index b30f837..1b9eba6 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapStatusServiceDriver.java @@ -497,7 +497,7 @@ public class LlapStatusServiceDriver { Collection<ServiceInstance> serviceInstances; try { serviceInstances = llapRegistry.getInstances(watchTimeoutMs).getAll(); - } catch (IOException e) { + } catch (Exception e) { throw new LlapStatusCliException(ExitCode.LLAP_REGISTRY_ERROR, "Failed to get instances from llap registry", e); } @@ -541,7 +541,11 @@ public class LlapStatusServiceDriver { LOG.warn("Found more entries in LLAP registry, as compared to desired entries"); } } else { - appStatusBuilder.setState(State.RUNNING_PARTIAL); + if (validatedInstances.size() > 0) { + appStatusBuilder.setState(State.RUNNING_PARTIAL); + } else { + appStatusBuilder.setState(State.LAUNCHING); + } } // At this point, everything that can be consumed from AppStatusBuilder has been consumed. @@ -575,6 +579,8 @@ public class LlapStatusServiceDriver { private Long appStartTime; private Long appFinishTime; + private boolean runningThresholdAchieved = false; + private final List<LlapInstance> llapInstances = new LinkedList<>(); private transient Map<String, LlapInstance> containerToInstanceMap = new HashMap<>(); @@ -625,6 +631,11 @@ public class LlapStatusServiceDriver { return this; } + public AppStatusBuilder setRunningThresholdAchieved(boolean thresholdAchieved) { + this.runningThresholdAchieved = thresholdAchieved; + return this; + } + public LlapInstance removeAndgetLlapInstanceForContainer(String containerIdString) { return containerToInstanceMap.remove(containerIdString); } @@ -683,6 +694,10 @@ public class LlapStatusServiceDriver { return llapInstances; } + public boolean isRunningThresholdAchieved() { + return runningThresholdAchieved; + } + @JsonIgnore public AmInfo maybeCreateAndGetAmInfo() { if (amInfo == null) { @@ -994,7 +1009,7 @@ public class LlapStatusServiceDriver { // we have reached RUNNING state, now check if running nodes threshold is met final int liveInstances = statusServiceDriver.appStatusBuilder.getLiveInstances(); final int desiredInstances = statusServiceDriver.appStatusBuilder.getDesiredInstances(); - if (liveInstances > 0 && desiredInstances > 0) { + if (desiredInstances > 0) { final float ratio = (float) liveInstances / (float) desiredInstances; if (ratio < runningNodesThreshold) { LOG.warn("Waiting until running nodes threshold is reached. Current: {} Desired: {}." + @@ -1006,9 +1021,29 @@ public class LlapStatusServiceDriver { continue; } else { desiredStateAttained = true; + statusServiceDriver.appStatusBuilder.setRunningThresholdAchieved(true); } + } else { + numAttempts--; + continue; } } + } else if (ret == ExitCode.YARN_ERROR.getInt() && watchMode) { + LOG.warn("Watch mode enabled and got YARN error. Retrying.."); + numAttempts--; + continue; + } else if (ret == ExitCode.SLIDER_CLIENT_ERROR_CREATE_FAILED.getInt() && watchMode) { + LOG.warn("Watch mode enabled and slider client creation failed. Retrying.."); + numAttempts--; + continue; + } else if (ret == ExitCode.SLIDER_CLIENT_ERROR_OTHER.getInt() && watchMode) { + LOG.warn("Watch mode enabled and got slider client error. Retrying.."); + numAttempts--; + continue; + } else if (ret == ExitCode.LLAP_REGISTRY_ERROR.getInt() && watchMode) { + LOG.warn("Watch mode enabled and got LLAP registry error. Retrying.."); + numAttempts--; + continue; } break; } finally {
