Repository: incubator-griffin Updated Branches: refs/heads/master 18fc4cf4c -> 3dda6b345
[GRIFFIN-197] Treat non-existing YARN app as FAILED. This avoids jobs becoming stuck in the UNKNOWN state on the Service side. It also improves logging for YARN client errors. Author: Nikolay Sokolov <chemika...@gmail.com> Closes #421 from chemikadze/GRIFFIN-197. Project: http://git-wip-us.apache.org/repos/asf/incubator-griffin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-griffin/commit/3dda6b34 Tree: http://git-wip-us.apache.org/repos/asf/incubator-griffin/tree/3dda6b34 Diff: http://git-wip-us.apache.org/repos/asf/incubator-griffin/diff/3dda6b34 Branch: refs/heads/master Commit: 3dda6b3459d2b3d9f091545e49e156ca5f230e2d Parents: 18fc4cf Author: Nikolay Sokolov <chemika...@gmail.com> Authored: Sun Sep 30 15:09:16 2018 +0800 Committer: William Guo <gu...@apache.org> Committed: Sun Sep 30 15:09:16 2018 +0800 ---------------------------------------------------------------------- .../org/apache/griffin/core/util/YarnNetUtil.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-griffin/blob/3dda6b34/service/src/main/java/org/apache/griffin/core/util/YarnNetUtil.java ---------------------------------------------------------------------- diff --git a/service/src/main/java/org/apache/griffin/core/util/YarnNetUtil.java b/service/src/main/java/org/apache/griffin/core/util/YarnNetUtil.java index f935aad..71308ab 100644 --- a/service/src/main/java/org/apache/griffin/core/util/YarnNetUtil.java +++ b/service/src/main/java/org/apache/griffin/core/util/YarnNetUtil.java @@ -21,14 +21,17 @@ package org.apache.griffin.core.util; import com.google.gson.JsonObject; import com.google.gson.JsonParser; - import org.apache.commons.lang.StringUtils; import org.apache.griffin.core.job.entity.JobInstanceBean; import org.apache.griffin.core.job.entity.LivySessionStates; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import 
org.springframework.http.HttpStatus; +import org.springframework.web.client.HttpClientErrorException; import org.springframework.web.client.RestTemplate; +import static org.apache.griffin.core.job.entity.LivySessionStates.State.DEAD; + public class YarnNetUtil { private static final Logger LOGGER = LoggerFactory .getLogger(YarnNetUtil.class); @@ -42,6 +45,9 @@ public class YarnNetUtil { + appId + "/state", "{\"state\": \"KILLED\"}"); } + } catch (HttpClientErrorException e) { + LOGGER.warn("client error {} from yarn: {}", + e.getMessage(), e.getResponseBodyAsString()); } catch (Exception e) { LOGGER.error("delete exception happens by yarn. {}", e); } @@ -56,6 +62,14 @@ public class YarnNetUtil { instance.setState(LivySessionStates.toLivyState(state)); } return true; + } catch (HttpClientErrorException e) { + LOGGER.warn("client error {} from yarn: {}", + e.getMessage(), e.getResponseBodyAsString()); + if (e.getStatusCode() == HttpStatus.NOT_FOUND) { + // in sync with Livy behavior, see com.cloudera.livy.utils.SparkYarnApp + instance.setState(DEAD); + return true; + } } catch (Exception e) { LOGGER.error("update exception happens by yarn. {}", e); }