HADOOP-12317. Applications fail on NM restart on some Linux distros because NM container recovery declares AM container as LOST (adhoot via rkanter)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1e06299d Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1e06299d Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1e06299d Branch: refs/heads/YARN-1197 Commit: 1e06299df82b98795124fe8a33578c111e744ff4 Parents: 4e14f79 Author: Robert Kanter <rkan...@apache.org> Authored: Wed Aug 19 19:00:51 2015 -0700 Committer: Robert Kanter <rkan...@apache.org> Committed: Wed Aug 19 19:00:51 2015 -0700 ---------------------------------------------------------------------- hadoop-common-project/hadoop-common/CHANGES.txt | 4 ++ .../main/java/org/apache/hadoop/util/Shell.java | 11 ++++-- .../java/org/apache/hadoop/util/TestShell.java | 39 ++++++++++++++++++++ 3 files changed, 51 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/1e06299d/hadoop-common-project/hadoop-common/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index d07adcb..943dbac 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1063,6 +1063,10 @@ Release 2.8.0 - UNRELEASED HADOOP-12322. typos in rpcmetrics.java. (Anu Engineer via Arpit Agarwal) + HADOOP-12317. 
Applications fail on NM restart on some linux distro + because NM container recovery declares AM container as LOST + (adhoot via rkanter) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/1e06299d/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java index ed83e8d..e426955 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java @@ -212,13 +212,18 @@ abstract public class Shell { public static String[] getCheckProcessIsAliveCommand(String pid) { return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "isAlive", pid } : - new String[] { "kill", "-0", isSetsidAvailable ? "-" + pid : pid }; + isSetsidAvailable ? + new String[] { "kill", "-0", "--", "-" + pid } : + new String[] { "kill", "-0", pid }; } /** Return a command to send a signal to a given pid */ public static String[] getSignalKillCommand(int code, String pid) { - return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } : - new String[] { "kill", "-" + code, isSetsidAvailable ? "-" + pid : pid }; + return Shell.WINDOWS ? + new String[] { Shell.WINUTILS, "task", "kill", pid } : + isSetsidAvailable ? 
+ new String[] { "kill", "-" + code, "--", "-" + pid } : + new String[] { "kill", "-" + code, pid }; } public static final String ENV_NAME_REGEX = "[A-Za-z_][A-Za-z0-9_]*"; http://git-wip-us.apache.org/repos/asf/hadoop/blob/1e06299d/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java index d9dc9ef..a96a0c8 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java @@ -18,6 +18,7 @@ package org.apache.hadoop.util; import junit.framework.TestCase; +import org.junit.Assert; import java.io.BufferedReader; import java.io.File; @@ -150,6 +151,44 @@ public class TestShell extends TestCase { System.err.println("after: " + timersAfter); assertEquals(timersBefore, timersAfter); } + + public void testGetCheckProcessIsAliveCommand() throws Exception { + String anyPid = "9999"; + String[] checkProcessAliveCommand = Shell.getCheckProcessIsAliveCommand( + anyPid); + + String[] expectedCommand; + + if (Shell.WINDOWS) { + expectedCommand = + new String[]{ Shell.WINUTILS, "task", "isAlive", anyPid }; + } else if (Shell.isSetsidAvailable) { + expectedCommand = new String[]{ "kill", "-0", "--", "-" + anyPid }; + } else { + expectedCommand = new String[]{"kill", "-0", anyPid}; + } + Assert.assertArrayEquals(expectedCommand, checkProcessAliveCommand); + } + + public void testGetSignalKillCommand() throws Exception { + String anyPid = "9999"; + int anySignal = 9; + String[] checkProcessAliveCommand = Shell.getSignalKillCommand(anySignal, + anyPid); + + String[] expectedCommand; + if (Shell.WINDOWS) { + expectedCommand = + new String[]{ Shell.WINUTILS, "task", 
"kill", anyPid }; + } else if (Shell.isSetsidAvailable) { + expectedCommand = + new String[]{ "kill", "-" + anySignal, "--", "-" + anyPid }; + } else { + expectedCommand = + new String[]{ "kill", "-" + anySignal, anyPid }; + } + Assert.assertArrayEquals(expectedCommand, checkProcessAliveCommand); + } private void testInterval(long interval) throws IOException {