[36/50] [abbrv] hadoop git commit: YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in AMRMClientRelayer. Contributed by Botong Huang.

2018-10-02 Thread inigoiri
YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in 
AMRMClientRelayer. Contributed by Botong Huang.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/59d5af21
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/59d5af21
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/59d5af21

Branch: refs/heads/HDFS-13532
Commit: 59d5af21b7a8f52e8c89cbc2d25fe3d449b2657a
Parents: cc80ac2
Author: Giovanni Matteo Fumarola 
Authored: Mon Oct 1 13:12:38 2018 -0700
Committer: Giovanni Matteo Fumarola 
Committed: Mon Oct 1 13:12:38 2018 -0700

--
 .../hadoop/yarn/server/AMRMClientRelayer.java   | 25 +++++++++++++++++++++++--
 .../yarn/server/TestAMRMClientRelayer.java      | 25 +++++++++++++++++++++++++
 .../amrmproxy/FederationInterceptor.java        | 12 ++++++++++++
 3 files changed, 60 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/59d5af21/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
--
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
index 2621d3e..ca045d1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
@@ -237,6 +237,27 @@ public class AMRMClientRelayer extends AbstractService
 return this.rmClient.registerApplicationMaster(request);
   }
 
+  /**
+   * After an RM failover, there might be more than one
+   * allocate/finishApplicationMaster call thread (due to RPC timeout and retry)
+   * doing the auto re-register concurrently. As a result, we need to swallow
+   * the already register exception thrown by the new RM.
+   */
+  private void reRegisterApplicationMaster(
+  RegisterApplicationMasterRequest request)
+  throws YarnException, IOException {
+try {
+  registerApplicationMaster(request);
+} catch (InvalidApplicationMasterRequestException e) {
+  if (e.getMessage()
+  .contains(AMRMClientUtils.APP_ALREADY_REGISTERED_MESSAGE)) {
+LOG.info("Concurrent thread successfully re-registered, moving on.");
+  } else {
+throw e;
+  }
+}
+  }
+
   @Override
   public FinishApplicationMasterResponse finishApplicationMaster(
   FinishApplicationMasterRequest request)
@@ -247,7 +268,7 @@ public class AMRMClientRelayer extends AbstractService
   LOG.warn("Out of sync with RM " + rmId
   + " for " + this.appId + ", hence resyncing.");
   // re register with RM
-  registerApplicationMaster(this.amRegistrationRequest);
+  reRegisterApplicationMaster(this.amRegistrationRequest);
   return finishApplicationMaster(request);
 }
   }
@@ -381,7 +402,7 @@ public class AMRMClientRelayer extends AbstractService
   }
 
   // re-register with RM, then retry allocate recursively
-  registerApplicationMaster(this.amRegistrationRequest);
+  reRegisterApplicationMaster(this.amRegistrationRequest);
   // Reset responseId after re-register
   allocateRequest.setResponseId(0);
   allocateResponse = allocate(allocateRequest);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/59d5af21/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
--
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
index 2c016d7..fa46960 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
@@ -64,6 +64,11 @@ public class TestAMRMClientRelayer {
 // Whether this mockRM will throw failover exception upon next heartbeat
 // from AM
 private boolean failover = false;
+
+// Whether this mockRM will throw application 

hadoop git commit: YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in AMRMClientRelayer. Contributed by Botong Huang.

2018-10-01 Thread gifuma
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 d7d0e55e0 -> 57caab6f9


YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in 
AMRMClientRelayer. Contributed by Botong Huang.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/57caab6f
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/57caab6f
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/57caab6f

Branch: refs/heads/branch-2
Commit: 57caab6f9b66d52d6df2d95e21e8854398489631
Parents: d7d0e55
Author: Giovanni Matteo Fumarola 
Authored: Mon Oct 1 13:12:38 2018 -0700
Committer: Giovanni Matteo Fumarola 
Committed: Mon Oct 1 14:27:01 2018 -0700

--
 .../hadoop/yarn/server/AMRMClientRelayer.java   | 25 +++++++++++++++++++++++--
 .../yarn/server/TestAMRMClientRelayer.java      | 25 +++++++++++++++++++++++++
 .../amrmproxy/FederationInterceptor.java        | 12 ++++++++++++
 3 files changed, 60 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/57caab6f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
--
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
index a7ed373..790147c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
@@ -232,6 +232,27 @@ public class AMRMClientRelayer extends AbstractService
 return this.rmClient.registerApplicationMaster(request);
   }
 
+  /**
+   * After an RM failover, there might be more than one
+   * allocate/finishApplicationMaster call thread (due to RPC timeout and retry)
+   * doing the auto re-register concurrently. As a result, we need to swallow
+   * the already register exception thrown by the new RM.
+   */
+  private void reRegisterApplicationMaster(
+  RegisterApplicationMasterRequest request)
+  throws YarnException, IOException {
+try {
+  registerApplicationMaster(request);
+} catch (InvalidApplicationMasterRequestException e) {
+  if (e.getMessage()
+  .contains(AMRMClientUtils.APP_ALREADY_REGISTERED_MESSAGE)) {
+LOG.info("Concurrent thread successfully re-registered, moving on.");
+  } else {
+throw e;
+  }
+}
+  }
+
   @Override
   public FinishApplicationMasterResponse finishApplicationMaster(
   FinishApplicationMasterRequest request)
@@ -242,7 +263,7 @@ public class AMRMClientRelayer extends AbstractService
   LOG.warn("Out of sync with RM " + rmId
   + " for " + this.appId + ", hence resyncing.");
   // re register with RM
-  registerApplicationMaster(this.amRegistrationRequest);
+  reRegisterApplicationMaster(this.amRegistrationRequest);
   return finishApplicationMaster(request);
 }
   }
@@ -363,7 +384,7 @@ public class AMRMClientRelayer extends AbstractService
   }
 
   // re-register with RM, then retry allocate recursively
-  registerApplicationMaster(this.amRegistrationRequest);
+  reRegisterApplicationMaster(this.amRegistrationRequest);
   // Reset responseId after re-register
   allocateRequest.setResponseId(0);
   allocateResponse = allocate(allocateRequest);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/57caab6f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
--
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
index 2c016d7..fa46960 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
@@ -64,6 +64,11 @@ public class TestAMRMClientRelayer {
 // Whether this mockRM will throw failover exception upon next heartbeat
 // from AM
 

hadoop git commit: YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in AMRMClientRelayer. Contributed by Botong Huang.

2018-10-01 Thread gifuma
Repository: hadoop
Updated Branches:
  refs/heads/trunk cc80ac231 -> 59d5af21b


YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in 
AMRMClientRelayer. Contributed by Botong Huang.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/59d5af21
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/59d5af21
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/59d5af21

Branch: refs/heads/trunk
Commit: 59d5af21b7a8f52e8c89cbc2d25fe3d449b2657a
Parents: cc80ac2
Author: Giovanni Matteo Fumarola 
Authored: Mon Oct 1 13:12:38 2018 -0700
Committer: Giovanni Matteo Fumarola 
Committed: Mon Oct 1 13:12:38 2018 -0700

--
 .../hadoop/yarn/server/AMRMClientRelayer.java   | 25 +++++++++++++++++++++++--
 .../yarn/server/TestAMRMClientRelayer.java      | 25 +++++++++++++++++++++++++
 .../amrmproxy/FederationInterceptor.java        | 12 ++++++++++++
 3 files changed, 60 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/59d5af21/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
--
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
index 2621d3e..ca045d1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java
@@ -237,6 +237,27 @@ public class AMRMClientRelayer extends AbstractService
 return this.rmClient.registerApplicationMaster(request);
   }
 
+  /**
+   * After an RM failover, there might be more than one
+   * allocate/finishApplicationMaster call thread (due to RPC timeout and retry)
+   * doing the auto re-register concurrently. As a result, we need to swallow
+   * the already register exception thrown by the new RM.
+   */
+  private void reRegisterApplicationMaster(
+  RegisterApplicationMasterRequest request)
+  throws YarnException, IOException {
+try {
+  registerApplicationMaster(request);
+} catch (InvalidApplicationMasterRequestException e) {
+  if (e.getMessage()
+  .contains(AMRMClientUtils.APP_ALREADY_REGISTERED_MESSAGE)) {
+LOG.info("Concurrent thread successfully re-registered, moving on.");
+  } else {
+throw e;
+  }
+}
+  }
+
   @Override
   public FinishApplicationMasterResponse finishApplicationMaster(
   FinishApplicationMasterRequest request)
@@ -247,7 +268,7 @@ public class AMRMClientRelayer extends AbstractService
   LOG.warn("Out of sync with RM " + rmId
   + " for " + this.appId + ", hence resyncing.");
   // re register with RM
-  registerApplicationMaster(this.amRegistrationRequest);
+  reRegisterApplicationMaster(this.amRegistrationRequest);
   return finishApplicationMaster(request);
 }
   }
@@ -381,7 +402,7 @@ public class AMRMClientRelayer extends AbstractService
   }
 
   // re-register with RM, then retry allocate recursively
-  registerApplicationMaster(this.amRegistrationRequest);
+  reRegisterApplicationMaster(this.amRegistrationRequest);
   // Reset responseId after re-register
   allocateRequest.setResponseId(0);
   allocateResponse = allocate(allocateRequest);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/59d5af21/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
--
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
index 2c016d7..fa46960 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java
@@ -64,6 +64,11 @@ public class TestAMRMClientRelayer {
 // Whether this mockRM will throw failover exception upon next heartbeat
 // from AM
 private