[36/50] [abbrv] hadoop git commit: YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in AMRMClientRelayer. Contributed by Botong Huang.
YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in AMRMClientRelayer. Contributed by Botong Huang. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/59d5af21 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/59d5af21 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/59d5af21 Branch: refs/heads/HDFS-13532 Commit: 59d5af21b7a8f52e8c89cbc2d25fe3d449b2657a Parents: cc80ac2 Author: Giovanni Matteo Fumarola Authored: Mon Oct 1 13:12:38 2018 -0700 Committer: Giovanni Matteo Fumarola Committed: Mon Oct 1 13:12:38 2018 -0700 -- .../hadoop/yarn/server/AMRMClientRelayer.java | 25 ++-- .../yarn/server/TestAMRMClientRelayer.java | 25 .../amrmproxy/FederationInterceptor.java| 12 ++ 3 files changed, 60 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/59d5af21/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java index 2621d3e..ca045d1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java @@ -237,6 +237,27 @@ public class AMRMClientRelayer extends AbstractService return this.rmClient.registerApplicationMaster(request); } + /** + * After an RM failover, there might be more than one + * allocate/finishApplicationMaster call thread (due to RPC timeout and retry) + * doing the auto re-register concurrently. As a result, we need to swallow + * the already register exception thrown by the new RM. + */ + private void reRegisterApplicationMaster( + RegisterApplicationMasterRequest request) + throws YarnException, IOException { +try { + registerApplicationMaster(request); +} catch (InvalidApplicationMasterRequestException e) { + if (e.getMessage() + .contains(AMRMClientUtils.APP_ALREADY_REGISTERED_MESSAGE)) { +LOG.info("Concurrent thread successfully re-registered, moving on."); + } else { +throw e; + } +} + } + @Override public FinishApplicationMasterResponse finishApplicationMaster( FinishApplicationMasterRequest request) @@ -247,7 +268,7 @@ public class AMRMClientRelayer extends AbstractService LOG.warn("Out of sync with RM " + rmId + " for " + this.appId + ", hence resyncing."); // re register with RM - registerApplicationMaster(this.amRegistrationRequest); + reRegisterApplicationMaster(this.amRegistrationRequest); return finishApplicationMaster(request); } } @@ -381,7 +402,7 @@ public class AMRMClientRelayer extends AbstractService } // re-register with RM, then retry allocate recursively - registerApplicationMaster(this.amRegistrationRequest); + reRegisterApplicationMaster(this.amRegistrationRequest); // Reset responseId after re-register allocateRequest.setResponseId(0); allocateResponse = allocate(allocateRequest); http://git-wip-us.apache.org/repos/asf/hadoop/blob/59d5af21/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java index 2c016d7..fa46960 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java @@ -64,6 +64,11 @@ public class TestAMRMClientRelayer { // Whether this mockRM will throw failover exception upon next heartbeat // from AM private boolean failover = false; + +// Whether this mockRM will throw application
hadoop git commit: YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in AMRMClientRelayer. Contributed by Botong Huang.
Repository: hadoop Updated Branches: refs/heads/branch-2 d7d0e55e0 -> 57caab6f9 YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in AMRMClientRelayer. Contributed by Botong Huang. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/57caab6f Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/57caab6f Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/57caab6f Branch: refs/heads/branch-2 Commit: 57caab6f9b66d52d6df2d95e21e8854398489631 Parents: d7d0e55 Author: Giovanni Matteo Fumarola Authored: Mon Oct 1 13:12:38 2018 -0700 Committer: Giovanni Matteo Fumarola Committed: Mon Oct 1 14:27:01 2018 -0700 -- .../hadoop/yarn/server/AMRMClientRelayer.java | 25 ++-- .../yarn/server/TestAMRMClientRelayer.java | 25 .../amrmproxy/FederationInterceptor.java| 12 ++ 3 files changed, 60 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/57caab6f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java index a7ed373..790147c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java @@ -232,6 +232,27 @@ public class AMRMClientRelayer extends AbstractService return this.rmClient.registerApplicationMaster(request); } + /** + * After an RM failover, there might be more than one + * allocate/finishApplicationMaster call thread (due to RPC timeout and retry) + * doing the auto re-register concurrently. As a result, we need to swallow + * the already register exception thrown by the new RM. + */ + private void reRegisterApplicationMaster( + RegisterApplicationMasterRequest request) + throws YarnException, IOException { +try { + registerApplicationMaster(request); +} catch (InvalidApplicationMasterRequestException e) { + if (e.getMessage() + .contains(AMRMClientUtils.APP_ALREADY_REGISTERED_MESSAGE)) { +LOG.info("Concurrent thread successfully re-registered, moving on."); + } else { +throw e; + } +} + } + @Override public FinishApplicationMasterResponse finishApplicationMaster( FinishApplicationMasterRequest request) @@ -242,7 +263,7 @@ public class AMRMClientRelayer extends AbstractService LOG.warn("Out of sync with RM " + rmId + " for " + this.appId + ", hence resyncing."); // re register with RM - registerApplicationMaster(this.amRegistrationRequest); + reRegisterApplicationMaster(this.amRegistrationRequest); return finishApplicationMaster(request); } } @@ -363,7 +384,7 @@ public class AMRMClientRelayer extends AbstractService } // re-register with RM, then retry allocate recursively - registerApplicationMaster(this.amRegistrationRequest); + reRegisterApplicationMaster(this.amRegistrationRequest); // Reset responseId after re-register allocateRequest.setResponseId(0); allocateResponse = allocate(allocateRequest); http://git-wip-us.apache.org/repos/asf/hadoop/blob/57caab6f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java index 2c016d7..fa46960 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java @@ -64,6 +64,11 @@ public class TestAMRMClientRelayer { // Whether this mockRM will throw failover exception upon next heartbeat // from AM
hadoop git commit: YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in AMRMClientRelayer. Contributed by Botong Huang.
Repository: hadoop Updated Branches: refs/heads/trunk cc80ac231 -> 59d5af21b YARN-8760. [AMRMProxy] Fix concurrent re-register due to YarnRM failover in AMRMClientRelayer. Contributed by Botong Huang. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/59d5af21 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/59d5af21 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/59d5af21 Branch: refs/heads/trunk Commit: 59d5af21b7a8f52e8c89cbc2d25fe3d449b2657a Parents: cc80ac2 Author: Giovanni Matteo Fumarola Authored: Mon Oct 1 13:12:38 2018 -0700 Committer: Giovanni Matteo Fumarola Committed: Mon Oct 1 13:12:38 2018 -0700 -- .../hadoop/yarn/server/AMRMClientRelayer.java | 25 ++-- .../yarn/server/TestAMRMClientRelayer.java | 25 .../amrmproxy/FederationInterceptor.java| 12 ++ 3 files changed, 60 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/59d5af21/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java index 2621d3e..ca045d1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/AMRMClientRelayer.java @@ -237,6 +237,27 @@ public class AMRMClientRelayer extends AbstractService return this.rmClient.registerApplicationMaster(request); } + /** + * After an RM failover, there might be more than one + * allocate/finishApplicationMaster call thread (due to RPC timeout and retry) + * doing the auto re-register concurrently. As a result, we need to swallow + * the already register exception thrown by the new RM. + */ + private void reRegisterApplicationMaster( + RegisterApplicationMasterRequest request) + throws YarnException, IOException { +try { + registerApplicationMaster(request); +} catch (InvalidApplicationMasterRequestException e) { + if (e.getMessage() + .contains(AMRMClientUtils.APP_ALREADY_REGISTERED_MESSAGE)) { +LOG.info("Concurrent thread successfully re-registered, moving on."); + } else { +throw e; + } +} + } + @Override public FinishApplicationMasterResponse finishApplicationMaster( FinishApplicationMasterRequest request) @@ -247,7 +268,7 @@ public class AMRMClientRelayer extends AbstractService LOG.warn("Out of sync with RM " + rmId + " for " + this.appId + ", hence resyncing."); // re register with RM - registerApplicationMaster(this.amRegistrationRequest); + reRegisterApplicationMaster(this.amRegistrationRequest); return finishApplicationMaster(request); } } @@ -381,7 +402,7 @@ public class AMRMClientRelayer extends AbstractService } // re-register with RM, then retry allocate recursively - registerApplicationMaster(this.amRegistrationRequest); + reRegisterApplicationMaster(this.amRegistrationRequest); // Reset responseId after re-register allocateRequest.setResponseId(0); allocateResponse = allocate(allocateRequest); http://git-wip-us.apache.org/repos/asf/hadoop/blob/59d5af21/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java index 2c016d7..fa46960 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/TestAMRMClientRelayer.java @@ -64,6 +64,11 @@ public class TestAMRMClientRelayer { // Whether this mockRM will throw failover exception upon next heartbeat // from AM private