This is an automated email from the ASF dual-hosted git repository.
wangda pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new a685ffe YARN-9194. Invalid event: REGISTERED and LAUNCH_FAILED at
FAILED, and NullPointerException happens in RM while shutdown a NM. (lujie via
wangda)
a685ffe is described below
commit a685ffe9a9d5cf073faa2625d1125839fb57bae2
Author: Wangda Tan <[email protected]>
AuthorDate: Thu Jan 17 14:20:10 2019 -0800
YARN-9194. Invalid event: REGISTERED and LAUNCH_FAILED at FAILED, and
NullPointerException happens in RM while shutdown a NM. (lujie via wangda)
Change-Id: I4359f59a73a278a941f4bb9d106dd38c9cb471fe
(cherry picked from commit 6d7eedfd28cc1712690db2f6ca8a281b0901ee28)
(cherry picked from commit fe7cb2d84ac160c5fed00640d85e2c5c4c6d2412)
---
.../rmapp/attempt/RMAppAttemptImpl.java | 14 +++-
.../rmapp/attempt/TestRMAppAttemptTransitions.java | 80 +++++++++++++++++++++-
2 files changed, 90 insertions(+), 4 deletions(-)
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index bb43693..feb690f 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -437,9 +437,11 @@ public class RMAppAttemptImpl implements RMAppAttempt,
Recoverable {
RMAppAttemptState.FAILED,
EnumSet.of(
RMAppAttemptEventType.LAUNCHED,
+ RMAppAttemptEventType.LAUNCH_FAILED,
RMAppAttemptEventType.EXPIRE,
RMAppAttemptEventType.KILL,
RMAppAttemptEventType.FAIL,
+ RMAppAttemptEventType.REGISTERED,
RMAppAttemptEventType.UNREGISTERED,
RMAppAttemptEventType.STATUS_UPDATE,
RMAppAttemptEventType.CONTAINER_ALLOCATED))
@@ -1203,10 +1205,16 @@ public class RMAppAttemptImpl implements RMAppAttempt,
Recoverable {
}
// Set the masterContainer
- appAttempt.setMasterContainer(amContainerAllocation.getContainers()
- .get(0));
+ Container amContainer = amContainerAllocation.getContainers().get(0);
RMContainerImpl rmMasterContainer = (RMContainerImpl)appAttempt.scheduler
- .getRMContainer(appAttempt.getMasterContainer().getId());
+ .getRMContainer(amContainer.getId());
+ // When an NM is removed, the scheduler cleans up the container; the
+ // subsequent CONTAINER_FINISHED event will handle the cleaned container,
+ // so just return RMAppAttemptState.SCHEDULED here.
+ if (rmMasterContainer == null) {
+ return RMAppAttemptState.SCHEDULED;
+ }
+ appAttempt.setMasterContainer(amContainer);
rmMasterContainer.setAMContainer(true);
// The node set in NMTokenSecrentManager is used for marking whether the
// NMToken has been issued for this node to the AM.
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
index 4a5c671..faecdb4 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
@@ -986,7 +986,7 @@ public class TestRMAppAttemptTransitions {
public void testAttemptAddedAtFinalSaving() {
submitApplicationAttempt();
- // SUBNITED->FINAL_SAVING
+ // SUBMITTED->FINAL_SAVING
applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
.getAppAttemptId(), RMAppAttemptEventType.KILL));
assertEquals(RMAppAttemptState.FINAL_SAVING,
@@ -999,6 +999,56 @@ public class TestRMAppAttemptTransitions {
applicationAttempt.getAppAttemptState());
}
+ @Test(timeout = 10000)
+ public void testAttemptRegisteredAtFailed() {
+ Container amContainer = allocateApplicationAttempt();
+ launchApplicationAttempt(amContainer);
+
+ //send CONTAINER_FINISHED event
+ NodeId anyNodeId = NodeId.newInstance("host", 1234);
+ applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
+ applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(
+ amContainer.getId(), ContainerState.COMPLETE, "", 0,
+ amContainer.getResource()), anyNodeId));
+ assertEquals(RMAppAttemptState.FINAL_SAVING,
+ applicationAttempt.getAppAttemptState());
+
+ sendAttemptUpdateSavedEvent(applicationAttempt);
+ assertEquals(RMAppAttemptState.FAILED,
+ applicationAttempt.getAppAttemptState());
+
+ //send REGISTERED event
+ applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
+ .getAppAttemptId(), RMAppAttemptEventType.REGISTERED));
+
+ assertEquals(RMAppAttemptState.FAILED,
+ applicationAttempt.getAppAttemptState());
+ }
+
+ @Test
+ public void testAttemptLaunchFailedAtFailed() {
+ Container amContainer = allocateApplicationAttempt();
+ launchApplicationAttempt(amContainer);
+ //send CONTAINER_FINISHED event
+ NodeId anyNodeId = NodeId.newInstance("host", 1234);
+ applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
+ applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(
+ amContainer.getId(), ContainerState.COMPLETE, "", 0,
+ amContainer.getResource()), anyNodeId));
+ assertEquals(RMAppAttemptState.FINAL_SAVING,
+ applicationAttempt.getAppAttemptState());
+ sendAttemptUpdateSavedEvent(applicationAttempt);
+ assertEquals(RMAppAttemptState.FAILED,
+ applicationAttempt.getAppAttemptState());
+
+ //send LAUNCH_FAILED event
+ applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
+ .getAppAttemptId(), RMAppAttemptEventType.LAUNCH_FAILED));
+
+ assertEquals(RMAppAttemptState.FAILED,
+ applicationAttempt.getAppAttemptState());
+ }
+
@Test
public void testAMCrashAtAllocated() {
Container amContainer = allocateApplicationAttempt();
@@ -1598,6 +1648,34 @@ public class TestRMAppAttemptTransitions {
assertTrue(found);
}
+ @Test
+ public void testContainerRemovedBeforeAllocate() {
+ scheduleApplicationAttempt();
+
+ // Mock the allocation of AM container
+ Container container = mock(Container.class);
+ Resource resource = BuilderUtils.newResource(2048, 1);
+ when(container.getId()).thenReturn(
+ BuilderUtils.newContainerId(applicationAttempt.getAppAttemptId(), 1));
+ when(container.getResource()).thenReturn(resource);
+ Allocation allocation = mock(Allocation.class);
+ when(allocation.getContainers()).
+ thenReturn(Collections.singletonList(container));
+ when(scheduler.allocate(any(ApplicationAttemptId.class), any(List.class),
+ any(List.class), any(List.class), any(List.class), any(List.class),
+ any(ContainerUpdates.class))).
+ thenReturn(allocation);
+
+ //container removed, so return null
+ when(scheduler.getRMContainer(container.getId())).
+ thenReturn(null);
+
+ applicationAttempt.handle(
+ new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
+ RMAppAttemptEventType.CONTAINER_ALLOCATED));
+ assertEquals(RMAppAttemptState.SCHEDULED,
+ applicationAttempt.getAppAttemptState());
+ }
@SuppressWarnings("deprecation")
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]