Author: tgraves Date: Mon May 21 20:06:40 2012 New Revision: 1341184 URL: http://svn.apache.org/viewvc?rev=1341184&view=rev Log: merge -r 1341161:1341162 from branch-2. FIXES: MAPREDUCE-3870
Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt?rev=1341184&r1=1341183&r2=1341184&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/CHANGES.txt Mon May 21 20:06:40 2012 @@ -208,6 +208,9 @@ Release 0.23.3 - UNRELEASED MAPREDUCE-4269. documentation: Gridmix has javadoc warnings in StressJobFactory (Jonathon Eagles via tgraves). + MAPREDUCE-3870. Invalid App Metrics + (Bhallamudi Venkata Siva Kamesh via tgraves). + Release 0.23.2 - UNRELEASED INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java?rev=1341184&r1=1341183&r2=1341184&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java Mon May 21 20:06:40 2012 @@ -49,7 +49,7 @@ public class QueueMetrics { @Metric("# of pending apps") MutableGaugeInt appsPending; @Metric("# of apps completed") MutableCounterInt appsCompleted; @Metric("# of apps killed") MutableCounterInt appsKilled; - @Metric("# of apps failed") MutableCounterInt appsFailed; + @Metric("# of apps failed") MutableGaugeInt appsFailed; @Metric("Allocated memory in MB") MutableGaugeInt allocatedMB; @Metric("# of allocated containers") MutableGaugeInt allocatedContainers; @@ -131,15 +131,19 @@ public class QueueMetrics { return metrics; } - public void submitApp(String user) { - appsSubmitted.incr(); + public void submitApp(String user, int attemptId) { + if (attemptId == 1) { + appsSubmitted.incr(); + } else { + appsFailed.decr(); + } appsPending.incr(); QueueMetrics userMetrics = getUserMetrics(user); if (userMetrics != null) { - userMetrics.submitApp(user); + userMetrics.submitApp(user, attemptId); } if (parent != null) { - parent.submitApp(user); + parent.submitApp(user, attemptId); } } Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java?rev=1341184&r1=1341183&r2=1341184&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java Mon May 21 20:06:40 2012 @@ -631,9 +631,7 @@ public class LeafQueue implements CSQueu } int attemptId = application.getApplicationAttemptId().getAttemptId(); - if (attemptId == 1) { - metrics.submitApp(userName); - } + metrics.submitApp(userName, attemptId); // Inform the parent queue try { Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java?rev=1341184&r1=1341183&r2=1341184&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java Mon May 21 20:06:40 2012 @@ -295,9 +295,7 @@ public class FifoScheduler implements Re new SchedulerApp(appAttemptId, user, DEFAULT_QUEUE, activeUsersManager, this.rmContext, null); applications.put(appAttemptId, schedulerApp); - if (appAttemptId.getAttemptId() == 1) { - metrics.submitApp(user); - } + metrics.submitApp(user, appAttemptId.getAttemptId()); LOG.info("Application Submission: " + appAttemptId.getApplicationId() + " from " + user + ", currently active: " + applications.size()); rmContext.getDispatcher().getEventHandler().handle( Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java?rev=1341184&r1=1341183&r2=1341184&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java Mon May 21 20:06:40 2012 @@ -31,9 +31,12 @@ import org.apache.hadoop.metrics2.Metric import org.apache.hadoop.metrics2.MetricsSource; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.impl.MetricsSystemImpl; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resource; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.Test; public class TestQueueMetrics { @@ -49,7 +52,7 @@ public class TestQueueMetrics { MetricsSource queueSource= queueSource(ms, queueName); AppSchedulingInfo app = mockApp(user); - metrics.submitApp(user); + metrics.submitApp(user, 1); MetricsSource userSource = userSource(ms, queueName, user); checkApps(queueSource, 1, 1, 0, 0, 0, 0); @@ -72,6 +75,52 @@ public class TestQueueMetrics { checkApps(queueSource, 1, 0, 0, 1, 0, 0); assertNull(userSource); } + + @Test + public void testQueueAppMetricsForMultipleFailures() { + String queueName = "single"; + String user = "alice"; + + QueueMetrics metrics = QueueMetrics.forQueue(ms, queueName, null, false); + MetricsSource queueSource = queueSource(ms, queueName); + AppSchedulingInfo app = mockApp(user); + + metrics.submitApp(user, 1); + MetricsSource userSource = userSource(ms, queueName, user); + checkApps(queueSource, 1, 1, 0, 0, 0, 0); + + metrics.incrAppsRunning(user); + checkApps(queueSource, 1, 0, 1, 0, 0, 0); + + metrics.finishApp(app, RMAppAttemptState.FAILED); + checkApps(queueSource, 1, 0, 0, 0, 1, 0); + + // As the application has failed, framework retries the same application + // based on configuration + metrics.submitApp(user, 2); + checkApps(queueSource, 1, 1, 0, 0, 0, 0); + + metrics.incrAppsRunning(user); + checkApps(queueSource, 1, 0, 1, 0, 0, 0); + + // Suppose say application has failed this time as well. + metrics.finishApp(app, RMAppAttemptState.FAILED); + checkApps(queueSource, 1, 0, 0, 0, 1, 0); + + // As the application has failed, framework retries the same application + // based on configuration + metrics.submitApp(user, 3); + checkApps(queueSource, 1, 1, 0, 0, 0, 0); + + metrics.incrAppsRunning(user); + checkApps(queueSource, 1, 0, 1, 0, 0, 0); + + // Suppose say application has finished. + metrics.finishApp(app, RMAppAttemptState.FINISHED); + checkApps(queueSource, 1, 0, 0, 1, 0, 0); + + assertNull(userSource); + } @Test public void testSingleQueueWithUserMetrics() { String queueName = "single2"; @@ -81,7 +130,7 @@ public class TestQueueMetrics { MetricsSource queueSource = queueSource(ms, queueName); AppSchedulingInfo app = mockApp(user); - metrics.submitApp(user); + metrics.submitApp(user, 1); MetricsSource userSource = userSource(ms, queueName, user); checkApps(queueSource, 1, 1, 0, 0, 0, 0); @@ -127,7 +176,7 @@ public class TestQueueMetrics { MetricsSource queueSource = queueSource(ms, leafQueueName); AppSchedulingInfo app = mockApp(user); - metrics.submitApp(user); + metrics.submitApp(user, 1); MetricsSource userSource = userSource(ms, leafQueueName, user); MetricsSource parentUserSource = userSource(ms, parentQueueName, user); @@ -180,7 +229,7 @@ public class TestQueueMetrics { assertGauge("AppsPending", pending, rb); assertGauge("AppsRunning", running, rb); assertCounter("AppsCompleted", completed, rb); - assertCounter("AppsFailed", failed, rb); + assertGauge("AppsFailed", failed, rb); assertCounter("AppsKilled", killed, rb); } @@ -203,6 +252,9 @@ public class TestQueueMetrics { private static AppSchedulingInfo mockApp(String user) { AppSchedulingInfo app = mock(AppSchedulingInfo.class); when(app.getUser()).thenReturn(user); + ApplicationId appId = BuilderUtils.newApplicationId(1, 1); + ApplicationAttemptId id = BuilderUtils.newApplicationAttemptId(appId, 1); + when(app.getApplicationAttemptId()).thenReturn(id); return app; } Modified: hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java?rev=1341184&r1=1341183&r2=1341184&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java Mon May 21 20:06:40 2012 @@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.factories. import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; @@ -62,6 +63,7 @@ import org.apache.hadoop.yarn.server.res import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -88,7 +90,8 @@ public class TestLeafQueue { @Before public void setUp() throws Exception { - cs = new CapacityScheduler(); + CapacityScheduler spyCs = new CapacityScheduler(); + cs = spy(spyCs); rmContext = TestUtils.getMockRMContext(); csConf = @@ -306,6 +309,14 @@ public class TestLeafQueue { SchedulerApp app_0 = new SchedulerApp(appAttemptId_0, user_0, a, null, rmContext, null); a.submitApplication(app_0, user_0, B); + + when(cs.getApplication(appAttemptId_0)).thenReturn(app_0); + AppRemovedSchedulerEvent event = new AppRemovedSchedulerEvent( + appAttemptId_0, RMAppAttemptState.FAILED); + cs.handle(event); + + assertEquals(0, a.getMetrics().getAppsPending()); + assertEquals(1, a.getMetrics().getAppsFailed()); // Attempt the same application again final ApplicationAttemptId appAttemptId_1 = TestUtils @@ -316,6 +327,16 @@ public class TestLeafQueue { assertEquals(1, a.getMetrics().getAppsSubmitted()); assertEquals(1, a.getMetrics().getAppsPending()); + + when(cs.getApplication(appAttemptId_1)).thenReturn(app_0); + event = new AppRemovedSchedulerEvent(appAttemptId_0, + RMAppAttemptState.FINISHED); + cs.handle(event); + + assertEquals(1, a.getMetrics().getAppsSubmitted()); + assertEquals(0, a.getMetrics().getAppsPending()); + assertEquals(0, a.getMetrics().getAppsFailed()); + assertEquals(1, a.getMetrics().getAppsCompleted()); QueueMetrics userMetrics = a.getMetrics().getUserMetrics(user_0); assertEquals(1, userMetrics.getAppsSubmitted());