YARN-4596. SystemMetricsPublisher should not swallow error messages from TimelineClient#putEntities. Contributed by Li Lu.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f3858511 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f3858511 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f3858511 Branch: refs/heads/HDFS-1312 Commit: f385851141522633184ce394899c659af5ace92a Parents: 8bc93db Author: Jian He <[email protected]> Authored: Mon Jan 18 16:58:39 2016 -0800 Committer: Jian He <[email protected]> Committed: Mon Jan 18 16:58:39 2016 -0800 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 +++ .../distributedshell/ApplicationMaster.java | 25 +++++++++++++++++--- .../metrics/SystemMetricsPublisher.java | 14 ++++++++++- 3 files changed, 38 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/f3858511/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 7b88e4e..902c188 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1277,6 +1277,9 @@ Release 2.8.0 - UNRELEASED YARN-4502. Fix two AM containers get allocated when AM restart. (Vinod Kumar Vavilapalli via wangda) + YARN-4596. SystemMetricPublisher should not swallow error messages from + TimelineClient#putEntities. 
(Li Lu via jianhe) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/f3858511/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index f410c43..95dbddc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -1140,7 +1140,8 @@ public class ApplicationMaster { ugi.doAs(new PrivilegedExceptionAction<TimelinePutResponse>() { @Override public TimelinePutResponse run() throws Exception { - return timelineClient.putEntities(entity); + return processTimelineResponseErrors( + timelineClient.putEntities(entity)); } }); } catch (Exception e) { @@ -1165,7 +1166,8 @@ public class ApplicationMaster { event.addEventInfo("Exit Status", container.getExitStatus()); entity.addEvent(event); try { - timelineClient.putEntities(entity); + TimelinePutResponse response = timelineClient.putEntities(entity); + processTimelineResponseErrors(response); } catch (YarnException | IOException e) { LOG.error("Container end event could not be published for " + container.getContainerId().toString(), e); @@ -1185,7 +1187,8 
@@ public class ApplicationMaster { event.setTimestamp(System.currentTimeMillis()); entity.addEvent(event); try { - timelineClient.putEntities(entity); + TimelinePutResponse response = timelineClient.putEntities(entity); + processTimelineResponseErrors(response); } catch (YarnException | IOException e) { LOG.error("App Attempt " + (appEvent.equals(DSEvent.DS_APP_ATTEMPT_START) ? "start" : "end") @@ -1194,6 +1197,22 @@ public class ApplicationMaster { } } + private static TimelinePutResponse processTimelineResponseErrors( + TimelinePutResponse response) { + List<TimelinePutResponse.TimelinePutError> errors = response.getErrors(); + if (errors.size() == 0) { + LOG.debug("Timeline entities are successfully put"); + } else { + for (TimelinePutResponse.TimelinePutError error : errors) { + LOG.error( + "Error when publishing entity [" + error.getEntityType() + "," + + error.getEntityId() + "], server side error code: " + + error.getErrorCode()); + } + } + return response; + } + RMCallbackHandler getRMCallbackHandler() { return new RMCallbackHandler(); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/f3858511/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java index f240660..84a3b19 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java @@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent; +import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse; import org.apache.hadoop.yarn.client.api.TimelineClient; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; @@ -500,7 +501,18 @@ public class SystemMetricsPublisher extends CompositeService { LOG.debug("Publishing the entity " + entity.getEntityId() + ", JSON-style content: " + TimelineUtils.dumpTimelineRecordtoJSON(entity)); } - client.putEntities(entity); + TimelinePutResponse response = client.putEntities(entity); + List<TimelinePutResponse.TimelinePutError> errors = response.getErrors(); + if (errors.size() == 0) { + LOG.debug("Timeline entities are successfully put"); + } else { + for (TimelinePutResponse.TimelinePutError error : errors) { + LOG.error( + "Error when publishing entity [" + error.getEntityType() + "," + + error.getEntityId() + "], server side error code: " + + error.getErrorCode()); + } + } } catch (Exception e) { LOG.error("Error when publishing entity [" + entity.getEntityType() + "," + entity.getEntityId() + "]", e);
