htran1 commented on a change in pull request #3035: URL: https://github.com/apache/incubator-gobblin/pull/3035#discussion_r440390208
########## File path: gobblin-runtime/src/main/java/org/apache/gobblin/runtime/mapreduce/MRJobLauncher.java ########## @@ -783,8 +786,26 @@ protected void setup(Context context) { if (Boolean.valueOf( configuration.get(ConfigurationKeys.METRICS_ENABLED_KEY, ConfigurationKeys.DEFAULT_METRICS_ENABLED))) { this.jobMetrics = Optional.of(JobMetrics.get(this.jobState)); - this.jobMetrics.get() - .startMetricReportingWithFileSuffix(gobblinJobState, context.getTaskAttemptID().toString()); + try { + this.jobMetrics.get() + .startMetricReportingWithFileSuffix(gobblinJobState, context.getTaskAttemptID().toString()); + } catch (MultiReporterException ex) { + //Fail the task if metric/event reporting failure is configured to be fatal. + boolean isMetricReportingFailureFatal = Boolean.valueOf(configuration + .get(ConfigurationKeys.GOBBLIN_TASK_METRIC_REPORTING_FAILURE_FATAL, + Boolean.toString(ConfigurationKeys.DEFAULT_GOBBLIN_TASK_METRIC_REPORTING_FAILURE_FATAL))); + boolean isEventReportingFailureFatal = Boolean.valueOf(configuration + .get(ConfigurationKeys.GOBBLIN_TASK_EVENT_REPORTING_FAILURE_FATAL, + Boolean.toString(ConfigurationKeys.DEFAULT_GOBBLIN_TASK_EVENT_REPORTING_FAILURE_FATAL))); + for (MetricReporterException e : ex.getExceptions()) { + if ((isMetricReportingFailureFatal && e.getReporterType().equals(ReporterType.METRIC)) || ( Review comment: Maybe the throw can be moved outside the loop so that all the errors are logged. ########## File path: gobblin-runtime/src/main/java/org/apache/gobblin/runtime/services/MetricsReportingService.java ########## @@ -21,25 +21,52 @@ import com.google.common.util.concurrent.AbstractIdleService; +import lombok.extern.slf4j.Slf4j; + import org.apache.gobblin.metrics.GobblinMetrics; +import org.apache.gobblin.metrics.MetricReporterException; +import org.apache.gobblin.metrics.MultiReporterException; +import org.apache.gobblin.metrics.ReporterType; +import org.apache.gobblin.util.PropertiesUtils; /** * A {@link com.google.common.util.concurrent.Service} for handling life cycle events around {@link GobblinMetrics}. */ +@Slf4j public class MetricsReportingService extends AbstractIdleService { + public static final String METRICS_REPORTING_FAILURE_FATAL_KEY = "metrics.reporting.failure.fatal"; + public static final String EVENT_REPORTING_FAILURE_FATAL_KEY = "event.reporting.failure.fatal"; + + public static final String DEFAULT_METRICS_REPORTING_FAILURE_FATAL = "false"; + public static final String DEFAULT_EVENT_REPORTING_FAILURE_FATAL = "false"; private final Properties properties; private final String appId; + private final boolean isMetricReportingFailureFatal; + private final boolean isEventReportingFailureFatal; public MetricsReportingService(Properties properties, String appId) { this.properties = properties; this.appId = appId; + this.isMetricReportingFailureFatal = PropertiesUtils.getPropAsBoolean(properties, METRICS_REPORTING_FAILURE_FATAL_KEY, DEFAULT_METRICS_REPORTING_FAILURE_FATAL); + this.isEventReportingFailureFatal = PropertiesUtils.getPropAsBoolean(properties, EVENT_REPORTING_FAILURE_FATAL_KEY, DEFAULT_EVENT_REPORTING_FAILURE_FATAL); } @Override protected void startUp() throws Exception { - GobblinMetrics.get(this.appId).startMetricReporting(this.properties); + try { + GobblinMetrics.get(this.appId).startMetricReporting(this.properties); + } catch (MultiReporterException ex) { + for (MetricReporterException e: ex.getExceptions()) { Review comment: This shows up three times, so maybe pull into a utility method. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org