This is an automated email from the ASF dual-hosted git repository.

abstractdog pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tez.git
The following commit(s) were added to refs/heads/master by this push: new 7855c1fe0 TEZ-4506: Report the node of a task attempt failure better. (#307) (Ayush Saxena reviewed by Laszlo Bodor) 7855c1fe0 is described below commit 7855c1fe07961a4c3999edf9776d60326ac35337 Author: Ayush Saxena <ayushsax...@apache.org> AuthorDate: Fri Sep 1 14:46:23 2023 +0530 TEZ-4506: Report the node of a task attempt failure better. (#307) (Ayush Saxena reviewed by Laszlo Bodor) --- .../org/apache/tez/runtime/task/TaskReporter.java | 6 +++-- .../tez/runtime/task/TaskExecutionTestHelpers.java | 31 ++++++++++++++++------ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java index 81047a9f5..99d8bbca4 100644 --- a/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java +++ b/tez-runtime-internals/src/main/java/org/apache/tez/runtime/task/TaskReporter.java @@ -21,6 +21,7 @@ package org.apache.tez.runtime.task; import java.io.IOException; import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; +import java.net.InetAddress; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -401,9 +402,10 @@ public class TaskReporter implements TaskReporterInterface { if (!finalEventQueued.getAndSet(true)) { List<TezEvent> tezEvents = new ArrayList<TezEvent>(); if (diagnostics == null) { - diagnostics = ExceptionUtils.getStackTrace(t); + diagnostics = "Node: " + InetAddress.getLocalHost() + " : " + ExceptionUtils.getStackTrace(t); } else { - diagnostics = diagnostics + ":" + ExceptionUtils.getStackTrace(t); + diagnostics = + "Node: " + InetAddress.getLocalHost() + " : " + diagnostics + ":" + ExceptionUtils.getStackTrace(t); } if (isKilled) { tezEvents.add(new TezEvent(new TaskAttemptKilledEvent(diagnostics), diff --git 
a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TaskExecutionTestHelpers.java b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TaskExecutionTestHelpers.java index b6000ccae..3e6790c6c 100644 --- a/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TaskExecutionTestHelpers.java +++ b/tez-runtime-internals/src/test/java/org/apache/tez/runtime/task/TaskExecutionTestHelpers.java @@ -47,6 +47,8 @@ import org.apache.tez.runtime.api.events.TaskAttemptKilledEvent; import org.apache.tez.runtime.api.impl.TezEvent; import org.apache.tez.runtime.api.impl.TezHeartbeatRequest; import org.apache.tez.runtime.api.impl.TezHeartbeatResponse; + +import org.junit.Assert; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -396,19 +398,20 @@ public final class TaskExecutionTestHelpers { for (TezEvent event : requestEvents) { if (event.getEvent() instanceof TaskAttemptFailedEvent) { TaskAttemptFailedEvent failedEvent = (TaskAttemptFailedEvent) event.getEvent(); - if (failedEvent.getDiagnostics().startsWith(diagStart)) { + String diagnostics = getDiagnosticsWithoutNodeIp(failedEvent.getDiagnostics()); + if (diagnostics.startsWith(diagStart)) { if (diagContains != null) { - if (failedEvent.getDiagnostics().contains(diagContains)) { + if (diagnostics.contains(diagContains)) { assertEquals(taskFailureType, failedEvent.getTaskFailureType()); return; } else { fail("Diagnostic message does not contain expected message. Found [" + - failedEvent.getDiagnostics() + "], Expected: [" + diagContains + "]"); + diagnostics + "], Expected: [" + diagContains + "]"); } } } else { fail("Diagnostic message does not start with expected message. 
Found [" + - failedEvent.getDiagnostics() + "], Expected: [" + diagStart + "]"); + diagnostics + "], Expected: [" + diagStart + "]"); } } } @@ -425,18 +428,19 @@ public final class TaskExecutionTestHelpers { if (event.getEvent() instanceof TaskAttemptKilledEvent) { TaskAttemptKilledEvent killedEvent = (TaskAttemptKilledEvent) event.getEvent(); - if (killedEvent.getDiagnostics().startsWith(diagStart)) { + String diagnostics = getDiagnosticsWithoutNodeIp(killedEvent.getDiagnostics()); + if (diagnostics.startsWith(diagStart)) { if (diagContains != null) { - if (killedEvent.getDiagnostics().contains(diagContains)) { + if (diagnostics.contains(diagContains)) { return; } else { fail("Diagnostic message does not contain expected message. Found [" + - killedEvent.getDiagnostics() + "], Expected: [" + diagContains + "]"); + diagnostics + "], Expected: [" + diagContains + "]"); } } } else { fail("Diagnostic message does not start with expected message. Found [" + - killedEvent.getDiagnostics() + "], Expected: [" + diagStart + "]"); + diagnostics + "], Expected: [" + diagStart + "]"); } } } @@ -518,6 +522,17 @@ public final class TaskExecutionTestHelpers { } } + private static String getDiagnosticsWithoutNodeIp(String diagnostics) { + String diagnosticsWithoutIP = diagnostics; + if (diagnostics != null && diagnostics.startsWith("Node:")) { + diagnosticsWithoutIP = diagnostics.substring(diagnostics.indexOf(" : ") + 3); + String nodeIp = diagnostics.substring(5, diagnostics.indexOf(" : ")); + Assert.assertFalse(nodeIp.isEmpty()); + } + + return diagnosticsWithoutIP; + } + @SuppressWarnings("deprecation") public static ContainerId createContainerId(ApplicationId appId) { ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);