zentol commented on a change in pull request #15049: URL: https://github.com/apache/flink/pull/15049#discussion_r594180402
########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/rest/messages/JobExceptionsInfoWithHistory.java ########## @@ -0,0 +1,273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.rest.messages; + +import org.apache.flink.runtime.rest.handler.job.JobExceptionsHandler; + +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnore; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonInclude; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; + +import javax.annotation.Nullable; + +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +import static org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonInclude.Include.NON_NULL; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * {@code JobExceptionsInfoWithHistory} extends {@link JobExceptionsInfo} providing a history of + * previously caused failures. It's the response type of the {@link JobExceptionsHandler}. 
+ */ +public class JobExceptionsInfoWithHistory extends JobExceptionsInfo implements ResponseBody { + + public static final String FIELD_NAME_EXCEPTION_HISTORY = "exceptionHistory"; + + @JsonProperty(FIELD_NAME_EXCEPTION_HISTORY) + private final JobExceptionHistory exceptionHistory; + + @JsonCreator + public JobExceptionsInfoWithHistory( + @JsonProperty(FIELD_NAME_ROOT_EXCEPTION) String rootException, + @JsonProperty(FIELD_NAME_TIMESTAMP) Long rootTimestamp, + @JsonProperty(FIELD_NAME_ALL_EXCEPTIONS) List<ExecutionExceptionInfo> allExceptions, + @JsonProperty(FIELD_NAME_TRUNCATED) boolean truncated, + @JsonProperty(FIELD_NAME_EXCEPTION_HISTORY) JobExceptionHistory exceptionHistory) { + super(rootException, rootTimestamp, allExceptions, truncated); + this.exceptionHistory = exceptionHistory; + } + + public JobExceptionsInfoWithHistory() { + this( + null, + null, + Collections.emptyList(), + false, + new JobExceptionHistory(Collections.emptyList(), false)); + } + + @JsonIgnore + public JobExceptionHistory getExceptionHistory() { + return exceptionHistory; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + JobExceptionsInfoWithHistory that = (JobExceptionsInfoWithHistory) o; + return this.isTruncated() == that.isTruncated() + && Objects.equals(this.getRootException(), that.getRootException()) + && Objects.equals(this.getRootTimestamp(), that.getRootTimestamp()) + && Objects.equals(this.getAllExceptions(), that.getAllExceptions()) + && Objects.equals(exceptionHistory, that.exceptionHistory); + } + + @Override + public int hashCode() { + return Objects.hash( + isTruncated(), + getRootException(), + getRootTimestamp(), + getAllExceptions(), + exceptionHistory); + } + + @Override + public String toString() { + return new StringJoiner(", ", JobExceptionsInfoWithHistory.class.getSimpleName() + "[", "]") + .add("rootException='" + getRootException() + "'") + 
.add("rootTimestamp=" + getRootTimestamp()) + .add("allExceptions=" + getAllExceptions()) + .add("truncated=" + isTruncated()) + .add("exceptionHistory=" + exceptionHistory) + .toString(); + } + + /** {@code JobExceptionHistory} collects all previously caught errors. */ + public static final class JobExceptionHistory { + + public static final String FIELD_NAME_ENTRIES = "entries"; + public static final String FIELD_NAME_TRUNCATED = "truncated"; + + @JsonProperty(FIELD_NAME_ENTRIES) + private final List<ExceptionInfo> entries; + + @JsonProperty(FIELD_NAME_TRUNCATED) + private final boolean truncated; + + @JsonCreator + public JobExceptionHistory( + @JsonProperty(FIELD_NAME_ENTRIES) List<ExceptionInfo> entries, + @JsonProperty(FIELD_NAME_TRUNCATED) boolean truncated) { + this.entries = entries; + this.truncated = truncated; + } + + @JsonIgnore + public List<ExceptionInfo> getEntries() { + return entries; + } + + @JsonIgnore + public boolean isTruncated() { + return truncated; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + JobExceptionHistory that = (JobExceptionHistory) o; + return this.isTruncated() == that.isTruncated() + && Objects.equals(entries, that.entries); + } + + @Override + public int hashCode() { + return Objects.hash(entries, truncated); + } + + @Override + public String toString() { + return new StringJoiner(", ", JobExceptionHistory.class.getSimpleName() + "[", "]") + .add("entries=" + entries) + .add("truncated=" + truncated) + .toString(); + } + } + + /** Collects the information of a single exception. 
*/ + public static class ExceptionInfo { + + public static final String FIELD_NAME_EXCEPTION_NAME = "exceptionName"; + public static final String FIELD_NAME_EXCEPTION_STACKTRACE = "stacktrace"; + public static final String FIELD_NAME_EXCEPTION_TIMESTAMP = "timestamp"; + public static final String FIELD_NAME_TASK_NAME = "taskName"; + public static final String FIELD_NAME_LOCATION = "location"; + + @JsonProperty(FIELD_NAME_EXCEPTION_NAME) + private final String exceptionName; + + @JsonProperty(FIELD_NAME_EXCEPTION_STACKTRACE) + private final String stacktrace; + + @JsonProperty(FIELD_NAME_EXCEPTION_TIMESTAMP) + private final Long timestamp; + + @JsonInclude(NON_NULL) + @JsonProperty(FIELD_NAME_TASK_NAME) + @Nullable + private final String taskName; + + @JsonInclude(NON_NULL) + @JsonProperty(FIELD_NAME_LOCATION) + @Nullable + private final String location; + + public ExceptionInfo(String exceptionName, String stacktrace, Long timestamp) { + this(exceptionName, stacktrace, timestamp, null, null); + } + + @JsonCreator + public ExceptionInfo( + @JsonProperty(FIELD_NAME_EXCEPTION_NAME) String exceptionName, + @JsonProperty(FIELD_NAME_EXCEPTION_STACKTRACE) String stacktrace, + @JsonProperty(FIELD_NAME_EXCEPTION_TIMESTAMP) Long timestamp, Review comment: why is this not a `long`? 
########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/rest/handler/job/JobExceptionsHandler.java ########## @@ -132,7 +142,75 @@ private static JobExceptionsInfo createJobExceptionsInfo( } } - return new JobExceptionsInfo( - rootExceptionMessage, rootTimestamp, taskExceptionList, truncated); + final ErrorInfo rootCause = executionGraph.getFailureInfo(); + return new JobExceptionsInfoWithHistory( + rootCause.getExceptionAsString(), + rootCause.getTimestamp(), + taskExceptionList, + truncated, + createJobExceptionHistory( + executionGraphInfo.getExceptionHistory(), exceptionToReportMaxSize)); + } + + static JobExceptionsInfoWithHistory.JobExceptionHistory createJobExceptionHistory( + Iterable<ExceptionHistoryEntry> historyEntries, int limit) { + // we need to reverse the history to have a stable result when doing paging on it + final List<ExceptionHistoryEntry> reversedHistoryEntries = new ArrayList<>(); + Iterables.addAll(reversedHistoryEntries, historyEntries); + Collections.reverse(reversedHistoryEntries); + + List<JobExceptionsInfoWithHistory.ExceptionInfo> exceptionHistoryEntries = + reversedHistoryEntries.stream() + .limit(limit) + .map(JobExceptionsHandler::createExceptionInfo) + .collect(Collectors.toList()); + + return new JobExceptionsInfoWithHistory.JobExceptionHistory( + exceptionHistoryEntries, + exceptionHistoryEntries.size() < reversedHistoryEntries.size()); + } + + private static JobExceptionsInfoWithHistory.ExceptionInfo createExceptionInfo( + ExceptionHistoryEntry historyEntry) { + if (historyEntry.isGlobal()) { + return new JobExceptionsInfoWithHistory.ExceptionInfo( + historyEntry.getException().getOriginalErrorClassName(), + historyEntry.getExceptionAsString(), + historyEntry.getTimestamp()); + } + + Preconditions.checkArgument( + historyEntry.getFailingTaskName() != null, + "The taskName must not be null for a non-global failure."); + Preconditions.checkArgument( + historyEntry.getTaskManagerLocation() != null, + "The 
location must not be null for a non-global failure."); + + return new JobExceptionsInfoWithHistory.ExceptionInfo( + historyEntry.getException().getOriginalErrorClassName(), + historyEntry.getExceptionAsString(), + historyEntry.getTimestamp(), + historyEntry.getFailingTaskName(), + toString(historyEntry.getTaskManagerLocation())); + } + + @VisibleForTesting + @Nullable + static String toString(@Nullable TaskManagerLocation location) { + return location != null + ? taskManagerLocationToString(location.getFQDNHostname(), location.dataPort()) + : "(unassigned)"; + } + + @VisibleForTesting + @Nullable + static String toString(@Nullable ExceptionHistoryEntry.ArchivedTaskManagerLocation location) { + return location != null + ? taskManagerLocationToString(location.getFQDNHostname(), location.getPort()) + : null; Review comment: would be good to document that this difference in behavior (compared to the old toString codepath) is intended. ########## File path: flink-runtime-web/web-dashboard/src/app/pages/job/exceptions/job-exceptions.component.html ########## @@ -40,21 +41,28 @@ <tr> <td nzShowExpand [(nzExpand)]="exception.expand"></td> <td>{{exception.timestamp | date:'yyyy-MM-dd HH:mm:ss'}}</td> + <td><div class="name">{{exception.exceptionName}}</div></td> <td> <div class="name"> - {{exception.task}} + {{exception.taskName || "(global failure)"}} </div> </td> - <td>{{exception.location}}</td> + <td>{{exception.location || "(unassigned)"}}</td> </tr> <tr [nzExpand]="exception.expand"> - <td colspan="6" class="expand-td"> - <flink-monaco-editor *ngIf="exception.expand" class="subtask" [value]="exception.exception"></flink-monaco-editor> + <td colspan="5" class="expand-td"> + <flink-monaco-editor *ngIf="exception.expand" class="subtask" [value]="exception.stacktrace"></flink-monaco-editor> </td> </tr> </ng-container> + <tr *ngIf="listOfException.length > 0"> + <td colspan="6"> + <i nz-icon nzType="info-circle" nzTheme="fill"></i> + <i>The exception history is limited to 
the most recent failures that caused parts of the job or the entire job to restart. The maximum history size can be configured through the Flink configuration.</i> + </td> Review comment: the indentation differs from the rest of the file ########## File path: flink-runtime-web/web-dashboard/src/app/pages/job/exceptions/job-exceptions.component.html ########## @@ -40,21 +41,28 @@ <tr> <td nzShowExpand [(nzExpand)]="exception.expand"></td> <td>{{exception.timestamp | date:'yyyy-MM-dd HH:mm:ss'}}</td> + <td><div class="name">{{exception.exceptionName}}</div></td> Review comment: indentation (inconsistent with the rest of the file) ########## File path: flink-runtime-web/web-dashboard/src/app/pages/job/exceptions/job-exceptions.component.html ########## @@ -31,6 +31,7 @@ <tr> <th nzShowExpand></th> <th>Time</th> + <th>Exception</th> Review comment: indentation (inconsistent with the rest of the file) ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org
