rkhachatryan commented on a change in pull request #12670: URL: https://github.com/apache/flink/pull/12670#discussion_r440907128
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java
##########
@@ -539,51 +541,62 @@ private void
startTriggeringCheckpoint(CheckpointTriggerRequest request) {
coordinatorsToCheckpoint, pendingCheckpoint, timer),
timer);
- FutureUtils.assertNoException(
- CompletableFuture.allOf(masterStatesComplete,
coordinatorCheckpointsComplete)
- .handleAsync(
- (ignored, throwable) -> {
- final PendingCheckpoint
checkpoint =
-
FutureUtils.getWithoutException(pendingCheckpointCompletableFuture);
-
-
Preconditions.checkState(
- checkpoint !=
null || throwable != null,
- "Either the
pending checkpoint needs to be created or an error must have been occurred.");
-
- if (throwable != null) {
- // the
initialization might not be finished yet
- if (checkpoint
== null) {
-
onTriggerFailure(request, throwable);
- } else {
-
onTriggerFailure(checkpoint, throwable);
- }
+ CompletableFuture
+ .allOf(masterStatesComplete,
coordinatorCheckpointsComplete)
+ .handleAsync(
+ (ignored, throwable) -> {
+ final PendingCheckpoint
checkpoint =
+
FutureUtils.getWithoutException(pendingCheckpointCompletableFuture);
+
+ Preconditions.checkState(
+ checkpoint != null ||
throwable != null,
+ "Either the pending
checkpoint needs to be created or an error must have been occurred.");
+
+ if (throwable != null) {
+ // the initialization
might not be finished yet
+ if (checkpoint == null)
{
+
onTriggerFailure(request, throwable);
} else {
- if
(checkpoint.isDiscarded()) {
-
onTriggerFailure(
-
checkpoint,
-
new CheckpointException(
-
CheckpointFailureReason.TRIGGER_CHECKPOINT_FAILURE,
-
checkpoint.getFailureCause()));
- } else {
- // no
exception, no discarding, everything is OK
- final
long checkpointId = checkpoint.getCheckpointId();
-
snapshotTaskState(
-
timestamp,
-
checkpointId,
-
checkpoint.getCheckpointStorageLocation(),
-
request.props,
-
executions,
-
request.advanceToEndOfTime);
-
-
coordinatorsToCheckpoint.forEach((ctx) ->
ctx.afterSourceBarrierInjection(checkpointId));
-
-
onTriggerSuccess();
- }
+
onTriggerFailure(checkpoint, throwable);
+ }
+ } else {
+ if
(checkpoint.isDiscarded()) {
+
onTriggerFailure(
+
checkpoint,
+ new
CheckpointException(
+
CheckpointFailureReason.TRIGGER_CHECKPOINT_FAILURE,
+
checkpoint.getFailureCause()));
+ } else {
+ // no
exception, no discarding, everything is OK
+ final long
checkpointId = checkpoint.getCheckpointId();
+
snapshotTaskState(
+
timestamp,
+
checkpointId,
+
checkpoint.getCheckpointStorageLocation(),
+
request.props,
+
executions,
+
request.advanceToEndOfTime);
+
+
coordinatorsToCheckpoint.forEach((ctx) ->
ctx.afterSourceBarrierInjection(checkpointId));
+
+
onTriggerSuccess();
}
+ }
- return null;
- },
- timer));
+ return null;
+ },
+ timer)
+ .whenComplete((unused, error) -> {
+ if (error != null) {
+ if (!isShutdown()) {
+
failureManager.handleJobLevelCheckpointException(new
CheckpointException(EXCEPTION, error), Optional.empty());
+ } else if (error instanceof
RejectedExecutionException) {
+ LOG.debug("Execution
rejected during shutdown");
+ } else {
+ LOG.warn("Error
encountered during shutdown", error);
+ }
+ }
+ });
Review comment:
When publishing the PR I planned to add it later, but I've found it
difficult to reproduce the exact error. Do you have any idea how / whether to
test it?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
