XComp commented on code in PR #27921:
URL: https://github.com/apache/flink/pull/27921#discussion_r3356270379
##########
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adaptive/Executing.java:
##########
@@ -182,6 +190,106 @@ public ScheduledFuture<?> scheduleOperation(Runnable
callback, Duration delay) {
return context.runIfState(this, callback, delay);
}
+ @Override
+ public void requestActiveCheckpointTrigger() {
+ if (!activeCheckpointTriggerEnabled) {
+ return;
+ }
+ final CheckpointCoordinator checkpointCoordinator =
+ getExecutionGraph().getCheckpointCoordinator();
+ if (checkpointCoordinator == null
+ || !checkpointCoordinator.isPeriodicCheckpointingConfigured())
{
+ getLogger()
+ .debug(
+ "Skipping active checkpoint trigger for rescale:
checkpointing not configured.");
+ return;
+ }
+
+ final Optional<Duration> triggerDelay =
+ checkpointCoordinator.getActiveCheckpointTriggerDelay();
+ if (triggerDelay.isEmpty()) {
+ getLogger()
+ .debug(
+ "Skipping active checkpoint trigger for rescale:
checkpoint already in progress.");
+ return;
+ }
+ scheduleActiveCheckpointTriggerRetry(triggerDelay.get());
+ }
+
+ private void scheduleActiveCheckpointTriggerRetry(Duration delay) {
+ if (activeCheckpointTriggerScheduled) {
+ return;
+ }
+ activeCheckpointTriggerScheduled = true;
+ if (!delay.isZero()) {
+ getLogger()
+ .debug(
+ "Min pause not satisfied, scheduling active
checkpoint trigger retry in {} ms.",
+ delay.toMillis());
+ }
+ context.runIfState(this, this::tryFireActiveCheckpointAfterRetry,
delay);
+ }
+
+ private void tryFireActiveCheckpointAfterRetry() {
+ activeCheckpointTriggerScheduled = false;
+
+ // Parallelism is the only guard re-evaluated here: it can change
between the request
+ // and the scheduled fire (e.g. resources changed again, or the
parallelism was reverted
+ // back to the current value while we waited for min-pause). The null
check and
+ // periodic-checkpoint config are invariants validated at request time.
+ if (!parallelismChanged()) {
+ getLogger()
+ .debug("Active checkpoint trigger for rescale dropped:
parallelism unchanged.");
+ return;
+ }
+ final CheckpointCoordinator checkpointCoordinator =
+ Preconditions.checkNotNull(
+ getExecutionGraph().getCheckpointCoordinator(),
+ "Checkpoint coordinator was non-null when the trigger
was scheduled; "
+ + "an Executing state never drops its
coordinator.");
+ final Optional<Duration> triggerDelay =
+ checkpointCoordinator.getActiveCheckpointTriggerDelay();
+ if (triggerDelay.isEmpty()) {
+ getLogger()
+ .debug(
+ "Active checkpoint trigger for rescale dropped:
checkpoint already in progress after retry.");
+ } else if (triggerDelay.get().isZero()) {
+ fireActiveCheckpointTrigger(checkpointCoordinator);
+ } else {
+ getLogger()
+ .debug(
+ "Active checkpoint trigger for rescale silently
dropped: a periodic checkpoint completed while the trigger was scheduled.");
+ }
+ }
+
+ private void fireActiveCheckpointTrigger(CheckpointCoordinator
checkpointCoordinator) {
+ Preconditions.checkState(
+ activeCheckpointTriggerEnabled,
+ "Active checkpoint trigger fired while the feature is
disabled.");
+ activeCheckpointTriggerScheduled = false;
Review Comment:
Isn't this assignment obsolete? `activeCheckpointTriggerScheduled` is already reset to `false` in the calling method (`tryFireActiveCheckpointAfterRetry`). 🤔
##########
flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java:
##########
@@ -1999,6 +2000,29 @@ public long getCheckpointTimeout() {
return checkpointTimeout;
}
+ /**
+ * Returns the remaining {@link Duration} until {@code
minPauseBetweenCheckpoints} is satisfied
+ * for a new active-trigger checkpoint, computed from the time elapsed
since the last completed
+ * checkpoint (or from the coordinator clock's epoch when no checkpoint
has completed yet —
+ * which is normally far in the past in production). {@link Duration#ZERO}
means the trigger can
+ * fire immediately.
+ *
+ * <p>Returns {@link Optional#empty()} as a fallback if a checkpoint is
already in flight
+ * (triggering or pending), in which case no active trigger should be
scheduled.
+ *
+ * <p>All checks are made under the coordinator lock.
Review Comment:
That's an implementation detail that doesn't need to be exposed via Javadoc.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]