echauchot commented on a change in pull request #13040:
URL: https://github.com/apache/flink/pull/13040#discussion_r481932290
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointsCleaner.java
##########
@@ -0,0 +1,86 @@
+package org.apache.flink.runtime.checkpoint;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.runtime.jobgraph.OperatorID;
+import org.apache.flink.runtime.state.CheckpointStorageLocation;
+import org.apache.flink.runtime.state.StateUtil;
+import org.apache.flink.util.Preconditions;
+import org.apache.flink.util.function.ThrowingConsumer;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Map;
+import java.util.concurrent.Executor;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Delegate class responsible for checkpoints cleaning and counting the number
of checkpoints yet
+ * to clean.
+ */
+public class CheckpointsCleaner {
+ /** The executor used for asynchronous calls, like potentially blocking
I/O. */
+ private final Executor executor;
+ AtomicInteger numberOfCheckpointsToClean;
+
+ Runnable checkpointCleaningFinishedCallback;
+
+ private static final Logger LOG =
LoggerFactory.getLogger(CheckpointsCleaner.class);
+
+ public CheckpointsCleaner(Executor executor) {
+ this.executor = executor;
+ this.numberOfCheckpointsToClean = new AtomicInteger(0);
+ }
+
+ public void setCheckpointCleaningFinishedCallback(Runnable
checkpointCleaningFinishedCallback) {
+ this.checkpointCleaningFinishedCallback =
checkpointCleaningFinishedCallback;
+ }
+
+ public int getNumberOfCheckpointsToClean() {
+ return numberOfCheckpointsToClean.get();
+ }
+
+ /**
+ * Asynchronously call a discard callback on the ioExecutor
+ * (FixedThreadPool of configurable size of default 4*CPU cores)
+ * and count the number of checkpoints that are waiting to be cleaned.
+ * @param completedCheckpoint the checkpoint to discard
+ * @param discardCallback the discard callback to call
+ */
+ public void
asyncDiscardCheckpointAndCountCheckpoints(CompletedCheckpoint
completedCheckpoint, ThrowingConsumer<CompletedCheckpoint, Exception>
discardCallback){
+ numberOfCheckpointsToClean.incrementAndGet();
+ executor.execute(() -> {
+ try {
+ discardCallback.accept(completedCheckpoint);
+ } catch (Exception e) {
+ LOG.warn("Could not discard completed
checkpoint {}.", completedCheckpoint.getCheckpointID(), e);
+ }
+ finally {
+ numberOfCheckpointsToClean.decrementAndGet();
+
Preconditions.checkNotNull(checkpointCleaningFinishedCallback);
+ checkpointCleaningFinishedCallback.run();
+ }
+ });
+ }
+
+ public void asyncDiscardPrivateStatesAndCountCheckpoints(
+ Map<OperatorID, OperatorState> operatorStates,
CheckpointStorageLocation targetLocation,
+ long checkpointId, JobID jobId) {
+ numberOfCheckpointsToClean.incrementAndGet();
+ executor.execute(() -> {
+ // discard the private states.
+ // unregistered shared states are still considered
private at this point.
+ try {
+
StateUtil.bestEffortDiscardAllStateObjects(operatorStates.values());
+ targetLocation.disposeOnFailure();
+ } catch (Throwable t) {
+ LOG.warn(
+ "Could not properly dispose the private
states in the pending checkpoint {} of job {}.",
+ checkpointId, jobId, t);
+ } finally {
+ operatorStates.clear();
+ numberOfCheckpointsToClean.decrementAndGet();
Review comment:
ok thanks for this, makes sense
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]