Zakelly commented on code in PR #22590:
URL: https://github.com/apache/flink/pull/22590#discussion_r1223773671


##########
flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/filemerging/FileMergingSnapshotManager.java:
##########
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.checkpoint.filemerging;
+
+import org.apache.flink.api.common.TaskInfo;
+import org.apache.flink.core.fs.FileSystem;
+import org.apache.flink.core.fs.Path;
+import org.apache.flink.runtime.state.CheckpointedStateScope;
+import org.apache.flink.runtime.state.TaskStateManager;
+import org.apache.flink.runtime.state.filesystem.FsCheckpointStorageAccess;
+
+import java.io.Closeable;
+
+/**
+ * FileMergingSnapshotManager provides an interface to manage files and meta 
information for
+ * checkpoint files with merging checkpoint files enabled. 
FileMergingSnapshotManager resides on the
+ * TM side.
+ *
+ * <p>TODO (FLINK-32073): create output stream.
+ *
+ * <p>TODO (FLINK-32075): leverage checkpoint notification to delete logical 
files.
+ */
+public interface FileMergingSnapshotManager extends Closeable {
+
+    /**
+     * Initialize the file system, recording the checkpoint path the manager 
should work with.
+     *
+     * <pre>
+     * The layout of checkpoint directory:
+     * /user-defined-checkpoint-dir
+     *     /{job-id} (checkpointBaseDir)
+     *         |
+     *         + --shared/
+     *             |
+     *             + --subtask-1/
+     *                 + -- merged shared state files
+     *             + --subtask-2/
+     *                 + -- merged shared state files
+     *         + --taskowned/
+     *             + -- merged private state files
+     *         + --chk-1/
+     *         + --chk-2/
+     *         + --chk-3/
+     * </pre>
+     *
+     * <p>The reason why initializing directories in this method instead of 
the constructor is that
+     * the FileMergingSnapshotManager itself belongs to the {@link 
TaskStateManager}, which is
+     * initialized when receiving a task, while the base directories for 
checkpoint are created by
+     * {@link FsCheckpointStorageAccess} when the state backend initializing. 
After the checkpoint
+     * directories are initialized, the managed subdirectories are initialized 
here.
+     *
+     * <p>Note: This method may be called several times, the implementation 
should ensure
+     * idempotency, and throw {@link IllegalArgumentException} when any of the 
path in params change
+     * across function calls.
+     *
+     * @param fileSystem The filesystem to write to.
+     * @param checkpointBaseDir The base directory for checkpoints.
+     * @param sharedStateDir The directory for shared checkpoint data.
+     * @param taskOwnedStateDir The name of the directory for state not 
owned/released by the
+     *     master, but by the TaskManagers.
+     * @throws IllegalArgumentException thrown if these three paths are not 
deterministic across
+     *     calls.
+     */
+    void initFileSystem(
+            FileSystem fileSystem,
+            Path checkpointBaseDir,
+            Path sharedStateDir,
+            Path taskOwnedStateDir);
+
+    /**
+     * Register a subtask and create the managed directory for shared states.
+     *
+     * @param subtaskKey the subtask key identifying a subtask.
+     * @see #initFileSystem for layout information.
+     */
+    void registerSubtaskForSharedStates(SubtaskKey subtaskKey);
+
+    /**
+     * Get the managed directory of the file-merging snapshot manager, created 
in {@link
+     * #initFileSystem} or {@link #registerSubtaskForSharedStates}.
+     *
+     * @param subtaskKey the subtask key identifying the subtask.
+     * @param scope the checkpoint scope.
+     * @return the managed directory for one subtask in specified checkpoint 
scope.
+     */
+    Path getManagedDir(SubtaskKey subtaskKey, CheckpointedStateScope scope);
+
+    /** A key identifies a subtask. */
+    final class SubtaskKey {
+        final String taskName;
+        final int subtaskIndex;
+        final int parallelism;
+
+        final int hashCode;
+
+        SubtaskKey(TaskInfo taskInfo) {
+            this.taskName = taskInfo.getTaskName();
+            this.subtaskIndex = taskInfo.getIndexOfThisSubtask();
+            this.parallelism = taskInfo.getNumberOfParallelSubtasks();
+            int hash = taskName.hashCode();
+            hash = 31 * hash + subtaskIndex;
+            hash = 31 * hash + parallelism;
+            this.hashCode = hash;
+        }
+
+        SubtaskKey(String taskName, int subtaskIndex, int parallelism) {
+            this.taskName = taskName;
+            this.subtaskIndex = subtaskIndex;
+            this.parallelism = parallelism;
+            int hash = taskName.hashCode();
+            hash = 31 * hash + subtaskIndex;
+            hash = 31 * hash + parallelism;
+            this.hashCode = hash;
+        }

Review Comment:
   1. It is used in construction of DUMMY_SUBTASK_KEY
   2. I let the above constructor call this one.
   3. The static function `of` has been removed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to