Zakelly commented on code in PR #22590: URL: https://github.com/apache/flink/pull/22590#discussion_r1223773671
########## flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/filemerging/FileMergingSnapshotManager.java: ########## @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.checkpoint.filemerging; + +import org.apache.flink.api.common.TaskInfo; +import org.apache.flink.core.fs.FileSystem; +import org.apache.flink.core.fs.Path; +import org.apache.flink.runtime.state.CheckpointedStateScope; +import org.apache.flink.runtime.state.TaskStateManager; +import org.apache.flink.runtime.state.filesystem.FsCheckpointStorageAccess; + +import java.io.Closeable; + +/** + * FileMergingSnapshotManager provides an interface to manage files and meta information for + * checkpoint files with merging checkpoint files enabled. FileMergingSnapshotManager resides on the + * TM side. + * + * <p>TODO (FLINK-32073): create output stream. + * + * <p>TODO (FLINK-32075): leverage checkpoint notification to delete logical files. + */ +public interface FileMergingSnapshotManager extends Closeable { + + /** + * Initialize the file system, recording the checkpoint path the manager should work with. 
+ * + * <pre> + * The layout of checkpoint directory: + * /user-defined-checkpoint-dir + * /{job-id} (checkpointBaseDir) + * | + * + --shared/ + * | + * + --subtask-1/ + * + -- merged shared state files + * + --subtask-2/ + * + -- merged shared state files + * + --taskowned/ + * + -- merged private state files + * + --chk-1/ + * + --chk-2/ + * + --chk-3/ + * </pre> + * + * <p>The reason why initializing directories in this method instead of the constructor is that + * the FileMergingSnapshotManager itself belongs to the {@link TaskStateManager}, which is + * initialized when receiving a task, while the base directories for checkpoint are created by + * {@link FsCheckpointStorageAccess} when the state backend initializing. After the checkpoint + * directories are initialized, the managed subdirectories are initialized here. + * + * <p>Note: This method may be called several times, the implementation should ensure + * idempotency, and throw {@link IllegalArgumentException} when any of the path in params change + * across function calls. + * + * @param fileSystem The filesystem to write to. + * @param checkpointBaseDir The base directory for checkpoints. + * @param sharedStateDir The directory for shared checkpoint data. + * @param taskOwnedStateDir The name of the directory for state not owned/released by the + * master, but by the TaskManagers. + * @throws IllegalArgumentException thrown if these three paths are not deterministic across + * calls. + */ + void initFileSystem( + FileSystem fileSystem, + Path checkpointBaseDir, + Path sharedStateDir, + Path taskOwnedStateDir); + + /** + * Register a subtask and create the managed directory for shared states. + * + * @param subtaskKey the subtask key identifying a subtask. + * @see #initFileSystem for layout information. 
+ */ + void registerSubtaskForSharedStates(SubtaskKey subtaskKey); + + /** + * Get the managed directory of the file-merging snapshot manager, created in {@link + * #initFileSystem} or {@link #registerSubtaskForSharedStates}. + * + * @param subtaskKey the subtask key identifying the subtask. + * @param scope the checkpoint scope. + * @return the managed directory for one subtask in specified checkpoint scope. + */ + Path getManagedDir(SubtaskKey subtaskKey, CheckpointedStateScope scope); + + /** A key identifies a subtask. */ + final class SubtaskKey { + final String taskName; + final int subtaskIndex; + final int parallelism; + + final int hashCode; + + SubtaskKey(TaskInfo taskInfo) { + this.taskName = taskInfo.getTaskName(); + this.subtaskIndex = taskInfo.getIndexOfThisSubtask(); + this.parallelism = taskInfo.getNumberOfParallelSubtasks(); + int hash = taskName.hashCode(); + hash = 31 * hash + subtaskIndex; + hash = 31 * hash + parallelism; + this.hashCode = hash; + } + + SubtaskKey(String taskName, int subtaskIndex, int parallelism) { + this.taskName = taskName; + this.subtaskIndex = subtaskIndex; + this.parallelism = parallelism; + int hash = taskName.hashCode(); + hash = 31 * hash + subtaskIndex; + hash = 31 * hash + parallelism; + this.hashCode = hash; + } Review Comment: 1. It is used in the construction of DUMMY_SUBTASK_KEY. 2. I let the above one call this one. 3. The static function `of` is removed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
