[ https://issues.apache.org/jira/browse/FLINK-4939?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15628538#comment-15628538 ]
ASF GitHub Bot commented on FLINK-4939: --------------------------------------- Github user zentol commented on a diff in the pull request: https://github.com/apache/flink/pull/2707#discussion_r86108509 --- Diff: flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/operators/GenericWriteAheadSink.java --- @@ -77,63 +86,96 @@ public GenericWriteAheadSink(CheckpointCommitter committer, TypeSerializer<IN> s public void open() throws Exception { super.open(); committer.setOperatorId(id); - committer.setOperatorSubtaskId(getRuntimeContext().getIndexOfThisSubtask()); committer.open(); - cleanState(); - checkpointStreamFactory = - getContainingTask().createCheckpointStreamFactory(this); + + checkpointStreamFactory = getContainingTask() + .createCheckpointStreamFactory(this); + + cleanRestoredHandles(); } public void close() throws Exception { committer.close(); } /** - * Saves a handle in the state. + * Called when a checkpoint barrier arrives. + * Closes any open streams to the backend and marks them as pending for + * committing to the final output system, e.g. Cassandra. * - * @param checkpointId - * @throws IOException + * @param checkpointId the id of the latest received checkpoint. + * @throws IOException in case something went wrong when handling the stream to the backend. 
*/ private void saveHandleInState(final long checkpointId, final long timestamp) throws Exception { + Preconditions.checkNotNull(this.pendingHandles, "The operator has not been properly initialized."); + //only add handle if a new OperatorState was created since the last snapshot if (out != null) { StreamStateHandle handle = out.closeAndGetHandle(); - if (state.pendingHandles.containsKey(checkpointId)) { + + PendingCheckpointId pendingCheckpoint = new PendingCheckpointId( + checkpointId, getRuntimeContext().getIndexOfThisSubtask()); + + if (pendingHandles.containsKey(pendingCheckpoint)) { //we already have a checkpoint stored for that ID that may have been partially written, //so we discard this "alternate version" and use the stored checkpoint handle.discardState(); } else { - state.pendingHandles.put(checkpointId, new Tuple2<>(timestamp, handle)); + this.pendingHandles.put(pendingCheckpoint, new PendingHandle(timestamp, handle)); } out = null; } } @Override - public void snapshotState(FSDataOutputStream out, - long checkpointId, - long timestamp) throws Exception { + public void snapshotState(FSDataOutputStream out, long checkpointId, long timestamp) throws Exception { saveHandleInState(checkpointId, timestamp); - InstantiationUtil.serializeObject(out, state); + DataOutputViewStreamWrapper outStream = new DataOutputViewStreamWrapper(out); + outStream.writeInt(pendingHandles.size()); + for (Map.Entry<PendingCheckpointId, PendingHandle> pendingCheckpoint : pendingHandles.entrySet()) { + pendingCheckpoint.getKey().serialize(outStream); + pendingCheckpoint.getValue().serialize(outStream); + } } @Override public void restoreState(FSDataInputStream in) throws Exception { - this.state = InstantiationUtil.deserializeObject(in, getUserCodeClassloader()); + final DataInputViewStreamWrapper inStream = new DataInputViewStreamWrapper(in); + int noOfPendingHandles = inStream.readInt(); --- End diff -- noOfPendingHandles -> numPendingHandles > GenericWriteAheadSink: Decouple 
the creating from the committing subtask for > a pending checkpoint > ------------------------------------------------------------------------------------------------- > > Key: FLINK-4939 > URL: https://issues.apache.org/jira/browse/FLINK-4939 > Project: Flink > Issue Type: Improvement > Components: Cassandra Connector > Reporter: Kostas Kloudas > Assignee: Kostas Kloudas > Fix For: 1.2.0 > > > So far the GenericWriteAheadSink expected that > the subtask that wrote a pending checkpoint to the > state backend would also be the one to commit it to > the third-party storage system. > This issue aims to remove this assumption. To do this, > the CheckpointCommitter has to be able to dynamically > take the subtaskIdx as a parameter when asking > if a checkpoint was committed, and the > state kept by the GenericWriteAheadSink has to also > include the subtask index of the subtask that wrote > the pending checkpoint. > This change is also necessary for making the operator rescalable. -- This message was sent by Atlassian JIRA (v6.3.4#6332)