gaoyunhaii commented on a change in pull request #17:
URL: https://github.com/apache/flink-ml/pull/17#discussion_r741842056
##########
File path:
flink-ml-iteration/src/main/java/org/apache/flink/iteration/operator/coordinator/SharedProgressAligner.java
##########
@@ -224,4 +261,28 @@ public boolean isTerminated() {
return totalRecord == 0 || (hasCriteriaStream &&
totalCriteriaRecord == 0);
}
}
+
+ private static class CheckpointStatus {
+
+ private final long totalHeadParallelism;
+
+ private final List<CompletableFuture<byte[]>> stateFutures = new
ArrayList<>();
+
+ private int notifiedCoordinatorParallelism;
Review comment:
I think `int` should be enough for representing the parallelism ?
##########
File path:
flink-ml-iteration/src/main/java/org/apache/flink/iteration/operator/coordinator/SharedProgressAligner.java
##########
@@ -93,27 +98,28 @@ private SharedProgressAligner(
this.executor = Objects.requireNonNull(executor);
this.statusByEpoch = new HashMap<>();
- this.alignedConsumers = new HashMap<>();
+ this.listeners = new HashMap<>();
+ this.checkpointStatuses = new HashMap<>();
}
public void registerAlignedConsumer(
- OperatorID operatorID, Consumer<GloballyAlignedEvent>
alignedConsumer) {
+ OperatorID operatorID, SharedProgressAlignerListener
alignedConsumer) {
Review comment:
I'll use registerAlignedListener
##########
File path:
flink-ml-iteration/src/main/java/org/apache/flink/iteration/operator/coordinator/SharedProgressAligner.java
##########
@@ -93,27 +98,28 @@ private SharedProgressAligner(
this.executor = Objects.requireNonNull(executor);
this.statusByEpoch = new HashMap<>();
- this.alignedConsumers = new HashMap<>();
+ this.listeners = new HashMap<>();
+ this.checkpointStatuses = new HashMap<>();
}
public void registerAlignedConsumer(
- OperatorID operatorID, Consumer<GloballyAlignedEvent>
alignedConsumer) {
+ OperatorID operatorID, SharedProgressAlignerListener
alignedConsumer) {
runInEventLoop(
- () -> this.alignedConsumers.put(operatorID, alignedConsumer),
- "Register consumer %s",
+ () -> this.listeners.put(operatorID, alignedConsumer),
+ "Register listeners %s",
operatorID.toHexString());
}
public void unregisterConsumer(OperatorID operatorID) {
synchronized (this) {
Review comment:
The synchronized should not required in both places since now we put all
the operations into the same thread. I'll remove the synchronized here.
##########
File path:
flink-ml-iteration/src/main/java/org/apache/flink/iteration/operator/InputOperator.java
##########
@@ -53,13 +53,14 @@ public void processElement(StreamRecord<T> streamRecord)
throws Exception {
@Override
public void endInput() throws Exception {
- if (insertMaxEpochWatermark) {
- reusable.replace(
- IterationRecord.newEpochWatermark(
- Integer.MAX_VALUE,
- OperatorUtils.getUniqueSenderId(
- getOperatorID(),
getRuntimeContext().getIndexOfThisSubtask())));
- output.collect(reusable);
- }
+ // if (insertMaxEpochWatermark) {
Review comment:
I'll remove this code~
##########
File path:
flink-ml-iteration/src/test/java/org/apache/flink/iteration/itcases/operators/ReduceAllRoundProcessFunction.java
##########
@@ -49,16 +59,53 @@
private transient OutputTag<OutputRecord<Integer>> outputTag;
+ private transient ListState<Map<Integer, Integer>> sumByRoundsState;
+
+ private transient ListState<Integer> cachedRecordsState;
+
public ReduceAllRoundProcessFunction(boolean sync, int maxRound) {
this.sync = sync;
this.maxRound = maxRound;
}
@Override
- public void open(Configuration parameters) throws Exception {
- super.open(parameters);
+ public void initializeState(FunctionInitializationContext
functionInitializationContext)
+ throws Exception {
this.sumByEpochs = new HashMap<>();
cachedRecords = new ArrayList<>();
Review comment:
I'll remove all `this.`
##########
File path:
flink-ml-iteration/src/main/java/org/apache/flink/iteration/operator/coordinator/SharedProgressAligner.java
##########
@@ -224,4 +261,28 @@ public boolean isTerminated() {
return totalRecord == 0 || (hasCriteriaStream &&
totalCriteriaRecord == 0);
}
}
+
+ private static class CheckpointStatus {
+
+ private final long totalHeadParallelism;
+
+ private final List<CompletableFuture<byte[]>> stateFutures = new
ArrayList<>();
+
+ private int notifiedCoordinatorParallelism;
Review comment:
I think `int` should be enough for representing the parallelism ?
##########
File path:
flink-ml-iteration/src/main/java/org/apache/flink/iteration/operator/coordinator/SharedProgressAligner.java
##########
@@ -93,27 +98,28 @@ private SharedProgressAligner(
this.executor = Objects.requireNonNull(executor);
this.statusByEpoch = new HashMap<>();
- this.alignedConsumers = new HashMap<>();
+ this.listeners = new HashMap<>();
+ this.checkpointStatuses = new HashMap<>();
}
public void registerAlignedConsumer(
- OperatorID operatorID, Consumer<GloballyAlignedEvent>
alignedConsumer) {
+ OperatorID operatorID, SharedProgressAlignerListener
alignedConsumer) {
Review comment:
I'll use registerAlignedListener
##########
File path:
flink-ml-iteration/src/main/java/org/apache/flink/iteration/operator/coordinator/SharedProgressAligner.java
##########
@@ -93,27 +98,28 @@ private SharedProgressAligner(
this.executor = Objects.requireNonNull(executor);
this.statusByEpoch = new HashMap<>();
- this.alignedConsumers = new HashMap<>();
+ this.listeners = new HashMap<>();
+ this.checkpointStatuses = new HashMap<>();
}
public void registerAlignedConsumer(
- OperatorID operatorID, Consumer<GloballyAlignedEvent>
alignedConsumer) {
+ OperatorID operatorID, SharedProgressAlignerListener
alignedConsumer) {
runInEventLoop(
- () -> this.alignedConsumers.put(operatorID, alignedConsumer),
- "Register consumer %s",
+ () -> this.listeners.put(operatorID, alignedConsumer),
+ "Register listeners %s",
operatorID.toHexString());
}
public void unregisterConsumer(OperatorID operatorID) {
synchronized (this) {
Review comment:
The synchronized should not required in both places since now we put all
the operations into the same thread. I'll remove the synchronized here.
##########
File path:
flink-ml-iteration/src/main/java/org/apache/flink/iteration/operator/InputOperator.java
##########
@@ -53,13 +53,14 @@ public void processElement(StreamRecord<T> streamRecord)
throws Exception {
@Override
public void endInput() throws Exception {
- if (insertMaxEpochWatermark) {
- reusable.replace(
- IterationRecord.newEpochWatermark(
- Integer.MAX_VALUE,
- OperatorUtils.getUniqueSenderId(
- getOperatorID(),
getRuntimeContext().getIndexOfThisSubtask())));
- output.collect(reusable);
- }
+ // if (insertMaxEpochWatermark) {
Review comment:
I'll remove this code~
##########
File path:
flink-ml-iteration/src/test/java/org/apache/flink/iteration/itcases/operators/ReduceAllRoundProcessFunction.java
##########
@@ -49,16 +59,53 @@
private transient OutputTag<OutputRecord<Integer>> outputTag;
+ private transient ListState<Map<Integer, Integer>> sumByRoundsState;
+
+ private transient ListState<Integer> cachedRecordsState;
+
public ReduceAllRoundProcessFunction(boolean sync, int maxRound) {
this.sync = sync;
this.maxRound = maxRound;
}
@Override
- public void open(Configuration parameters) throws Exception {
- super.open(parameters);
+ public void initializeState(FunctionInitializationContext
functionInitializationContext)
+ throws Exception {
this.sumByEpochs = new HashMap<>();
cachedRecords = new ArrayList<>();
Review comment:
I'll remove all `this.`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]