guoweiM commented on a change in pull request #16:
URL: https://github.com/apache/flink-ml/pull/16#discussion_r739982935



##########
File path: 
flink-ml-iteration/src/main/java/org/apache/flink/iteration/Iterations.java
##########
@@ -112,15 +145,400 @@ public static DataStreamList 
iterateBoundedStreamsUntilTermination(
             ReplayableDataStreamList dataStreams,
             IterationConfig config,
             IterationBody body) {
-        Preconditions.checkArgument(
-                config.getOperatorLifeCycle() == 
IterationConfig.OperatorLifeCycle.ALL_ROUND);
-        
Preconditions.checkArgument(dataStreams.getReplayedDataStreams().size() == 0);
+        OperatorWrapper wrapper =
+                config.getOperatorLifeCycle() == 
IterationConfig.OperatorLifeCycle.ALL_ROUND
+                        ? new AllRoundOperatorWrapper<>()
+                        : new PerRoundOperatorWrapper<>();
 
-        return IterationFactory.createIteration(
+        List<DataStream<?>> allDatastreams = new ArrayList<>();
+        allDatastreams.addAll(dataStreams.getReplayedDataStreams());
+        allDatastreams.addAll(dataStreams.getNonReplayedStreams());
+
+        Set<Integer> replayedIndices =
+                IntStream.range(0, dataStreams.getReplayedDataStreams().size())
+                        .boxed()
+                        .collect(Collectors.toSet());
+
+        return createIteration(
                 initVariableStreams,
-                new DataStreamList(dataStreams.getNonReplayedStreams()),
+                new DataStreamList(allDatastreams),
+                replayedIndices,
                 body,
-                new AllRoundOperatorWrapper(),
+                wrapper,
                 true);
     }
+
+    @SuppressWarnings({"unchecked", "rawtypes"})
+    private static DataStreamList createIteration(
+            DataStreamList initVariableStreams,
+            DataStreamList dataStreams,
+            Set<Integer> replayedDataStreamIndices,
+            IterationBody body,
+            OperatorWrapper<?, IterationRecord<?>> initialOperatorWrapper,
+            boolean mayHaveCriteria) {
+        checkState(initVariableStreams.size() > 0, "There should be at least 
one variable stream");
+
+        IterationID iterationId = new IterationID();
+
+        List<TypeInformation<?>> initVariableTypeInfos = 
getTypeInfos(initVariableStreams);
+        List<TypeInformation<?>> dataStreamTypeInfos = 
getTypeInfos(dataStreams);
+
+        // Add heads and inputs
+        int totalInitVariableParallelism =
+                map(
+                                initVariableStreams,
+                                dataStream ->
+                                        dataStream.getParallelism() > 0
+                                                ? dataStream.getParallelism()
+                                                : dataStream
+                                                        
.getExecutionEnvironment()
+                                                        .getConfig()
+                                                        .getParallelism())
+                        .stream()
+                        .mapToInt(i -> i)
+                        .sum();
+        DataStreamList initVariableInputs = addInputs(initVariableStreams, 
false);
+        DataStreamList headStreams =
+                addHeads(
+                        initVariableStreams,
+                        initVariableInputs,
+                        iterationId,
+                        totalInitVariableParallelism,
+                        false,
+                        0);
+
+        DataStreamList dataStreamInputs = addInputs(dataStreams, true);
+        if (replayedDataStreamIndices.size() > 0) {
+            dataStreamInputs =
+                    addReplayer(
+                            headStreams.get(0),
+                            dataStreams,
+                            dataStreamInputs,
+                            replayedDataStreamIndices);
+        }
+
+        // Create the iteration body. We map the inputs of iteration body into 
the draft sources,
+        // which serve as the start points to build the draft subgraph.
+        StreamExecutionEnvironment env = 
initVariableStreams.get(0).getExecutionEnvironment();
+        DraftExecutionEnvironment draftEnv =
+                new DraftExecutionEnvironment(env, initialOperatorWrapper);
+        DataStreamList draftHeadStreams =
+                addDraftSources(headStreams, draftEnv, initVariableTypeInfos);
+        DataStreamList draftDataStreamInputs =
+                addDraftSources(dataStreamInputs, draftEnv, 
dataStreamTypeInfos);
+
+        IterationBodyResult iterationBodyResult =
+                body.process(draftHeadStreams, draftDataStreamInputs);
+        
ensuresTransformationAdded(iterationBodyResult.getFeedbackVariableStreams(), 
draftEnv);
+        ensuresTransformationAdded(iterationBodyResult.getOutputStreams(), 
draftEnv);
+        draftEnv.copyToActualEnvironment();
+
+        // Add tails and co-locate them with the heads.
+        DataStreamList feedbackStreams =
+                
getActualDataStreams(iterationBodyResult.getFeedbackVariableStreams(), 
draftEnv);
+        checkState(
+                feedbackStreams.size() == initVariableStreams.size(),
+                "The number of feedback streams "
+                        + feedbackStreams.size()
+                        + " does not match the initialized one "
+                        + initVariableStreams.size());
+        for (int i = 0; i < feedbackStreams.size(); ++i) {
+            checkState(
+                    feedbackStreams.get(i).getParallelism() == 
headStreams.get(i).getParallelism(),
+                    String.format(
+                            "The feedback stream %d have different parallelism 
%d with the initial stream, which is %d",
+                            i,
+                            feedbackStreams.get(i).getParallelism(),
+                            headStreams.get(i).getParallelism()));
+        }
+
+        DataStreamList tails = addTails(feedbackStreams, iterationId, 0);
+        for (int i = 0; i < headStreams.size(); ++i) {
+            String coLocationGroupKey = "co-" + iterationId.toHexString() + 
"-" + i;
+            
headStreams.get(i).getTransformation().setCoLocationGroupKey(coLocationGroupKey);
+            
tails.get(i).getTransformation().setCoLocationGroupKey(coLocationGroupKey);
+        }
+
+        checkState(
+                mayHaveCriteria || 
iterationBodyResult.getTerminationCriteria() == null,
+                "The current iteration type does not support the termination 
criteria.");
+
+        if (iterationBodyResult.getTerminationCriteria() != null) {
+            addCriteriaStream(
+                    iterationBodyResult.getTerminationCriteria(),
+                    iterationId,
+                    env,
+                    draftEnv,
+                    initVariableStreams,
+                    headStreams,
+                    totalInitVariableParallelism);
+        }
+
+        return 
addOutputs(getActualDataStreams(iterationBodyResult.getOutputStreams(), 
draftEnv));
+    }
+
+    private static DataStreamList addReplayer(
+            DataStream<?> firstHeadStream,
+            DataStreamList originalDataStreams,
+            DataStreamList dataStreamInputs,
+            Set<Integer> replayedDataStreamIndices) {
+
+        List<DataStream<?>> result = new ArrayList<>(dataStreamInputs.size());
+        for (int i = 0; i < dataStreamInputs.size(); ++i) {
+            if (!replayedDataStreamIndices.contains(i)) {
+                result.add(dataStreamInputs.get(i));
+                continue;
+            }
+
+            // Notes that the HeadOperator would broadcast the globally 
aligned events,
+            // thus the operator does not require emit to the sideoutput 
specially.
+            DataStream<?> replayedInput =
+                    ((SingleOutputStreamOperator<IterationRecord<?>>) 
firstHeadStream)
+                            
.getSideOutput(HeadOperator.ALIGN_NOTIFY_OUTPUT_TAG)
+                            .map(x -> x, dataStreamInputs.get(i).getType())
+                            .setParallelism(1)
+                            .name("signal-change-typeinfo")
+                            .broadcast()
+                            .union(dataStreamInputs.get(i))
+                            .transform(
+                                    "Replayer-"
+                                            + originalDataStreams
+                                                    .get(i)
+                                                    .getTransformation()
+                                                    .getName(),
+                                    dataStreamInputs.get(i).getType(),
+                                    (OneInputStreamOperator) new 
ReplayOperator<>())
+                            
.setParallelism(dataStreamInputs.get(i).getParallelism());
+            result.add(replayedInput);
+        }
+
+        return new DataStreamList(result);
+    }
+
+    private static void addCriteriaStream(
+            DataStream<?> draftCriteriaStream,
+            IterationID iterationId,
+            StreamExecutionEnvironment env,
+            DraftExecutionEnvironment draftEnv,
+            DataStreamList initVariableStreams,
+            DataStreamList headStreams,
+            int totalInitVariableParallelism) {
+        // deal with the criteria streams

Review comment:
       deals? (i.e. change the comment to "// deals with the criteria streams")




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to