reswqa commented on code in PR #21199:
URL: https://github.com/apache/flink/pull/21199#discussion_r1043179571
##########
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/strategy/VertexwiseSchedulingStrategy.java:
##########
@@ -125,24 +122,64 @@ private void maybeScheduleVertices(final Set<ExecutionVertexID> vertices) {
             newVertices.clear();
         }
-        final Set<ExecutionVertexID> verticesToDeploy =
-                allCandidates.stream()
-                        .filter(
-                                vertexId -> {
-                                    SchedulingExecutionVertex vertex =
-                                            schedulingTopology.getVertex(vertexId);
-                                    checkState(vertex.getState() == ExecutionState.CREATED);
-                                    return inputConsumableDecider.isInputConsumable(
-                                            vertexId,
-                                            Collections.emptySet(),
-                                            consumableStatusCache);
-                                })
-                        .collect(Collectors.toSet());
+        final Set<ExecutionVertexID> verticesToDeploy = new HashSet<>();
+
+        Set<ExecutionVertexID> nextVertices = allCandidates;
+        while (!nextVertices.isEmpty()) {
+            nextVertices = addToDeployAndGetVertices(nextVertices, verticesToDeploy);
+        }
         scheduleVerticesOneByOne(verticesToDeploy);
         scheduledVertices.addAll(verticesToDeploy);
     }
+
+    private Set<ExecutionVertexID> addToDeployAndGetVertices(
+            Set<ExecutionVertexID> currentVertices, Set<ExecutionVertexID> verticesToDeploy) {
+        Set<ExecutionVertexID> nextVertices = new HashSet<>();
+        // cache consumedPartitionGroup's consumable status to avoid compute repeatedly.
+        final Map<ConsumedPartitionGroup, Boolean> consumableStatusCache = new HashMap<>();
+        final Set<ConsumerVertexGroup> visitedConsumerVertexGroup = new HashSet<>();
+
+        for (ExecutionVertexID currentVertex : currentVertices) {
+            if (isVertexSchedulable(currentVertex, consumableStatusCache, verticesToDeploy)) {
+                verticesToDeploy.add(currentVertex);
+                Set<ConsumerVertexGroup> canBePipelinedConsumerVertexGroups =
+                        IterableUtils.toStream(
+                                        schedulingTopology
+                                                .getVertex(currentVertex)
+                                                .getProducedResults())
+                                .map(SchedulingResultPartition::getConsumerVertexGroups)
+                                .flatMap(Collection::stream)
+                                .filter(
+                                        (consumerVertexGroup) ->
+                                                consumerVertexGroup
+                                                        .getResultPartitionType()
+                                                        .canBePipelinedConsumed())
+                                .collect(Collectors.toSet());
+                for (ConsumerVertexGroup consumerVertexGroup : canBePipelinedConsumerVertexGroups) {
+                    if (!visitedConsumerVertexGroup.contains(consumerVertexGroup)) {
+                        visitedConsumerVertexGroup.add(consumerVertexGroup);
+                        nextVertices.addAll(
+                                canBePipelinedConsumerVertexGroups.stream()
Review Comment:
Yes, you're right. Fixed this.
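
For context: the flagged line re-streams the whole `canBePipelinedConsumerVertexGroups` set from inside the loop that already iterates over that same set, so the consumers of every group would be collected once per visited group instead of once overall. The hunk is truncated at the commented line, so the merged fix is not shown in this message; the following is a minimal sketch of what the agreed fix presumably looks like, assuming `ConsumerVertexGroup` implements `Iterable<ExecutionVertexID>` as it does in this package:

    for (ConsumerVertexGroup consumerVertexGroup : canBePipelinedConsumerVertexGroups) {
        if (!visitedConsumerVertexGroup.contains(consumerVertexGroup)) {
            visitedConsumerVertexGroup.add(consumerVertexGroup);
            // Collect only the consumer vertices of this particular group; the
            // while-loop in maybeScheduleVertices re-checks schedulability of
            // every candidate on the next round, so no extra filtering is
            // needed here.
            nextVertices.addAll(
                    IterableUtils.toStream(consumerVertexGroup)
                            .collect(Collectors.toSet()));
        }
    }

Keeping the `visitedConsumerVertexGroup` check outside the stream ensures each group's consumers are added exactly once per call, even when several produced partitions share the same consumer vertex group.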
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]