zhuzhurk commented on a change in pull request #18102:
URL: https://github.com/apache/flink/pull/18102#discussion_r786442228
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java
##########
@@ -159,50 +162,79 @@ public static DefaultExecutionTopology fromExecutionGraph(
topologicallySortedJobVertices)
.getAllPipelinedRegions();
- ExecutionGraphIndex executionGraphIndex =
- computeExecutionGraphIndex(
- executionGraph.getAllExecutionVertices(),
- logicalPipelinedRegions,
- edgeManager);
-
- IndexedPipelinedRegions indexedPipelinedRegions =
- computePipelinedRegions(
-
executionGraphIndex.sortedExecutionVerticesInPipelinedRegion.keySet(),
-
executionGraphIndex.sortedExecutionVerticesInPipelinedRegion::get,
- executionGraphIndex.executionVerticesById::get,
- executionGraphIndex.resultPartitionsById::get);
-
- ensureCoLocatedVerticesInSameRegion(
- indexedPipelinedRegions.pipelinedRegions, executionGraph);
-
- return new DefaultExecutionTopology(
- executionGraphIndex.executionVerticesById,
- executionGraphIndex.executionVerticesList,
- executionGraphIndex.resultPartitionsById,
- indexedPipelinedRegions.pipelinedRegionsByVertex,
- indexedPipelinedRegions.pipelinedRegions,
- edgeManager);
- }
-
- private static ExecutionGraphIndex computeExecutionGraphIndex(
- Iterable<ExecutionVertex> executionVertices,
- Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions,
- EdgeManager edgeManager) {
- Map<ExecutionVertexID, DefaultExecutionVertex> executionVerticesById =
new HashMap<>();
- List<DefaultExecutionVertex> executionVerticesList = new ArrayList<>();
- Map<IntermediateResultPartitionID, DefaultResultPartition>
resultPartitionsById =
- new HashMap<>();
- Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>>
- sortedExecutionVerticesInPipelinedRegion = new
IdentityHashMap<>();
-
- Map<JobVertexID, DefaultLogicalPipelinedRegion>
logicalPipelinedRegionByJobVertexId =
+ Map<JobVertexID, DefaultLogicalPipelinedRegion>
logicalPipelinedRegionsByJobVertexId =
new HashMap<>();
for (DefaultLogicalPipelinedRegion logicalPipelinedRegion :
logicalPipelinedRegions) {
for (LogicalVertex vertex : logicalPipelinedRegion.getVertices()) {
- logicalPipelinedRegionByJobVertexId.put(vertex.getId(),
logicalPipelinedRegion);
+ logicalPipelinedRegionsByJobVertexId.put(vertex.getId(),
logicalPipelinedRegion);
}
}
+ return logicalPipelinedRegionsByJobVertexId;
+ }
+
+ public void notifyExecutionGraphUpdated(
+ final DefaultExecutionGraph executionGraph,
+ final List<ExecutionJobVertex> newJobVertices) {
+
+ checkNotNull(executionGraph, "execution graph can not be null");
+
+ final Set<JobVertexID> newVertexSet =
Review comment:
NIT: newVertexSet -> newJobVertexIds
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java
##########
@@ -159,50 +162,79 @@ public static DefaultExecutionTopology fromExecutionGraph(
topologicallySortedJobVertices)
.getAllPipelinedRegions();
- ExecutionGraphIndex executionGraphIndex =
- computeExecutionGraphIndex(
- executionGraph.getAllExecutionVertices(),
- logicalPipelinedRegions,
- edgeManager);
-
- IndexedPipelinedRegions indexedPipelinedRegions =
- computePipelinedRegions(
-
executionGraphIndex.sortedExecutionVerticesInPipelinedRegion.keySet(),
-
executionGraphIndex.sortedExecutionVerticesInPipelinedRegion::get,
- executionGraphIndex.executionVerticesById::get,
- executionGraphIndex.resultPartitionsById::get);
-
- ensureCoLocatedVerticesInSameRegion(
- indexedPipelinedRegions.pipelinedRegions, executionGraph);
-
- return new DefaultExecutionTopology(
- executionGraphIndex.executionVerticesById,
- executionGraphIndex.executionVerticesList,
- executionGraphIndex.resultPartitionsById,
- indexedPipelinedRegions.pipelinedRegionsByVertex,
- indexedPipelinedRegions.pipelinedRegions,
- edgeManager);
- }
-
- private static ExecutionGraphIndex computeExecutionGraphIndex(
- Iterable<ExecutionVertex> executionVertices,
- Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions,
- EdgeManager edgeManager) {
- Map<ExecutionVertexID, DefaultExecutionVertex> executionVerticesById =
new HashMap<>();
- List<DefaultExecutionVertex> executionVerticesList = new ArrayList<>();
- Map<IntermediateResultPartitionID, DefaultResultPartition>
resultPartitionsById =
- new HashMap<>();
- Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>>
- sortedExecutionVerticesInPipelinedRegion = new
IdentityHashMap<>();
-
- Map<JobVertexID, DefaultLogicalPipelinedRegion>
logicalPipelinedRegionByJobVertexId =
+ Map<JobVertexID, DefaultLogicalPipelinedRegion>
logicalPipelinedRegionsByJobVertexId =
new HashMap<>();
for (DefaultLogicalPipelinedRegion logicalPipelinedRegion :
logicalPipelinedRegions) {
for (LogicalVertex vertex : logicalPipelinedRegion.getVertices()) {
- logicalPipelinedRegionByJobVertexId.put(vertex.getId(),
logicalPipelinedRegion);
+ logicalPipelinedRegionsByJobVertexId.put(vertex.getId(),
logicalPipelinedRegion);
}
}
+ return logicalPipelinedRegionsByJobVertexId;
+ }
+
+ public void notifyExecutionGraphUpdated(
+ final DefaultExecutionGraph executionGraph,
+ final List<ExecutionJobVertex> newJobVertices) {
+
+ checkNotNull(executionGraph, "execution graph can not be null");
+
+ final Set<JobVertexID> newVertexSet =
+ newJobVertices.stream()
+ .map(ExecutionJobVertex::getJobVertexId)
+ .collect(Collectors.toSet());
+
+ // any PIPELINED input should be from within this new set so that
existing pipelined regions
+ // will not change
+ newJobVertices.stream()
+ .map(ExecutionJobVertex::getJobVertex)
+ .flatMap(v -> v.getInputs().stream())
+ .map(JobEdge::getSource)
+ .filter(r -> r.getResultType().isPipelined())
+ .map(IntermediateDataSet::getProducer)
+ .map(JobVertex::getID)
+ .forEach(id -> checkState(newVertexSet.contains(id)));
+
+ final Iterable<ExecutionVertex> newAddedExecutionVertices =
+ newJobVertices.stream()
+ .flatMap(jobVertex ->
Stream.of(jobVertex.getTaskVertices()))
+ .collect(Collectors.toList());
+
+ updateExecutionGraphIndex(newAddedExecutionVertices);
+
+ updatePipelinedRegions(newAddedExecutionVertices);
+
+ ensureCoLocatedVerticesInSameRegion(pipelinedRegions, executionGraph);
+ }
+
+ public static DefaultExecutionTopology fromExecutionGraph(
+ DefaultExecutionGraph executionGraph) {
+ checkNotNull(executionGraph, "execution graph can not be null");
+
+ EdgeManager edgeManager = executionGraph.getEdgeManager();
+
+ Iterable<ExecutionVertex> executionVertices =
executionGraph.getAllExecutionVertices();
+
+ DefaultExecutionTopology schedulingTopology =
+ new DefaultExecutionTopology(
+ () ->
+ IterableUtils.toStream(executionVertices)
+ .map(ExecutionVertex::getID)
+ .collect(Collectors.toList()),
+ edgeManager,
+
computeLogicalPipelinedRegionsByJobVertexId(executionGraph));
+
+ schedulingTopology.notifyExecutionGraphUpdated(
+ executionGraph,
+
IterableUtils.toStream(executionGraph.getVerticesTopologically())
+ .filter(ExecutionJobVertex::isInitialized)
+ .collect(Collectors.toList()));
+
+ return schedulingTopology;
+ }
+
+ private void updateExecutionGraphIndex(Iterable<ExecutionVertex>
executionVertices) {
+
Review comment:
NIT: unnecessary empty line
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java
##########
@@ -159,50 +162,79 @@ public static DefaultExecutionTopology fromExecutionGraph(
topologicallySortedJobVertices)
.getAllPipelinedRegions();
- ExecutionGraphIndex executionGraphIndex =
- computeExecutionGraphIndex(
- executionGraph.getAllExecutionVertices(),
- logicalPipelinedRegions,
- edgeManager);
-
- IndexedPipelinedRegions indexedPipelinedRegions =
- computePipelinedRegions(
-
executionGraphIndex.sortedExecutionVerticesInPipelinedRegion.keySet(),
-
executionGraphIndex.sortedExecutionVerticesInPipelinedRegion::get,
- executionGraphIndex.executionVerticesById::get,
- executionGraphIndex.resultPartitionsById::get);
-
- ensureCoLocatedVerticesInSameRegion(
- indexedPipelinedRegions.pipelinedRegions, executionGraph);
-
- return new DefaultExecutionTopology(
- executionGraphIndex.executionVerticesById,
- executionGraphIndex.executionVerticesList,
- executionGraphIndex.resultPartitionsById,
- indexedPipelinedRegions.pipelinedRegionsByVertex,
- indexedPipelinedRegions.pipelinedRegions,
- edgeManager);
- }
-
- private static ExecutionGraphIndex computeExecutionGraphIndex(
- Iterable<ExecutionVertex> executionVertices,
- Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions,
- EdgeManager edgeManager) {
- Map<ExecutionVertexID, DefaultExecutionVertex> executionVerticesById =
new HashMap<>();
- List<DefaultExecutionVertex> executionVerticesList = new ArrayList<>();
- Map<IntermediateResultPartitionID, DefaultResultPartition>
resultPartitionsById =
- new HashMap<>();
- Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>>
- sortedExecutionVerticesInPipelinedRegion = new
IdentityHashMap<>();
-
- Map<JobVertexID, DefaultLogicalPipelinedRegion>
logicalPipelinedRegionByJobVertexId =
+ Map<JobVertexID, DefaultLogicalPipelinedRegion>
logicalPipelinedRegionsByJobVertexId =
new HashMap<>();
for (DefaultLogicalPipelinedRegion logicalPipelinedRegion :
logicalPipelinedRegions) {
for (LogicalVertex vertex : logicalPipelinedRegion.getVertices()) {
- logicalPipelinedRegionByJobVertexId.put(vertex.getId(),
logicalPipelinedRegion);
+ logicalPipelinedRegionsByJobVertexId.put(vertex.getId(),
logicalPipelinedRegion);
}
}
+ return logicalPipelinedRegionsByJobVertexId;
+ }
+
+ public void notifyExecutionGraphUpdated(
+ final DefaultExecutionGraph executionGraph,
+ final List<ExecutionJobVertex> newJobVertices) {
+
+ checkNotNull(executionGraph, "execution graph can not be null");
+
+ final Set<JobVertexID> newVertexSet =
+ newJobVertices.stream()
+ .map(ExecutionJobVertex::getJobVertexId)
+ .collect(Collectors.toSet());
+
+ // any PIPELINED input should be from within this new set so that
existing pipelined regions
+ // will not change
+ newJobVertices.stream()
+ .map(ExecutionJobVertex::getJobVertex)
+ .flatMap(v -> v.getInputs().stream())
+ .map(JobEdge::getSource)
+ .filter(r -> r.getResultType().isPipelined())
+ .map(IntermediateDataSet::getProducer)
+ .map(JobVertex::getID)
+ .forEach(id -> checkState(newVertexSet.contains(id)));
+
+ final Iterable<ExecutionVertex> newAddedExecutionVertices =
Review comment:
newAddedExecutionVertices -> newlyAddedExecutionVertices or newExecutionVertices
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java
##########
@@ -159,50 +162,79 @@ public static DefaultExecutionTopology fromExecutionGraph(
topologicallySortedJobVertices)
.getAllPipelinedRegions();
- ExecutionGraphIndex executionGraphIndex =
- computeExecutionGraphIndex(
- executionGraph.getAllExecutionVertices(),
- logicalPipelinedRegions,
- edgeManager);
-
- IndexedPipelinedRegions indexedPipelinedRegions =
- computePipelinedRegions(
-
executionGraphIndex.sortedExecutionVerticesInPipelinedRegion.keySet(),
-
executionGraphIndex.sortedExecutionVerticesInPipelinedRegion::get,
- executionGraphIndex.executionVerticesById::get,
- executionGraphIndex.resultPartitionsById::get);
-
- ensureCoLocatedVerticesInSameRegion(
- indexedPipelinedRegions.pipelinedRegions, executionGraph);
-
- return new DefaultExecutionTopology(
- executionGraphIndex.executionVerticesById,
- executionGraphIndex.executionVerticesList,
- executionGraphIndex.resultPartitionsById,
- indexedPipelinedRegions.pipelinedRegionsByVertex,
- indexedPipelinedRegions.pipelinedRegions,
- edgeManager);
- }
-
- private static ExecutionGraphIndex computeExecutionGraphIndex(
- Iterable<ExecutionVertex> executionVertices,
- Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions,
- EdgeManager edgeManager) {
- Map<ExecutionVertexID, DefaultExecutionVertex> executionVerticesById =
new HashMap<>();
- List<DefaultExecutionVertex> executionVerticesList = new ArrayList<>();
- Map<IntermediateResultPartitionID, DefaultResultPartition>
resultPartitionsById =
- new HashMap<>();
- Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>>
- sortedExecutionVerticesInPipelinedRegion = new
IdentityHashMap<>();
-
- Map<JobVertexID, DefaultLogicalPipelinedRegion>
logicalPipelinedRegionByJobVertexId =
+ Map<JobVertexID, DefaultLogicalPipelinedRegion>
logicalPipelinedRegionsByJobVertexId =
new HashMap<>();
for (DefaultLogicalPipelinedRegion logicalPipelinedRegion :
logicalPipelinedRegions) {
for (LogicalVertex vertex : logicalPipelinedRegion.getVertices()) {
- logicalPipelinedRegionByJobVertexId.put(vertex.getId(),
logicalPipelinedRegion);
+ logicalPipelinedRegionsByJobVertexId.put(vertex.getId(),
logicalPipelinedRegion);
}
}
+ return logicalPipelinedRegionsByJobVertexId;
+ }
+
+ public void notifyExecutionGraphUpdated(
+ final DefaultExecutionGraph executionGraph,
+ final List<ExecutionJobVertex> newJobVertices) {
+
+ checkNotNull(executionGraph, "execution graph can not be null");
+
+ final Set<JobVertexID> newVertexSet =
+ newJobVertices.stream()
+ .map(ExecutionJobVertex::getJobVertexId)
+ .collect(Collectors.toSet());
+
+ // any PIPELINED input should be from within this new set so that
existing pipelined regions
+ // will not change
+ newJobVertices.stream()
+ .map(ExecutionJobVertex::getJobVertex)
+ .flatMap(v -> v.getInputs().stream())
+ .map(JobEdge::getSource)
+ .filter(r -> r.getResultType().isPipelined())
+ .map(IntermediateDataSet::getProducer)
+ .map(JobVertex::getID)
+ .forEach(id -> checkState(newVertexSet.contains(id)));
+
+ final Iterable<ExecutionVertex> newAddedExecutionVertices =
+ newJobVertices.stream()
+ .flatMap(jobVertex ->
Stream.of(jobVertex.getTaskVertices()))
+ .collect(Collectors.toList());
+
+ updateExecutionGraphIndex(newAddedExecutionVertices);
+
+ updatePipelinedRegions(newAddedExecutionVertices);
+
+ ensureCoLocatedVerticesInSameRegion(pipelinedRegions, executionGraph);
+ }
+
+ public static DefaultExecutionTopology fromExecutionGraph(
+ DefaultExecutionGraph executionGraph) {
+ checkNotNull(executionGraph, "execution graph can not be null");
+
+ EdgeManager edgeManager = executionGraph.getEdgeManager();
+
+ Iterable<ExecutionVertex> executionVertices =
executionGraph.getAllExecutionVertices();
+
+ DefaultExecutionTopology schedulingTopology =
+ new DefaultExecutionTopology(
+ () ->
+ IterableUtils.toStream(executionVertices)
+ .map(ExecutionVertex::getID)
+ .collect(Collectors.toList()),
+ edgeManager,
+
computeLogicalPipelinedRegionsByJobVertexId(executionGraph));
+
+ schedulingTopology.notifyExecutionGraphUpdated(
+ executionGraph,
+
IterableUtils.toStream(executionGraph.getVerticesTopologically())
+ .filter(ExecutionJobVertex::isInitialized)
+ .collect(Collectors.toList()));
+
+ return schedulingTopology;
+ }
+
+ private void updateExecutionGraphIndex(Iterable<ExecutionVertex>
executionVertices) {
Review comment:
> updateExecutionGraphIndex
The name is outdated.
Maybe rename it to `generateNewExecutionVerticesAndResultPartitions`, or do you
have a better idea?
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java
##########
@@ -283,13 +309,25 @@ private static DefaultExecutionVertex
generateSchedulingExecutionVertex(
return schedulingVertex;
}
- private static IndexedPipelinedRegions computePipelinedRegions(
- Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions,
- Function<DefaultLogicalPipelinedRegion,
List<DefaultExecutionVertex>>
- sortedExecutionVerticesInPipelinedRegion,
- Function<ExecutionVertexID, DefaultExecutionVertex>
executionVertexRetriever,
- Function<IntermediateResultPartitionID, DefaultResultPartition>
- resultPartitionRetriever) {
+ private void updatePipelinedRegions(Iterable<ExecutionVertex>
executionVertices) {
Review comment:
executionVertices -> newExecutionVertices
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java
##########
@@ -159,50 +162,79 @@ public static DefaultExecutionTopology fromExecutionGraph(
topologicallySortedJobVertices)
.getAllPipelinedRegions();
- ExecutionGraphIndex executionGraphIndex =
- computeExecutionGraphIndex(
- executionGraph.getAllExecutionVertices(),
- logicalPipelinedRegions,
- edgeManager);
-
- IndexedPipelinedRegions indexedPipelinedRegions =
- computePipelinedRegions(
-
executionGraphIndex.sortedExecutionVerticesInPipelinedRegion.keySet(),
-
executionGraphIndex.sortedExecutionVerticesInPipelinedRegion::get,
- executionGraphIndex.executionVerticesById::get,
- executionGraphIndex.resultPartitionsById::get);
-
- ensureCoLocatedVerticesInSameRegion(
- indexedPipelinedRegions.pipelinedRegions, executionGraph);
-
- return new DefaultExecutionTopology(
- executionGraphIndex.executionVerticesById,
- executionGraphIndex.executionVerticesList,
- executionGraphIndex.resultPartitionsById,
- indexedPipelinedRegions.pipelinedRegionsByVertex,
- indexedPipelinedRegions.pipelinedRegions,
- edgeManager);
- }
-
- private static ExecutionGraphIndex computeExecutionGraphIndex(
- Iterable<ExecutionVertex> executionVertices,
- Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions,
- EdgeManager edgeManager) {
- Map<ExecutionVertexID, DefaultExecutionVertex> executionVerticesById =
new HashMap<>();
- List<DefaultExecutionVertex> executionVerticesList = new ArrayList<>();
- Map<IntermediateResultPartitionID, DefaultResultPartition>
resultPartitionsById =
- new HashMap<>();
- Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>>
- sortedExecutionVerticesInPipelinedRegion = new
IdentityHashMap<>();
-
- Map<JobVertexID, DefaultLogicalPipelinedRegion>
logicalPipelinedRegionByJobVertexId =
+ Map<JobVertexID, DefaultLogicalPipelinedRegion>
logicalPipelinedRegionsByJobVertexId =
new HashMap<>();
for (DefaultLogicalPipelinedRegion logicalPipelinedRegion :
logicalPipelinedRegions) {
for (LogicalVertex vertex : logicalPipelinedRegion.getVertices()) {
- logicalPipelinedRegionByJobVertexId.put(vertex.getId(),
logicalPipelinedRegion);
+ logicalPipelinedRegionsByJobVertexId.put(vertex.getId(),
logicalPipelinedRegion);
}
}
+ return logicalPipelinedRegionsByJobVertexId;
+ }
+
+ public void notifyExecutionGraphUpdated(
+ final DefaultExecutionGraph executionGraph,
+ final List<ExecutionJobVertex> newJobVertices) {
+
+ checkNotNull(executionGraph, "execution graph can not be null");
+
+ final Set<JobVertexID> newVertexSet =
+ newJobVertices.stream()
+ .map(ExecutionJobVertex::getJobVertexId)
+ .collect(Collectors.toSet());
+
+ // any PIPELINED input should be from within this new set so that
existing pipelined regions
+ // will not change
+ newJobVertices.stream()
+ .map(ExecutionJobVertex::getJobVertex)
+ .flatMap(v -> v.getInputs().stream())
+ .map(JobEdge::getSource)
+ .filter(r -> r.getResultType().isPipelined())
+ .map(IntermediateDataSet::getProducer)
+ .map(JobVertex::getID)
+ .forEach(id -> checkState(newVertexSet.contains(id)));
+
+ final Iterable<ExecutionVertex> newAddedExecutionVertices =
+ newJobVertices.stream()
+ .flatMap(jobVertex ->
Stream.of(jobVertex.getTaskVertices()))
+ .collect(Collectors.toList());
+
+ updateExecutionGraphIndex(newAddedExecutionVertices);
+
+ updatePipelinedRegions(newAddedExecutionVertices);
+
+ ensureCoLocatedVerticesInSameRegion(pipelinedRegions, executionGraph);
+ }
+
+ public static DefaultExecutionTopology fromExecutionGraph(
+ DefaultExecutionGraph executionGraph) {
+ checkNotNull(executionGraph, "execution graph can not be null");
+
+ EdgeManager edgeManager = executionGraph.getEdgeManager();
+
+ Iterable<ExecutionVertex> executionVertices =
executionGraph.getAllExecutionVertices();
+
+ DefaultExecutionTopology schedulingTopology =
+ new DefaultExecutionTopology(
+ () ->
+ IterableUtils.toStream(executionVertices)
+ .map(ExecutionVertex::getID)
+ .collect(Collectors.toList()),
+ edgeManager,
+
computeLogicalPipelinedRegionsByJobVertexId(executionGraph));
+
+ schedulingTopology.notifyExecutionGraphUpdated(
+ executionGraph,
+
IterableUtils.toStream(executionGraph.getVerticesTopologically())
+ .filter(ExecutionJobVertex::isInitialized)
+ .collect(Collectors.toList()));
+
+ return schedulingTopology;
+ }
+
+ private void updateExecutionGraphIndex(Iterable<ExecutionVertex>
executionVertices) {
Review comment:
executionVertices -> newExecutionVertices
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]