zhuzhurk commented on a change in pull request #18102: URL: https://github.com/apache/flink/pull/18102#discussion_r786442228
########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java ########## @@ -159,50 +162,79 @@ public static DefaultExecutionTopology fromExecutionGraph( topologicallySortedJobVertices) .getAllPipelinedRegions(); - ExecutionGraphIndex executionGraphIndex = - computeExecutionGraphIndex( - executionGraph.getAllExecutionVertices(), - logicalPipelinedRegions, - edgeManager); - - IndexedPipelinedRegions indexedPipelinedRegions = - computePipelinedRegions( - executionGraphIndex.sortedExecutionVerticesInPipelinedRegion.keySet(), - executionGraphIndex.sortedExecutionVerticesInPipelinedRegion::get, - executionGraphIndex.executionVerticesById::get, - executionGraphIndex.resultPartitionsById::get); - - ensureCoLocatedVerticesInSameRegion( - indexedPipelinedRegions.pipelinedRegions, executionGraph); - - return new DefaultExecutionTopology( - executionGraphIndex.executionVerticesById, - executionGraphIndex.executionVerticesList, - executionGraphIndex.resultPartitionsById, - indexedPipelinedRegions.pipelinedRegionsByVertex, - indexedPipelinedRegions.pipelinedRegions, - edgeManager); - } - - private static ExecutionGraphIndex computeExecutionGraphIndex( - Iterable<ExecutionVertex> executionVertices, - Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions, - EdgeManager edgeManager) { - Map<ExecutionVertexID, DefaultExecutionVertex> executionVerticesById = new HashMap<>(); - List<DefaultExecutionVertex> executionVerticesList = new ArrayList<>(); - Map<IntermediateResultPartitionID, DefaultResultPartition> resultPartitionsById = - new HashMap<>(); - Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> - sortedExecutionVerticesInPipelinedRegion = new IdentityHashMap<>(); - - Map<JobVertexID, DefaultLogicalPipelinedRegion> logicalPipelinedRegionByJobVertexId = + Map<JobVertexID, DefaultLogicalPipelinedRegion> logicalPipelinedRegionsByJobVertexId = new HashMap<>(); for (DefaultLogicalPipelinedRegion 
logicalPipelinedRegion : logicalPipelinedRegions) { for (LogicalVertex vertex : logicalPipelinedRegion.getVertices()) { - logicalPipelinedRegionByJobVertexId.put(vertex.getId(), logicalPipelinedRegion); + logicalPipelinedRegionsByJobVertexId.put(vertex.getId(), logicalPipelinedRegion); } } + return logicalPipelinedRegionsByJobVertexId; + } + + public void notifyExecutionGraphUpdated( + final DefaultExecutionGraph executionGraph, + final List<ExecutionJobVertex> newJobVertices) { + + checkNotNull(executionGraph, "execution graph can not be null"); + + final Set<JobVertexID> newVertexSet = Review comment: NIT: newVertexSet -> newJobVertexIds ########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java ########## @@ -159,50 +162,79 @@ public static DefaultExecutionTopology fromExecutionGraph( topologicallySortedJobVertices) .getAllPipelinedRegions(); - ExecutionGraphIndex executionGraphIndex = - computeExecutionGraphIndex( - executionGraph.getAllExecutionVertices(), - logicalPipelinedRegions, - edgeManager); - - IndexedPipelinedRegions indexedPipelinedRegions = - computePipelinedRegions( - executionGraphIndex.sortedExecutionVerticesInPipelinedRegion.keySet(), - executionGraphIndex.sortedExecutionVerticesInPipelinedRegion::get, - executionGraphIndex.executionVerticesById::get, - executionGraphIndex.resultPartitionsById::get); - - ensureCoLocatedVerticesInSameRegion( - indexedPipelinedRegions.pipelinedRegions, executionGraph); - - return new DefaultExecutionTopology( - executionGraphIndex.executionVerticesById, - executionGraphIndex.executionVerticesList, - executionGraphIndex.resultPartitionsById, - indexedPipelinedRegions.pipelinedRegionsByVertex, - indexedPipelinedRegions.pipelinedRegions, - edgeManager); - } - - private static ExecutionGraphIndex computeExecutionGraphIndex( - Iterable<ExecutionVertex> executionVertices, - Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions, - EdgeManager 
edgeManager) { - Map<ExecutionVertexID, DefaultExecutionVertex> executionVerticesById = new HashMap<>(); - List<DefaultExecutionVertex> executionVerticesList = new ArrayList<>(); - Map<IntermediateResultPartitionID, DefaultResultPartition> resultPartitionsById = - new HashMap<>(); - Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> - sortedExecutionVerticesInPipelinedRegion = new IdentityHashMap<>(); - - Map<JobVertexID, DefaultLogicalPipelinedRegion> logicalPipelinedRegionByJobVertexId = + Map<JobVertexID, DefaultLogicalPipelinedRegion> logicalPipelinedRegionsByJobVertexId = new HashMap<>(); for (DefaultLogicalPipelinedRegion logicalPipelinedRegion : logicalPipelinedRegions) { for (LogicalVertex vertex : logicalPipelinedRegion.getVertices()) { - logicalPipelinedRegionByJobVertexId.put(vertex.getId(), logicalPipelinedRegion); + logicalPipelinedRegionsByJobVertexId.put(vertex.getId(), logicalPipelinedRegion); } } + return logicalPipelinedRegionsByJobVertexId; + } + + public void notifyExecutionGraphUpdated( + final DefaultExecutionGraph executionGraph, + final List<ExecutionJobVertex> newJobVertices) { + + checkNotNull(executionGraph, "execution graph can not be null"); + + final Set<JobVertexID> newVertexSet = + newJobVertices.stream() + .map(ExecutionJobVertex::getJobVertexId) + .collect(Collectors.toSet()); + + // any PIPELINED input should be from within this new set so that existing pipelined regions + // will not change + newJobVertices.stream() + .map(ExecutionJobVertex::getJobVertex) + .flatMap(v -> v.getInputs().stream()) + .map(JobEdge::getSource) + .filter(r -> r.getResultType().isPipelined()) + .map(IntermediateDataSet::getProducer) + .map(JobVertex::getID) + .forEach(id -> checkState(newVertexSet.contains(id))); + + final Iterable<ExecutionVertex> newAddedExecutionVertices = + newJobVertices.stream() + .flatMap(jobVertex -> Stream.of(jobVertex.getTaskVertices())) + .collect(Collectors.toList()); + + 
updateExecutionGraphIndex(newAddedExecutionVertices); + + updatePipelinedRegions(newAddedExecutionVertices); + + ensureCoLocatedVerticesInSameRegion(pipelinedRegions, executionGraph); + } + + public static DefaultExecutionTopology fromExecutionGraph( + DefaultExecutionGraph executionGraph) { + checkNotNull(executionGraph, "execution graph can not be null"); + + EdgeManager edgeManager = executionGraph.getEdgeManager(); + + Iterable<ExecutionVertex> executionVertices = executionGraph.getAllExecutionVertices(); + + DefaultExecutionTopology schedulingTopology = + new DefaultExecutionTopology( + () -> + IterableUtils.toStream(executionVertices) + .map(ExecutionVertex::getID) + .collect(Collectors.toList()), + edgeManager, + computeLogicalPipelinedRegionsByJobVertexId(executionGraph)); + + schedulingTopology.notifyExecutionGraphUpdated( + executionGraph, + IterableUtils.toStream(executionGraph.getVerticesTopologically()) + .filter(ExecutionJobVertex::isInitialized) + .collect(Collectors.toList())); + + return schedulingTopology; + } + + private void updateExecutionGraphIndex(Iterable<ExecutionVertex> executionVertices) { + Review comment: NIT: unnecessary empty line ########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java ########## @@ -159,50 +162,79 @@ public static DefaultExecutionTopology fromExecutionGraph( topologicallySortedJobVertices) .getAllPipelinedRegions(); - ExecutionGraphIndex executionGraphIndex = - computeExecutionGraphIndex( - executionGraph.getAllExecutionVertices(), - logicalPipelinedRegions, - edgeManager); - - IndexedPipelinedRegions indexedPipelinedRegions = - computePipelinedRegions( - executionGraphIndex.sortedExecutionVerticesInPipelinedRegion.keySet(), - executionGraphIndex.sortedExecutionVerticesInPipelinedRegion::get, - executionGraphIndex.executionVerticesById::get, - executionGraphIndex.resultPartitionsById::get); - - ensureCoLocatedVerticesInSameRegion( - 
indexedPipelinedRegions.pipelinedRegions, executionGraph); - - return new DefaultExecutionTopology( - executionGraphIndex.executionVerticesById, - executionGraphIndex.executionVerticesList, - executionGraphIndex.resultPartitionsById, - indexedPipelinedRegions.pipelinedRegionsByVertex, - indexedPipelinedRegions.pipelinedRegions, - edgeManager); - } - - private static ExecutionGraphIndex computeExecutionGraphIndex( - Iterable<ExecutionVertex> executionVertices, - Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions, - EdgeManager edgeManager) { - Map<ExecutionVertexID, DefaultExecutionVertex> executionVerticesById = new HashMap<>(); - List<DefaultExecutionVertex> executionVerticesList = new ArrayList<>(); - Map<IntermediateResultPartitionID, DefaultResultPartition> resultPartitionsById = - new HashMap<>(); - Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> - sortedExecutionVerticesInPipelinedRegion = new IdentityHashMap<>(); - - Map<JobVertexID, DefaultLogicalPipelinedRegion> logicalPipelinedRegionByJobVertexId = + Map<JobVertexID, DefaultLogicalPipelinedRegion> logicalPipelinedRegionsByJobVertexId = new HashMap<>(); for (DefaultLogicalPipelinedRegion logicalPipelinedRegion : logicalPipelinedRegions) { for (LogicalVertex vertex : logicalPipelinedRegion.getVertices()) { - logicalPipelinedRegionByJobVertexId.put(vertex.getId(), logicalPipelinedRegion); + logicalPipelinedRegionsByJobVertexId.put(vertex.getId(), logicalPipelinedRegion); } } + return logicalPipelinedRegionsByJobVertexId; + } + + public void notifyExecutionGraphUpdated( + final DefaultExecutionGraph executionGraph, + final List<ExecutionJobVertex> newJobVertices) { + + checkNotNull(executionGraph, "execution graph can not be null"); + + final Set<JobVertexID> newVertexSet = + newJobVertices.stream() + .map(ExecutionJobVertex::getJobVertexId) + .collect(Collectors.toSet()); + + // any PIPELINED input should be from within this new set so that existing pipelined regions + // will 
not change + newJobVertices.stream() + .map(ExecutionJobVertex::getJobVertex) + .flatMap(v -> v.getInputs().stream()) + .map(JobEdge::getSource) + .filter(r -> r.getResultType().isPipelined()) + .map(IntermediateDataSet::getProducer) + .map(JobVertex::getID) + .forEach(id -> checkState(newVertexSet.contains(id))); + + final Iterable<ExecutionVertex> newAddedExecutionVertices = Review comment: newAddedExecutionVertices -> newlyAddedExecutionVertices or newExecutionVertices ########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java ########## @@ -159,50 +162,79 @@ public static DefaultExecutionTopology fromExecutionGraph( topologicallySortedJobVertices) .getAllPipelinedRegions(); - ExecutionGraphIndex executionGraphIndex = - computeExecutionGraphIndex( - executionGraph.getAllExecutionVertices(), - logicalPipelinedRegions, - edgeManager); - - IndexedPipelinedRegions indexedPipelinedRegions = - computePipelinedRegions( - executionGraphIndex.sortedExecutionVerticesInPipelinedRegion.keySet(), - executionGraphIndex.sortedExecutionVerticesInPipelinedRegion::get, - executionGraphIndex.executionVerticesById::get, - executionGraphIndex.resultPartitionsById::get); - - ensureCoLocatedVerticesInSameRegion( - indexedPipelinedRegions.pipelinedRegions, executionGraph); - - return new DefaultExecutionTopology( - executionGraphIndex.executionVerticesById, - executionGraphIndex.executionVerticesList, - executionGraphIndex.resultPartitionsById, - indexedPipelinedRegions.pipelinedRegionsByVertex, - indexedPipelinedRegions.pipelinedRegions, - edgeManager); - } - - private static ExecutionGraphIndex computeExecutionGraphIndex( - Iterable<ExecutionVertex> executionVertices, - Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions, - EdgeManager edgeManager) { - Map<ExecutionVertexID, DefaultExecutionVertex> executionVerticesById = new HashMap<>(); - List<DefaultExecutionVertex> executionVerticesList = new 
ArrayList<>(); - Map<IntermediateResultPartitionID, DefaultResultPartition> resultPartitionsById = - new HashMap<>(); - Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> - sortedExecutionVerticesInPipelinedRegion = new IdentityHashMap<>(); - - Map<JobVertexID, DefaultLogicalPipelinedRegion> logicalPipelinedRegionByJobVertexId = + Map<JobVertexID, DefaultLogicalPipelinedRegion> logicalPipelinedRegionsByJobVertexId = new HashMap<>(); for (DefaultLogicalPipelinedRegion logicalPipelinedRegion : logicalPipelinedRegions) { for (LogicalVertex vertex : logicalPipelinedRegion.getVertices()) { - logicalPipelinedRegionByJobVertexId.put(vertex.getId(), logicalPipelinedRegion); + logicalPipelinedRegionsByJobVertexId.put(vertex.getId(), logicalPipelinedRegion); } } + return logicalPipelinedRegionsByJobVertexId; + } + + public void notifyExecutionGraphUpdated( + final DefaultExecutionGraph executionGraph, + final List<ExecutionJobVertex> newJobVertices) { + + checkNotNull(executionGraph, "execution graph can not be null"); + + final Set<JobVertexID> newVertexSet = + newJobVertices.stream() + .map(ExecutionJobVertex::getJobVertexId) + .collect(Collectors.toSet()); + + // any PIPELINED input should be from within this new set so that existing pipelined regions + // will not change + newJobVertices.stream() + .map(ExecutionJobVertex::getJobVertex) + .flatMap(v -> v.getInputs().stream()) + .map(JobEdge::getSource) + .filter(r -> r.getResultType().isPipelined()) + .map(IntermediateDataSet::getProducer) + .map(JobVertex::getID) + .forEach(id -> checkState(newVertexSet.contains(id))); + + final Iterable<ExecutionVertex> newAddedExecutionVertices = + newJobVertices.stream() + .flatMap(jobVertex -> Stream.of(jobVertex.getTaskVertices())) + .collect(Collectors.toList()); + + updateExecutionGraphIndex(newAddedExecutionVertices); + + updatePipelinedRegions(newAddedExecutionVertices); + + ensureCoLocatedVerticesInSameRegion(pipelinedRegions, executionGraph); + } + + public 
static DefaultExecutionTopology fromExecutionGraph( + DefaultExecutionGraph executionGraph) { + checkNotNull(executionGraph, "execution graph can not be null"); + + EdgeManager edgeManager = executionGraph.getEdgeManager(); + + Iterable<ExecutionVertex> executionVertices = executionGraph.getAllExecutionVertices(); + + DefaultExecutionTopology schedulingTopology = + new DefaultExecutionTopology( + () -> + IterableUtils.toStream(executionVertices) + .map(ExecutionVertex::getID) + .collect(Collectors.toList()), + edgeManager, + computeLogicalPipelinedRegionsByJobVertexId(executionGraph)); + + schedulingTopology.notifyExecutionGraphUpdated( + executionGraph, + IterableUtils.toStream(executionGraph.getVerticesTopologically()) + .filter(ExecutionJobVertex::isInitialized) + .collect(Collectors.toList())); + + return schedulingTopology; + } + + private void updateExecutionGraphIndex(Iterable<ExecutionVertex> executionVertices) { Review comment: Regarding `updateExecutionGraphIndex`: the name is outdated. Maybe rename it to `generateNewExecutionVerticesAndResultPartitions`, or do you have a better idea?
########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java ########## @@ -283,13 +309,25 @@ private static DefaultExecutionVertex generateSchedulingExecutionVertex( return schedulingVertex; } - private static IndexedPipelinedRegions computePipelinedRegions( - Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions, - Function<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> - sortedExecutionVerticesInPipelinedRegion, - Function<ExecutionVertexID, DefaultExecutionVertex> executionVertexRetriever, - Function<IntermediateResultPartitionID, DefaultResultPartition> - resultPartitionRetriever) { + private void updatePipelinedRegions(Iterable<ExecutionVertex> executionVertices) { Review comment: executionVertices -> newExecutionVertices ########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adapter/DefaultExecutionTopology.java ########## @@ -159,50 +162,79 @@ public static DefaultExecutionTopology fromExecutionGraph( topologicallySortedJobVertices) .getAllPipelinedRegions(); - ExecutionGraphIndex executionGraphIndex = - computeExecutionGraphIndex( - executionGraph.getAllExecutionVertices(), - logicalPipelinedRegions, - edgeManager); - - IndexedPipelinedRegions indexedPipelinedRegions = - computePipelinedRegions( - executionGraphIndex.sortedExecutionVerticesInPipelinedRegion.keySet(), - executionGraphIndex.sortedExecutionVerticesInPipelinedRegion::get, - executionGraphIndex.executionVerticesById::get, - executionGraphIndex.resultPartitionsById::get); - - ensureCoLocatedVerticesInSameRegion( - indexedPipelinedRegions.pipelinedRegions, executionGraph); - - return new DefaultExecutionTopology( - executionGraphIndex.executionVerticesById, - executionGraphIndex.executionVerticesList, - executionGraphIndex.resultPartitionsById, - indexedPipelinedRegions.pipelinedRegionsByVertex, - indexedPipelinedRegions.pipelinedRegions, - edgeManager); - } - - private static 
ExecutionGraphIndex computeExecutionGraphIndex( - Iterable<ExecutionVertex> executionVertices, - Iterable<DefaultLogicalPipelinedRegion> logicalPipelinedRegions, - EdgeManager edgeManager) { - Map<ExecutionVertexID, DefaultExecutionVertex> executionVerticesById = new HashMap<>(); - List<DefaultExecutionVertex> executionVerticesList = new ArrayList<>(); - Map<IntermediateResultPartitionID, DefaultResultPartition> resultPartitionsById = - new HashMap<>(); - Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> - sortedExecutionVerticesInPipelinedRegion = new IdentityHashMap<>(); - - Map<JobVertexID, DefaultLogicalPipelinedRegion> logicalPipelinedRegionByJobVertexId = + Map<JobVertexID, DefaultLogicalPipelinedRegion> logicalPipelinedRegionsByJobVertexId = new HashMap<>(); for (DefaultLogicalPipelinedRegion logicalPipelinedRegion : logicalPipelinedRegions) { for (LogicalVertex vertex : logicalPipelinedRegion.getVertices()) { - logicalPipelinedRegionByJobVertexId.put(vertex.getId(), logicalPipelinedRegion); + logicalPipelinedRegionsByJobVertexId.put(vertex.getId(), logicalPipelinedRegion); } } + return logicalPipelinedRegionsByJobVertexId; + } + + public void notifyExecutionGraphUpdated( + final DefaultExecutionGraph executionGraph, + final List<ExecutionJobVertex> newJobVertices) { + + checkNotNull(executionGraph, "execution graph can not be null"); + + final Set<JobVertexID> newVertexSet = + newJobVertices.stream() + .map(ExecutionJobVertex::getJobVertexId) + .collect(Collectors.toSet()); + + // any PIPELINED input should be from within this new set so that existing pipelined regions + // will not change + newJobVertices.stream() + .map(ExecutionJobVertex::getJobVertex) + .flatMap(v -> v.getInputs().stream()) + .map(JobEdge::getSource) + .filter(r -> r.getResultType().isPipelined()) + .map(IntermediateDataSet::getProducer) + .map(JobVertex::getID) + .forEach(id -> checkState(newVertexSet.contains(id))); + + final Iterable<ExecutionVertex> 
newAddedExecutionVertices = + newJobVertices.stream() + .flatMap(jobVertex -> Stream.of(jobVertex.getTaskVertices())) + .collect(Collectors.toList()); + + updateExecutionGraphIndex(newAddedExecutionVertices); + + updatePipelinedRegions(newAddedExecutionVertices); + + ensureCoLocatedVerticesInSameRegion(pipelinedRegions, executionGraph); + } + + public static DefaultExecutionTopology fromExecutionGraph( + DefaultExecutionGraph executionGraph) { + checkNotNull(executionGraph, "execution graph can not be null"); + + EdgeManager edgeManager = executionGraph.getEdgeManager(); + + Iterable<ExecutionVertex> executionVertices = executionGraph.getAllExecutionVertices(); + + DefaultExecutionTopology schedulingTopology = + new DefaultExecutionTopology( + () -> + IterableUtils.toStream(executionVertices) + .map(ExecutionVertex::getID) + .collect(Collectors.toList()), + edgeManager, + computeLogicalPipelinedRegionsByJobVertexId(executionGraph)); + + schedulingTopology.notifyExecutionGraphUpdated( + executionGraph, + IterableUtils.toStream(executionGraph.getVerticesTopologically()) + .filter(ExecutionJobVertex::isInitialized) + .collect(Collectors.toList())); + + return schedulingTopology; + } + + private void updateExecutionGraphIndex(Iterable<ExecutionVertex> executionVertices) { Review comment: executionVertices -> newExecutionVertices -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org