StephanEwen commented on a change in pull request #8756: [FLINK-12406] [Runtime] Report BLOCKING_PERSISTENT result partition meta back to client URL: https://github.com/apache/flink/pull/8756#discussion_r299701049
########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/ExecutionGraph.java ########## @@ -814,6 +820,69 @@ public Executor getFutureExecutor() { entry -> serializeAccumulator(entry.getKey(), entry.getValue()))); } + @Override + public BlockingPersistentResultPartitionMeta getBlockingPersistentResultPartitionMeta() { + Map<AbstractID, Map<AbstractID, ResultPartitionDescriptor>> resultPartitionDescriptors = new HashMap<>(); + + // keep record of all failed IntermediateDataSetID + Set<AbstractID> failedIntermediateDataSetIds = new HashSet<>(); + + for (ExecutionVertex executionVertex : getAllExecutionVertices()) { + for (IntermediateResultPartition intermediateResultPartition : executionVertex.getProducedPartitions().values()) { + if (intermediateResultPartition.getResultType() == ResultPartitionType.BLOCKING_PERSISTENT) { + try { + addLocation(resultPartitionDescriptors, intermediateResultPartition); + } catch (Throwable throwable) { + LOG.error("Failed to get location of ResultPartition: " + intermediateResultPartition.getPartitionId(), throwable); + failedIntermediateDataSetIds.add( + new AbstractID(intermediateResultPartition.getIntermediateResult().getId())); + } + } + } + } + + return new BlockingPersistentResultPartitionMeta(resultPartitionDescriptors, failedIntermediateDataSetIds); + } + + /** + * + * @param resultPartitionDescriptors + * @param intermediateResultPartition + * throw exception if any error occurs. 
+ */ + public void addLocation( + Map<AbstractID, Map<AbstractID, ResultPartitionDescriptor>> resultPartitionDescriptors, + IntermediateResultPartition intermediateResultPartition) { + + IntermediateDataSetID dataSetID = intermediateResultPartition.getIntermediateResult().getId(); + + Map<AbstractID, ResultPartitionDescriptor> map = resultPartitionDescriptors.computeIfAbsent( + new AbstractID(dataSetID), key -> new HashMap<>() + ); + + TaskManagerLocation taskManagerLocation = null; + + // The taskManagerLocation should be ready already since the previous job is done. + try { + taskManagerLocation = intermediateResultPartition + .getProducer().getCurrentExecutionAttempt().getTaskManagerLocationFuture().get(1, TimeUnit.SECONDS); Review comment: This is a blocking wait call, which cannot be used in a non-blocking data structure like the execution graph. The call to the future needs to complete or fail instantly. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services