tillrohrmann commented on a change in pull request #7227: [FLINK-11059]
[runtime] do not add releasing failed slot to free slots
URL: https://github.com/apache/flink/pull/7227#discussion_r293372113
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/taskexecutor/TaskExecutor.java
##########
@@ -1410,6 +1417,58 @@ private void timeoutSlot(AllocationID allocationId,
UUID ticket) {
}
}
+ /**
+ * Syncs the TaskExecutor's view on its allocated slots with the
JobMaster's view.
+ * Slots which are no longer reported by the JobMaster are being freed.
+ * Slots which the JobMaster thinks it still owns but which are no
longer allocated to it
+ * will be failed via {@link JobMasterGateway#failSlot}.
+ *
+ * @param allocatedSlotReport represents the JobMaster's view on the
current slot allocation state
+ */
+ private void syncSlotsWithSnapshotFromJobMaster(AllocatedSlotReport
allocatedSlotReport) {
+ JobManagerConnection jobManagerConnection =
jobManagerTable.get(allocatedSlotReport.getJobId());
+ if (jobManagerConnection != null) {
+ final JobMasterGateway jobMasterGateway =
jobManagerConnection.getJobManagerGateway();
+
+ failNoLongerAllocatedSlots(allocatedSlotReport,
jobMasterGateway);
+
+ freeNoLongerUsedSlots(allocatedSlotReport);
+ } else {
+ log.debug("Ignoring allocated slot report from job {}
because there is no active leader.",
+ allocatedSlotReport.getJobId());
+ }
+ }
+
+ private void failNoLongerAllocatedSlots(AllocatedSlotReport
allocatedSlotReport, JobMasterGateway jobMasterGateway) {
+ for (AllocatedSlotInformation allocatedSlotInformation :
allocatedSlotReport.getSlots()) {
+ final AllocationID allocationId =
allocatedSlotInformation.getAllocationId();
+ if (!taskSlotTable.isAllocated(
+ allocatedSlotInformation.getSlotIndex(),
+ allocatedSlotReport.getJobId(),
+ allocationId)) {
+ jobMasterGateway.failSlot(
+ getResourceID(),
+ allocationId,
+ new
FlinkException(String.format("Slot {} on TaskExecutor {} is not allocated by
job {}.",
+
allocatedSlotInformation.getSlotIndex(), getResourceID(),
allocatedSlotReport.getJobId())));
+ }
+ }
+ }
+
+ private void freeNoLongerUsedSlots(AllocatedSlotReport
allocatedSlotReport) {
+ final Iterator<AllocationID> slotsTaskManagerSide =
taskSlotTable.getActiveSlots(allocatedSlotReport.getJobId());
+ final Set<AllocationID> activeSlots =
Sets.newHashSet(slotsTaskManagerSide);
+ final Set<AllocationID> reportedSlots =
allocatedSlotReport.getSlots().stream()
+
.map(AllocatedSlotInformation::getAllocationId).collect(Collectors.toSet());
+
+ final Sets.SetView<AllocationID> difference =
Sets.difference(activeSlots, reportedSlots);
+
+ for (AllocationID allocationID : difference) {
+ freeSlotInternal(allocationID, new
FlinkException(String.format("{} is no longer allocated by job {}",
Review comment:
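I think `String.format` expects `%s` placeholders instead of `{}`. `{}` is the SLF4J logger syntax; `String.format` would print it literally and ignore the arguments. The same applies to the `String.format` call in `failNoLongerAllocatedSlots` above.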
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services