zhijiangW commented on a change in pull request #8687:
[FLINK-12612][coordination] Track stored partition on the TaskExecutor
URL: https://github.com/apache/flink/pull/8687#discussion_r293235990
##########
File path:
flink-runtime/src/test/java/org/apache/flink/runtime/taskexecutor/TaskExecutorPartitionLifecycleTest.java
##########
@@ -102,6 +126,115 @@ public void setup() {
rpc = new TestingRpcService();
}
+ @Test
+ public void testConnectionTerminationAfterExternalRelease() throws
IOException, InterruptedException, ExecutionException, TimeoutException {
+ final JobMasterId jobMasterId = JobMasterId.generate();
+
+ final LibraryCacheManager libraryCacheManager =
mock(LibraryCacheManager.class);
+
when(libraryCacheManager.getClassLoader(any(JobID.class))).thenReturn(ClassLoader.getSystemClassLoader());
+
+ final JobMasterGateway jobMasterGateway =
mock(JobMasterGateway.class);
+
when(jobMasterGateway.getFencingToken()).thenReturn(jobMasterId);
+
+ final TaskManagerActions taskManagerActions = new
NoOpTaskManagerActions();
+ final JobManagerConnection jobManagerConnection = new
JobManagerConnection(
+ jobId,
+ ResourceID.generate(),
+ jobMasterGateway,
+ taskManagerActions,
+ mock(CheckpointResponder.class),
+ new TestGlobalAggregateManager(),
+ libraryCacheManager,
+ new NoOpResultPartitionConsumableNotifier(),
+ mock(PartitionProducerStateChecker.class));
+
+ final JobManagerTable jobManagerTable = new JobManagerTable();
+ jobManagerTable.put(jobId, jobManagerConnection);
+
+ final AtomicBoolean hasPartitionsOccupyingLocalResources = new
AtomicBoolean(true);
+ final TestJobAwareShuffleEnvironment jobAwareShuffleEnvironment
= new TestJobAwareShuffleEnvironment(jobId ->
hasPartitionsOccupyingLocalResources.get());
+
+ final TaskManagerServices taskManagerServices = new
TaskManagerServicesBuilder()
+ .setJobManagerTable(jobManagerTable)
+ .setShuffleEnvironment(jobAwareShuffleEnvironment)
+ .build();
+
+ final TestingTaskExecutor taskManager =
createTestingTaskExecutor(taskManagerServices, new
HeartbeatServices(Long.MAX_VALUE, Long.MAX_VALUE));
+
+ try {
+ taskManager.start();
+ taskManager.waitUntilStarted();
+
+
assertTrue(taskManagerServices.getJobManagerTable().contains(jobId));
+
+ taskManager.releasePartitions(jobId,
Collections.singletonList(new ResultPartitionID()));
+ // connection should be kept alive since the
environment still says we have local resources
+
assertTrue(taskManagerServices.getJobManagerTable().contains(jobId));
+
+ hasPartitionsOccupyingLocalResources.set(false);
+
+ taskManager.releasePartitions(jobId,
Collections.singletonList(new ResultPartitionID()));
+
assertTrue(taskManagerServices.getJobManagerTable().contains(jobId));
+ } finally {
+ RpcUtils.terminateRpcEndpoint(taskManager, timeout);
+ }
+ }
+
+ @Test
+ public void testConnectionTerminationAfterInternalRelease() throws
Exception {
+ final JobMasterId jobMasterId = JobMasterId.generate();
+
+ final LibraryCacheManager libraryCacheManager =
mock(LibraryCacheManager.class);
+
when(libraryCacheManager.getClassLoader(any(JobID.class))).thenReturn(ClassLoader.getSystemClassLoader());
+
+ final JobMasterGateway jobMasterGateway =
mock(JobMasterGateway.class);
+
when(jobMasterGateway.getFencingToken()).thenReturn(jobMasterId);
+
+ final TaskManagerActions taskManagerActions = new
NoOpTaskManagerActions();
+ final JobManagerConnection jobManagerConnection = new
JobManagerConnection(
+ jobId,
+ ResourceID.generate(),
+ jobMasterGateway,
+ taskManagerActions,
+ mock(CheckpointResponder.class),
+ new TestGlobalAggregateManager(),
+ libraryCacheManager,
+ new NoOpResultPartitionConsumableNotifier(),
+ mock(PartitionProducerStateChecker.class));
+
+ final JobManagerTable jobManagerTable = new JobManagerTable();
+ jobManagerTable.put(jobId, jobManagerConnection);
+
+ final AtomicBoolean hasPartitionsOccupyingLocalResources = new
AtomicBoolean(true);
+ final TestJobAwareShuffleEnvironment jobAwareShuffleEnvironment
= new TestJobAwareShuffleEnvironment(jobId ->
hasPartitionsOccupyingLocalResources.get());
+
+ final TaskManagerServices taskManagerServices = new
TaskManagerServicesBuilder()
+ .setJobManagerTable(jobManagerTable)
+ .setShuffleEnvironment(jobAwareShuffleEnvironment)
+ .build();
+
+ final TestingTaskExecutor taskManager =
createTestingTaskExecutor(taskManagerServices, new
HeartbeatServices(Long.MAX_VALUE, Long.MAX_VALUE));
+
+ try {
+ taskManager.start();
+ taskManager.waitUntilStarted();
+
+
assertTrue(taskManagerServices.getJobManagerTable().contains(jobId));
+ assertNotNull(jobAwareShuffleEnvironment.listener);
+
+ jobAwareShuffleEnvironment.listener.accept(jobId);
+ // connection should be kept alive since the
environment still says we have local resources
+
assertTrue(taskManagerServices.getJobManagerTable().contains(jobId));
+
+ hasPartitionsOccupyingLocalResources.set(false);
+
+ jobAwareShuffleEnvironment.listener.accept(jobId);
+
assertTrue(taskManagerServices.getJobManagerTable().contains(jobId));
Review comment:
We should expect `assertFalse` here. I guess there might be a concurrency issue
that delays the result.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services