himanshug commented on a change in pull request #4550: Prioritized locking
URL: https://github.com/apache/incubator-druid/pull/4550#discussion_r331318193
##########
File path:
indexing-service/src/test/java/io/druid/indexing/overlord/TaskLockboxTest.java
##########
@@ -207,4 +258,206 @@ public void testSyncFromStorage() throws
EntryExistsException
Assert.assertEquals(beforeLocksInStorage, afterLocksInStorage);
}
+
+ @Test
+ public void testDoInCriticalSectionWithSharedLock() throws Exception
+ {
+ final Interval interval = Intervals.of("2017-01-01/2017-01-02");
+ final Task task = NoopTask.create();
+ lockbox.add(task);
+ Assert.assertTrue(lockbox.tryLock(TaskLockType.SHARED, task,
interval).isOk());
+
+ Assert.assertFalse(
+ lockbox.doInCriticalSection(
+ task,
+ Collections.singletonList(interval),
+ CriticalAction.<Boolean>builder().onValidLocks(() ->
true).onInvalidLocks(() -> false).build()
+ )
+ );
+ }
+
+ @Test
+ public void testDoInCriticalSectionWithExclusiveLock() throws Exception
+ {
+ final Interval interval = Intervals.of("2017-01-01/2017-01-02");
+ final Task task = NoopTask.create();
+ lockbox.add(task);
+ final TaskLock lock = lockbox.tryLock(TaskLockType.EXCLUSIVE, task,
interval).getTaskLock();
+ Assert.assertNotNull(lock);
+
+ Assert.assertTrue(
+ lockbox.doInCriticalSection(
+ task,
+ Collections.singletonList(interval),
+ CriticalAction.<Boolean>builder().onValidLocks(() ->
true).onInvalidLocks(() -> false).build()
+ )
+ );
+ }
+
+ @Test
+ public void testDoInCriticalSectionWithSmallerInterval() throws Exception
+ {
+ final Interval interval = Intervals.of("2017-01-01/2017-02-01");
+ final Interval smallInterval = Intervals.of("2017-01-10/2017-01-11");
+ final Task task = NoopTask.create();
+ lockbox.add(task);
+ final TaskLock lock = lockbox.tryLock(TaskLockType.EXCLUSIVE, task,
interval).getTaskLock();
+ Assert.assertNotNull(lock);
+
+ Assert.assertTrue(
+ lockbox.doInCriticalSection(
+ task,
+ Collections.singletonList(smallInterval),
+ CriticalAction.<Boolean>builder().onValidLocks(() ->
true).onInvalidLocks(() -> false).build()
+ )
+ );
+ }
+
+ @Test
+ public void testPreemptionAndDoInCriticalSection() throws Exception
+ {
+ final Interval interval = Intervals.of("2017-01-01/2017-01-02");
+ for (int i = 0; i < 5; i++) {
+ final Task task = NoopTask.create();
+ lockbox.add(task);
+ taskStorage.insert(task, TaskStatus.running(task.getId()));
+ Assert.assertTrue(lockbox.tryLock(TaskLockType.SHARED, task,
interval).isOk());
+ }
+
+ final Task highPriorityTask = NoopTask.create(100);
+ lockbox.add(highPriorityTask);
+ taskStorage.insert(highPriorityTask,
TaskStatus.running(highPriorityTask.getId()));
+ final TaskLock lock = lockbox.tryLock(TaskLockType.EXCLUSIVE,
highPriorityTask, interval).getTaskLock();
+ Assert.assertNotNull(lock);
+
+ Assert.assertTrue(
+ lockbox.doInCriticalSection(
+ highPriorityTask,
+ Collections.singletonList(interval),
+ CriticalAction.<Boolean>builder().onValidLocks(() ->
true).onInvalidLocks(() -> false).build()
+ )
+ );
+ }
+
+ @Test
+ public void testDoInCriticalSectionWithRevokedLock() throws Exception
+ {
+ final Interval interval = Intervals.of("2017-01-01/2017-01-02");
+ final Task lowPriorityTask = NoopTask.create("task1", 0);
+ final Task highPriorityTask = NoopTask.create("task2", 10);
+ lockbox.add(lowPriorityTask);
+ lockbox.add(highPriorityTask);
+ taskStorage.insert(lowPriorityTask,
TaskStatus.running(lowPriorityTask.getId()));
+ taskStorage.insert(highPriorityTask,
TaskStatus.running(highPriorityTask.getId()));
+
+ final TaskLock lowPriorityLock = lockbox.tryLock(TaskLockType.EXCLUSIVE,
lowPriorityTask, interval).getTaskLock();
+ Assert.assertNotNull(lowPriorityLock);
+ Assert.assertTrue(lockbox.tryLock(TaskLockType.EXCLUSIVE,
highPriorityTask, interval).isOk());
+
Assert.assertTrue(Iterables.getOnlyElement(lockbox.findLocksForTask(lowPriorityTask)).isRevoked());
+
+ Assert.assertFalse(
+ lockbox.doInCriticalSection(
+ lowPriorityTask,
+ Collections.singletonList(interval),
+ CriticalAction.<Boolean>builder().onValidLocks(() ->
true).onInvalidLocks(() -> false).build()
+ )
+ );
+ }
+
+ @Test(timeout = 5000L)
+ public void testAcquireLockAfterRevoked() throws EntryExistsException,
InterruptedException
+ {
+ final Interval interval = Intervals.of("2017-01-01/2017-01-02");
+ final Task lowPriorityTask = NoopTask.create("task1", 0);
+ final Task highPriorityTask = NoopTask.create("task2", 10);
+ lockbox.add(lowPriorityTask);
+ lockbox.add(highPriorityTask);
+ taskStorage.insert(lowPriorityTask,
TaskStatus.running(lowPriorityTask.getId()));
+ taskStorage.insert(highPriorityTask,
TaskStatus.running(highPriorityTask.getId()));
+
+ final TaskLock lowPriorityLock = lockbox.lock(TaskLockType.EXCLUSIVE,
lowPriorityTask, interval).getTaskLock();
+ Assert.assertNotNull(lowPriorityLock);
+ Assert.assertTrue(lockbox.tryLock(TaskLockType.EXCLUSIVE,
highPriorityTask, interval).isOk());
+
Assert.assertTrue(Iterables.getOnlyElement(lockbox.findLocksForTask(lowPriorityTask)).isRevoked());
+
+ lockbox.unlock(highPriorityTask, interval);
+
+ // Acquire again
+ final LockResult lockResult = lockbox.lock(TaskLockType.EXCLUSIVE,
lowPriorityTask, interval);
+ Assert.assertFalse(lockResult.isOk());
Review comment:
I "think" here is what led to the problem state...
1. a kill task is submitted; TaskQueue called `isReady()` on it, which
created a lock in the DB for it.
2. the above task hasn't started running yet, and an index task is submitted
over the same interval. TaskQueue called `isReady()` on it, which revoked the
kill task's lock.
3. none of the above tasks are running just yet
4. overlord restarted
5. kill task's `isReady()` now returns false forever and `TaskQueue` never
gives it to `TaskRunner`
6. index task's `isReady()` returned true; it was run and finished successfully.
I think the problem happens because `isReady()` doesn't differentiate
between not getting a lock and having a revoked lock. Ideally, `TaskQueue`
should notice that the kill task's lock is revoked and set its state to
failed.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]