This is an automated email from the ASF dual-hosted git repository.

heneveld pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brooklyn-server.git


The following commit(s) were added to refs/heads/master by this push:
     new 5c275f3b64 don't remove the location from an entity on stop until it 
is confirmed released
5c275f3b64 is described below

commit 5c275f3b64e8b6c98c6605e39a7dc7d2af1fab1a
Author: Alex Heneveld <[email protected]>
AuthorDate: Fri Jan 27 16:26:59 2023 +0000

    don't remove the location from an entity on stop until it is confirmed 
released
    
    previously there was a window where a location instance might be removed 
from the parent
    before the provisioner releases it, and if the provisioner fails in that 
window the
    reference to the location is lost and cannot be subsequently deleted
---
 .../org/apache/brooklyn/core/entity/Entities.java  |  10 +-
 .../apache/brooklyn/util/core/task/BasicTask.java  |   2 +-
 .../core/mgmt/rebind/RebindTestFixture.java        |  12 +-
 .../lifecycle/MachineLifecycleEffectorTasks.java   |  28 ++-
 .../software/base/SoftwareProcessEntityTest.java   | 265 +++++++++++----------
 .../SoftwareProcessRebindNotRunningEntityTest.java |  59 +++--
 6 files changed, 217 insertions(+), 159 deletions(-)

diff --git a/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java 
b/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java
index cadc693060..dd80796639 100644
--- a/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java
+++ b/core/src/main/java/org/apache/brooklyn/core/entity/Entities.java
@@ -693,6 +693,9 @@ public class Entities {
      * Actual actions performed will depend on the entity type and its current 
state.
      */
     public static void destroy(Entity e, boolean unmanageOnErrors) {
+        destroy(e, unmanageOnErrors, null);
+    }
+    public static void destroy(Entity e, boolean unmanageOnErrors, Duration 
timeout) {
         if (isManaged(e)) {
             if (isReadOnly(e)) {
                 unmanage(e);
@@ -701,7 +704,7 @@ public class Entities {
                 List<Exception> errors = MutableList.of();
 
                 try {
-                    if (e instanceof Startable) Entities.invokeEffector(e, e, 
Startable.STOP).getUnchecked();
+                    if (e instanceof Startable) Entities.invokeEffector(e, e, 
Startable.STOP).getUnchecked(timeout);
                 } catch (Exception error) {
                     Exceptions.propagateIfFatal(error);
                     if (!unmanageOnErrors) Exceptions.propagate(error);
@@ -780,6 +783,9 @@ public class Entities {
      * Apps will be stopped+destroyed+unmanaged concurrently, waiting for all 
to complete.
      */
     public static void destroyAll(final ManagementContext mgmt) {
+        destroyAll(mgmt, null);
+    }
+    public static void destroyAll(final ManagementContext mgmt, Duration 
timeout) {
         final int MAX_THREADS = 100;
         
         if (mgmt instanceof NonDeploymentManagementContext) {
@@ -803,7 +809,7 @@ public class Entities {
                     public void run() {
                         log.debug("destroying app "+app+" (managed? 
"+isManaged(app)+"; mgmt is "+mgmt+")");
                         try {
-                            destroy(app, true);
+                            destroy(app, true, timeout);
                             log.debug("destroyed app "+app+"; mgmt now "+mgmt);
                         } catch (Exception e) {
                             log.warn("problems destroying app "+app+" (mgmt 
now "+mgmt+", will rethrow at least one exception): "+e);
diff --git 
a/core/src/main/java/org/apache/brooklyn/util/core/task/BasicTask.java 
b/core/src/main/java/org/apache/brooklyn/util/core/task/BasicTask.java
index 6616eb88d8..44d380fa30 100644
--- a/core/src/main/java/org/apache/brooklyn/util/core/task/BasicTask.java
+++ b/core/src/main/java/org/apache/brooklyn/util/core/task/BasicTask.java
@@ -495,7 +495,7 @@ public class BasicTask<T> implements TaskInternal<T> {
             if (cancelled) throw new CancellationException();
             if (internalFuture == null) {
                 synchronized (this) {
-                    long remaining = end - System.currentTimeMillis();
+                    long remaining = end==null ? 100 : end - 
System.currentTimeMillis();
                     if (internalFuture==null && remaining>0)
                         wait(remaining);
                 }
diff --git 
a/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindTestFixture.java
 
b/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindTestFixture.java
index 3c79afc5fc..1159775654 100644
--- 
a/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindTestFixture.java
+++ 
b/core/src/test/java/org/apache/brooklyn/core/mgmt/rebind/RebindTestFixture.java
@@ -232,14 +232,18 @@ public abstract class RebindTestFixture<T extends 
StartableApplication> {
 
     @AfterMethod(alwaysRun=true)
     public void tearDown() throws Exception {
-        if (origApp != null) 
Entities.destroyAll(origApp.getManagementContext());
-        if (newApp != null) Entities.destroyAll(newApp.getManagementContext());
-        if (newManagementContext != null) 
Entities.destroyAll(newManagementContext);
+        tearDown(null);
+    }
+
+    protected void tearDown(Duration timeout) throws Exception {
+        if (origApp != null) 
Entities.destroyAll(origApp.getManagementContext(), timeout);
+        if (newApp != null) Entities.destroyAll(newApp.getManagementContext(), 
timeout);
+        if (newManagementContext != null) 
Entities.destroyAll(newManagementContext, timeout);
         origApp = null;
         newApp = null;
         newManagementContext = null;
 
-        if (origManagementContext != null) 
Entities.destroyAll(origManagementContext);
+        if (origManagementContext != null) 
Entities.destroyAll(origManagementContext, timeout);
         if (mementoDir != null) 
FileBasedObjectStore.deleteCompletely(mementoDir);
         if (mementoDirBackup != null) 
FileBasedObjectStore.deleteCompletely(mementoDir);
         origManagementContext = null;
diff --git 
a/software/base/src/main/java/org/apache/brooklyn/entity/software/base/lifecycle/MachineLifecycleEffectorTasks.java
 
b/software/base/src/main/java/org/apache/brooklyn/entity/software/base/lifecycle/MachineLifecycleEffectorTasks.java
index 8807d001d1..b63f6e08a0 100644
--- 
a/software/base/src/main/java/org/apache/brooklyn/entity/software/base/lifecycle/MachineLifecycleEffectorTasks.java
+++ 
b/software/base/src/main/java/org/apache/brooklyn/entity/software/base/lifecycle/MachineLifecycleEffectorTasks.java
@@ -1101,8 +1101,25 @@ public abstract class MachineLifecycleEffectorTasks {
     }
 
     protected StopMachineDetails<Integer> 
stopProvisionedMachine(MachineProvisioningLocation<MachineLocation> 
provisioner, Location machine, ConfigBag parameters) {
-        clearEntityLocationAttributes(machine);
-        provisioner.release((MachineLocation) machine);
+        boolean succeeded = false;
+        try {
+            provisioner.release((MachineLocation) machine);
+            clearEntityLocationAttributes(machine, true);
+            succeeded = true;
+        } finally {
+            if (!succeeded) {
+                if (!Locations.isManaged(machine)) {
+                    log.warn("Stopping "+machine+" failed, will rethrow 
exception shortly, but as the location is no longer managed it is being cleared 
on any entities");
+                    // there was a failure, but before failing, it proceeded 
far enough that the location is no longer managed by brooklyn
+                    // which means the location will not be usable, so let's 
clear it
+                    clearEntityLocationAttributes(machine, true);
+                } else {
+                    log.debug("Stopping "+machine+" failed; previously 
attributes would have been cleared but now they are being kept to facilitate 
deletion in future");
+                    // 2023-01 we no longer remove sensors
+                    // clearEntityLocationAttributes(machine, false);
+                }
+            }
+        }
         return new StopMachineDetails<Integer>("Decommissioned "+machine, 1) 
{};
     }
 
@@ -1141,7 +1158,7 @@ public abstract class MachineLifecycleEffectorTasks {
             throw new UnsupportedOperationException("Location provisioner 
cannot suspend machines: " + provisioner);
         }
 
-        clearEntityLocationAttributes(machine);
+        clearEntityLocationAttributes(machine, false);
         
SuspendsMachines.class.cast(provisioner).suspendMachine(MachineLocation.class.cast(machine));
 
         return new StopMachineDetails<>("Suspended " + machine, 1);
@@ -1152,7 +1169,10 @@ public abstract class MachineLifecycleEffectorTasks {
      * and removes the given machine from its locations.
      */
     protected void clearEntityLocationAttributes(Location machine) {
-        entity().removeLocations(ImmutableList.of(machine));
+        clearEntityLocationAttributes(machine, true);
+    }
+    protected void clearEntityLocationAttributes(Location machine, boolean 
removeLocation) {
+        if (removeLocation) 
entity().removeLocations(ImmutableList.of(machine));
         entity().sensors().set(Attributes.HOSTNAME, null);
         entity().sensors().set(Attributes.ADDRESS, null);
         entity().sensors().set(Attributes.SUBNET_HOSTNAME, null);
diff --git 
a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessEntityTest.java
 
b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessEntityTest.java
index 635f2fd09c..674907b1c1 100644
--- 
a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessEntityTest.java
+++ 
b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessEntityTest.java
@@ -444,138 +444,139 @@ public class SoftwareProcessEntityTest extends 
BrooklynAppUnitTestSupport {
         
doTestReleaseEvenIfErrorDuringStop(SimulatedFailInChildOnStopDriver.class);
     }
 
-    @Test
-    public void testDoubleStopEntity() {
-        ReflectiveEntityDriverFactory f = 
((BasicEntityDriverManager)mgmt.getEntityDriverManager()).getReflectiveDriverFactory();
-        f.addClassFullNameMapping(EmptySoftwareProcessDriver.class.getName(), 
MinimalEmptySoftwareProcessTestDriver.class.getName());
-
-        // Second stop on SoftwareProcess will return early, while the first 
stop is still in progress
-        // This causes the app to shutdown prematurely, leaking machines.
-        EntityManager emgr = mgmt.getEntityManager();
-        EntitySpec<TestApplication> appSpec = 
EntitySpec.create(TestApplication.class);
-        TestApplication app = emgr.createEntity(appSpec);
-        emgr.manage(app);
-        EntitySpec<?> latchEntitySpec = 
EntitySpec.create(EmptySoftwareProcess.class);
-        Entity entity = app.createAndManageChild(latchEntitySpec);
-
-        final ReleaseLatchLocation loc = 
mgmt.getLocationManager().createLocation(LocationSpec.create(ReleaseLatchLocation.class));
-        try {
-            app.start(ImmutableSet.of(loc));
-            EntityAsserts.assertAttributeEquals(entity, 
Attributes.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);
-    
-            final Task<Void> firstStop = entity.invoke(Startable.STOP, 
ImmutableMap.<String, Object>of());
-            // Wait until first task tries to release the location, at this 
point the entity's reference 
-            // to the location is already cleared.
-            Asserts.succeedsEventually(new Runnable() {
-                @Override
-                public void run() {
-                    assertTrue(loc.isBlocked());
-                }
-            });
-
-            assertEquals(ServiceStateLogic.getExpectedState(entity), 
Lifecycle.STOPPING);
-            EntityAsserts.assertAttributeEquals(entity, 
Attributes.SERVICE_STATE_ACTUAL, Lifecycle.STOPPING);
-
-            // Subsequent stops will end quickly - no location to release,
-            // while the first one is still releasing the machine.
-            final Task<Void> secondStop = entity.invoke(Startable.STOP, 
ImmutableMap.<String, Object>of());;
-            Asserts.succeedsEventually(new Runnable() {
-                @Override
-                public void run() {
-                    assertTrue(secondStop.isDone());
-                }
-            });
-    
-            // Entity state is supposed to be STOPPED even though first 
location is still releasing. This is because the second
-            // release completed successfully. It's debatable whether this is 
the right behaviour - we could be calling the STOP
-            // effector exactly because the first call is blocked. The test is 
asserting the current behaviour.
-            assertEquals(ServiceStateLogic.getExpectedState(entity), 
Lifecycle.STOPPED);
-            EntityAsserts.assertAttributeEquals(entity, 
Attributes.SERVICE_STATE_ACTUAL, Lifecycle.STOPPED);
-            Asserts.succeedsContinually(new Runnable() {
-                @Override
-                public void run() {
-                    assertFalse(firstStop.isDone());
-                }
-            });
-
-            loc.unblock();
-
-            // After the location is released, first task ends as well.
-            EntityAsserts.assertAttributeEquals(entity, 
Attributes.SERVICE_STATE_ACTUAL, Lifecycle.STOPPED);
-            Asserts.succeedsEventually(new Runnable() {
-                @Override
-                public void run() {
-                    assertTrue(firstStop.isDone());
-                }
-            });
-
-        } finally {
-            loc.unblock();
-        }
-
-    }
-
-    @Test
-    public void testDoubleStopApp() {
-        ReflectiveEntityDriverFactory f = 
((BasicEntityDriverManager)mgmt.getEntityDriverManager()).getReflectiveDriverFactory();
-        f.addClassFullNameMapping(EmptySoftwareProcessDriver.class.getName(), 
MinimalEmptySoftwareProcessTestDriver.class.getName());
-
-        // Second stop on SoftwareProcess will return early, while the first 
stop is still in progress
-        // This causes the app to shutdown prematurely, leaking machines.
-        EntityManager emgr = mgmt.getEntityManager();
-        EntitySpec<TestApplication> appSpec = 
EntitySpec.create(TestApplication.class);
-        final TestApplication app = emgr.createEntity(appSpec);
-        emgr.manage(app);
-        EntitySpec<?> latchEntitySpec = 
EntitySpec.create(EmptySoftwareProcess.class);
-        final Entity entity = app.createAndManageChild(latchEntitySpec);
-
-        final ReleaseLatchLocation loc = 
mgmt.getLocationManager().createLocation(LocationSpec.create(ReleaseLatchLocation.class));
-        try {
-            app.start(ImmutableSet.of(loc));
-            EntityAsserts.assertAttributeEquals(entity, 
Attributes.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);
-    
-            final Task<Void> firstStop = app.invoke(Startable.STOP, 
ImmutableMap.<String, Object>of());
-            // Wait until first task tries to release the location, at this 
point the entity's reference 
-            // to the location is already cleared.
-            Asserts.succeedsEventually(new Runnable() {
-                @Override
-                public void run() {
-                    assertTrue(loc.isBlocked());
-                }
-            });
-    
-            // Subsequent stops will end quickly - no location to release,
-            // while the first one is still releasing the machine.
-            final Task<Void> secondStop = app.invoke(Startable.STOP, 
ImmutableMap.<String, Object>of());;
-            Asserts.succeedsEventually(new Runnable() {
-                @Override
-                public void run() {
-                    assertTrue(secondStop.isDone());
-                }
-            });
-    
-            // Since second stop succeeded the app will get unmanaged.
-            Asserts.succeedsEventually(new Runnable() {
-                @Override
-                public void run() {
-                    assertTrue(!Entities.isManaged(entity));
-                    assertTrue(!Entities.isManaged(app));
-                }
-            });
-    
-            // Unmanage will cancel the first task
-            Asserts.succeedsEventually(new Runnable() {
-                @Override
-                public void run() {
-                    assertTrue(firstStop.isDone());
-                }
-            });
-        } finally {
-            // We still haven't unblocked the location release, but entity is 
already unmanaged.
-            // Double STOP on an application could leak locations!!!
-            loc.unblock();
-        }
-    }
+    // 2023-01 semantics changed, so that first stop does not clear location 
until after
+//    @Test
+//    public void testDoubleStopEntity() {
+//        ReflectiveEntityDriverFactory f = 
((BasicEntityDriverManager)mgmt.getEntityDriverManager()).getReflectiveDriverFactory();
+//        
f.addClassFullNameMapping(EmptySoftwareProcessDriver.class.getName(), 
MinimalEmptySoftwareProcessTestDriver.class.getName());
+//
+//        // Second stop on SoftwareProcess will return early, while the first 
stop is still in progress
+//        // This causes the app to shutdown prematurely, leaking machines.
+//        EntityManager emgr = mgmt.getEntityManager();
+//        EntitySpec<TestApplication> appSpec = 
EntitySpec.create(TestApplication.class);
+//        TestApplication app = emgr.createEntity(appSpec);
+//        emgr.manage(app);
+//        EntitySpec<?> latchEntitySpec = 
EntitySpec.create(EmptySoftwareProcess.class);
+//        Entity entity = app.createAndManageChild(latchEntitySpec);
+//
+//        final ReleaseLatchLocation loc = 
mgmt.getLocationManager().createLocation(LocationSpec.create(ReleaseLatchLocation.class));
+//        try {
+//            app.start(ImmutableSet.of(loc));
+//            EntityAsserts.assertAttributeEquals(entity, 
Attributes.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);
+//
+//            final Task<Void> firstStop = entity.invoke(Startable.STOP, 
ImmutableMap.<String, Object>of());
+//            // Wait until first task tries to release the location, at this 
point the entity's reference
+//            // to the location is already cleared.
+//            Asserts.succeedsEventually(new Runnable() {
+//                @Override
+//                public void run() {
+//                    assertTrue(loc.isBlocked());
+//                }
+//            });
+//
+//            assertEquals(ServiceStateLogic.getExpectedState(entity), 
Lifecycle.STOPPING);
+//            EntityAsserts.assertAttributeEquals(entity, 
Attributes.SERVICE_STATE_ACTUAL, Lifecycle.STOPPING);
+//
+//            // Subsequent stops will end quickly - no location to release,
+//            // while the first one is still releasing the machine.
+//            final Task<Void> secondStop = entity.invoke(Startable.STOP, 
ImmutableMap.<String, Object>of());;
+//            Asserts.succeedsEventually(new Runnable() {
+//                @Override
+//                public void run() {
+//                    assertTrue(secondStop.isDone());
+//                }
+//            });
+//
+//            // Entity state is supposed to be STOPPED even though first 
location is still releasing. This is because the second
+//            // release completed successfully. It's debatable whether this 
is the right behaviour - we could be calling the STOP
+//            // effector exactly because the first call is blocked. The test 
is asserting the current behaviour.
+//            assertEquals(ServiceStateLogic.getExpectedState(entity), 
Lifecycle.STOPPED);
+//            EntityAsserts.assertAttributeEquals(entity, 
Attributes.SERVICE_STATE_ACTUAL, Lifecycle.STOPPED);
+//            Asserts.succeedsContinually(new Runnable() {
+//                @Override
+//                public void run() {
+//                    assertFalse(firstStop.isDone());
+//                }
+//            });
+//
+//            loc.unblock();
+//
+//            // After the location is released, first task ends as well.
+//            EntityAsserts.assertAttributeEquals(entity, 
Attributes.SERVICE_STATE_ACTUAL, Lifecycle.STOPPED);
+//            Asserts.succeedsEventually(new Runnable() {
+//                @Override
+//                public void run() {
+//                    assertTrue(firstStop.isDone());
+//                }
+//            });
+//
+//        } finally {
+//            loc.unblock();
+//        }
+//
+//    }
+//
+//    @Test
+//    public void testDoubleStopApp() {
+//        ReflectiveEntityDriverFactory f = 
((BasicEntityDriverManager)mgmt.getEntityDriverManager()).getReflectiveDriverFactory();
+//        
f.addClassFullNameMapping(EmptySoftwareProcessDriver.class.getName(), 
MinimalEmptySoftwareProcessTestDriver.class.getName());
+//
+//        // Second stop on SoftwareProcess will return early, while the first 
stop is still in progress
+//        // This causes the app to shutdown prematurely, leaking machines.
+//        EntityManager emgr = mgmt.getEntityManager();
+//        EntitySpec<TestApplication> appSpec = 
EntitySpec.create(TestApplication.class);
+//        final TestApplication app = emgr.createEntity(appSpec);
+//        emgr.manage(app);
+//        EntitySpec<?> latchEntitySpec = 
EntitySpec.create(EmptySoftwareProcess.class);
+//        final Entity entity = app.createAndManageChild(latchEntitySpec);
+//
+//        final ReleaseLatchLocation loc = 
mgmt.getLocationManager().createLocation(LocationSpec.create(ReleaseLatchLocation.class));
+//        try {
+//            app.start(ImmutableSet.of(loc));
+//            EntityAsserts.assertAttributeEquals(entity, 
Attributes.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);
+//
+//            final Task<Void> firstStop = app.invoke(Startable.STOP, 
ImmutableMap.<String, Object>of());
+//            // Wait until first task tries to release the location, at this 
point the entity's reference
+//            // to the location is already cleared.
+//            Asserts.succeedsEventually(new Runnable() {
+//                @Override
+//                public void run() {
+//                    assertTrue(loc.isBlocked());
+//                }
+//            });
+//
+//            // Subsequent stops will end quickly - no location to release,
+//            // while the first one is still releasing the machine.
+//            final Task<Void> secondStop = app.invoke(Startable.STOP, 
ImmutableMap.<String, Object>of());;
+//            Asserts.succeedsEventually(new Runnable() {
+//                @Override
+//                public void run() {
+//                    assertTrue(secondStop.isDone());
+//                }
+//            });
+//
+//            // Since second stop succeeded the app will get unmanaged.
+//            Asserts.succeedsEventually(new Runnable() {
+//                @Override
+//                public void run() {
+//                    assertTrue(!Entities.isManaged(entity));
+//                    assertTrue(!Entities.isManaged(app));
+//                }
+//            });
+//
+//            // Unmanage will cancel the first task
+//            Asserts.succeedsEventually(new Runnable() {
+//                @Override
+//                public void run() {
+//                    assertTrue(firstStop.isDone());
+//                }
+//            });
+//        } finally {
+//            // We still haven't unblocked the location release, but entity 
is already unmanaged.
+//            // Double STOP on an application could leak locations!!!
+//            loc.unblock();
+//        }
+//    }
 
     @Test
     public void testOpenPortsWithPortRangeConfig() throws Exception {
diff --git 
a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessRebindNotRunningEntityTest.java
 
b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessRebindNotRunningEntityTest.java
index 6a58efd0ee..7d1849af97 100644
--- 
a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessRebindNotRunningEntityTest.java
+++ 
b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessRebindNotRunningEntityTest.java
@@ -23,10 +23,7 @@ import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertFalse;
 import static org.testng.Assert.assertTrue;
 
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 import java.util.concurrent.Callable;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.Executors;
@@ -60,6 +57,7 @@ import org.apache.brooklyn.core.test.entity.TestApplication;
 import org.apache.brooklyn.location.byon.FixedListMachineProvisioningLocation;
 import org.apache.brooklyn.location.ssh.SshMachineLocation;
 import org.apache.brooklyn.test.Asserts;
+import org.apache.brooklyn.util.collections.MutableSet;
 import org.apache.brooklyn.util.core.internal.ssh.RecordingSshTool;
 import 
org.apache.brooklyn.util.core.internal.ssh.RecordingSshTool.CustomResponse;
 import 
org.apache.brooklyn.util.core.internal.ssh.RecordingSshTool.CustomResponseGenerator;
@@ -129,10 +127,12 @@ public class SoftwareProcessRebindNotRunningEntityTest 
extends RebindTestFixture
                 while (latch.getCount() > 0) {
                     latch.countDown();
                 }
+                if (latch instanceof TerminableCountDownLatch) 
((TerminableCountDownLatch)latch).terminate();
             }
-            super.tearDown();
+            super.tearDown(Duration.millis(10));   // stops here can be 
blocked, don't wait on them
             if (executor != null) executor.shutdownNow();
         } finally {
+            latches.clear();
             RecordingSshTool.clear();
         }
     }
@@ -181,7 +181,7 @@ public class SoftwareProcessRebindNotRunningEntityTest 
extends RebindTestFixture
             @Override
             public CustomResponse generate(ExecParams execParams) throws 
Exception {
                 launchCalledLatch.countDown();
-                launchBlockedLatch.await();
+                awaitOrFail(launchBlockedLatch, Duration.TEN_SECONDS);
                 return new CustomResponse(0, "", "");
             }});
         
@@ -209,7 +209,7 @@ public class SoftwareProcessRebindNotRunningEntityTest 
extends RebindTestFixture
             @Override
             public CustomResponse generate(ExecParams execParams) throws 
Exception {
                 stopCalledLatch.countDown();
-                stopBlockedLatch.await();
+                awaitOrFail(stopBlockedLatch, Duration.TEN_SECONDS);
                 return new CustomResponse(0, "", "");
             }});
         
@@ -292,7 +292,9 @@ public class SoftwareProcessRebindNotRunningEntityTest 
extends RebindTestFixture
         
         
         assertMarkedAsOnfire(newEntity, Lifecycle.STOPPING);
-        assertMarkedAsVmLost(newEntity, Lifecycle.STOPPING);
+
+        // 2023-01 we no longer remove VMs until we know they have been 
released, so this error does not appear
+        //assertMarkedAsVmLost(newEntity, Lifecycle.STOPPING);
 
         // Expect the marker to have been cleared on rebind (sensible because 
task is not running).
         EntityAsserts.assertAttributeEquals(newEntity, 
AttributesInternal.INTERNAL_PROVISIONING_TASK_STATE, null);
@@ -307,7 +309,7 @@ public class SoftwareProcessRebindNotRunningEntityTest 
extends RebindTestFixture
             @Override
             public CustomResponse generate(ExecParams execParams) throws 
Exception {
                 launchCalledLatch.countDown();
-                launchBlockedLatch.await();
+                awaitOrFail(launchBlockedLatch, Duration.TEN_SECONDS);
                 return new CustomResponse(0, "", "");
             }});
         
@@ -370,13 +372,38 @@ public class SoftwareProcessRebindNotRunningEntityTest 
extends RebindTestFixture
             }});
     }
 
-    protected void awaitOrFail(CountDownLatch latch, Duration timeout) throws 
Exception {
+    protected static void awaitOrFail(CountDownLatch latch, Duration timeout) 
throws Exception {
+        if (latch instanceof TerminableCountDownLatch && 
((TerminableCountDownLatch)latch).terminated) return;
+
         boolean success = latch.await(timeout.toMilliseconds(), 
TimeUnit.MILLISECONDS);
         assertTrue(success, "latch "+latch+" not satisfied in "+timeout);
     }
-    
+
+    public static class TerminableCountDownLatch extends CountDownLatch {
+        public TerminableCountDownLatch(int count) {
+            super(count);
+        }
+        public boolean terminated = false;
+        public void terminate() {
+            terminated = true;
+            awaitingThreads.forEach(Thread::interrupt);
+        }
+
+        public Set<Thread> awaitingThreads = 
Collections.synchronizedSet(MutableSet.of());
+        @Override
+        public boolean await(long timeout, TimeUnit unit) throws 
InterruptedException {
+            if (terminated) throw new IllegalStateException("Not permitted to 
await on a terminated latch");
+            try {
+                awaitingThreads.add(Thread.currentThread());
+                return super.await(timeout, unit);
+            } finally {
+                awaitingThreads.remove(Thread.currentThread());
+            }
+        }
+    }
+
     protected CountDownLatch newLatch(int count) {
-        CountDownLatch result = new CountDownLatch(count);
+        CountDownLatch result = new TerminableCountDownLatch(count);
         latches.add(result);
         return result;
     }
@@ -443,8 +470,8 @@ public class SoftwareProcessRebindNotRunningEntityTest 
extends RebindTestFixture
             
             if (calledLatch != null) calledLatch.countDown();
             try {
-                if (blockedLatch != null) blockedLatch.await();
-            } catch (InterruptedException e) {
+                if (blockedLatch != null) awaitOrFail(blockedLatch, 
Duration.TEN_SECONDS);
+            } catch (Exception e) {
                 throw Exceptions.propagate(e);
             }
             return 
getManagementContext().getLocationManager().createLocation(machineSpec);
@@ -459,8 +486,8 @@ public class SoftwareProcessRebindNotRunningEntityTest 
extends RebindTestFixture
             
             if (calledLatch != null) calledLatch.countDown();
             try {
-                if (blockedLatch != null) blockedLatch.await();
-            } catch (InterruptedException e) {
+                if (blockedLatch != null) awaitOrFail(blockedLatch, 
Duration.TEN_SECONDS);
+            } catch (Exception e) {
                 throw Exceptions.propagate(e);
             }
         }

Reply via email to