Repository: incubator-brooklyn Updated Branches: refs/heads/master 42e9aad4e -> 75194760d
misc cleanups for HA and shutdown * HA list in GUI reports if data is stale (makes it obvious is some servers are likely dead) * On "Clear HA nodes" false masters are removed (fix bug where lots of masters, including stale, aren't cleared) * On shutdown, RebindManager.waitForPending wasn't doing the right thing when invoked by stop, meaning state wasn't being written (really bad if you click "stop all apps", as the final deletion isn't actually persisted!) Project: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/commit/eef78912 Tree: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/tree/eef78912 Diff: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/diff/eef78912 Branch: refs/heads/master Commit: eef78912bb1ccc8193d63b6f5e83a71f6083bc72 Parents: fa09efc Author: Alex Heneveld <[email protected]> Authored: Sat May 9 12:58:29 2015 +0100 Committer: Alex Heneveld <[email protected]> Committed: Sun May 10 10:02:33 2015 -0700 ---------------------------------------------------------------------- .../catalog/internal/CatalogInitialization.java | 2 +- .../rebind/PeriodicDeltaChangeListener.java | 169 ++++++++++--------- .../brooklyn/entity/rebind/RebindIteration.java | 6 +- .../entity/rebind/RebindManagerImpl.java | 4 +- .../ha/HighAvailabilityManagerImpl.java | 10 +- .../brooklyn/management/ha/HotStandbyTest.java | 14 +- .../main/webapp/assets/js/view/ha-summary.js | 98 ++++++----- .../brooklyn/rest/filter/LoggingFilter.java | 14 +- .../brooklyn/rest/resources/ServerResource.java | 69 +++++--- .../java/brooklyn/util/javalang/Threads.java | 12 +- 10 files changed, 231 insertions(+), 167 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/eef78912/core/src/main/java/brooklyn/catalog/internal/CatalogInitialization.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/brooklyn/catalog/internal/CatalogInitialization.java b/core/src/main/java/brooklyn/catalog/internal/CatalogInitialization.java index 1710384..ded7dc4 100644 --- a/core/src/main/java/brooklyn/catalog/internal/CatalogInitialization.java +++ b/core/src/main/java/brooklyn/catalog/internal/CatalogInitialization.java @@ -128,7 +128,7 @@ public class CatalogInitialization implements ManagementContextInjectable { if (!catalog.getCatalog().isLoaded()) { catalog.load(); } else { - if (hasRunOfficial || hasRunBestEffort) { + if (needsInitial && (hasRunOfficial || hasRunBestEffort)) { // an indication that something caused it to load early; not severe, but unusual log.warn("Catalog initialization has not properly run but management context has a catalog; re-populating, possibly overwriting items installed during earlier access (it may have been an early web request)"); catalog.reset(ImmutableList.<CatalogItem<?,?>>of()); http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/eef78912/core/src/main/java/brooklyn/entity/rebind/PeriodicDeltaChangeListener.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/brooklyn/entity/rebind/PeriodicDeltaChangeListener.java b/core/src/main/java/brooklyn/entity/rebind/PeriodicDeltaChangeListener.java index 281f4fa..cd33f01 100644 --- a/core/src/main/java/brooklyn/entity/rebind/PeriodicDeltaChangeListener.java +++ b/core/src/main/java/brooklyn/entity/rebind/PeriodicDeltaChangeListener.java @@ -25,7 +25,6 @@ import java.util.concurrent.Callable; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicLong; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,12 +53,11 @@ import brooklyn.util.task.ScheduledTask; import brooklyn.util.task.Tasks; import brooklyn.util.time.CountdownTimer; import brooklyn.util.time.Duration; -import brooklyn.util.time.Time; -import com.google.common.collect.Lists; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Stopwatch; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.collect.Sets; /** @@ -164,14 +162,12 @@ public class PeriodicDeltaChangeListener implements ChangeListener { private final PersistenceExceptionHandler exceptionHandler; private final Duration period; - - private final AtomicLong writeCount = new AtomicLong(); - + private DeltaCollector deltaCollector = new DeltaCollector(); private volatile boolean running = false; - private volatile boolean stopped = false; + private volatile boolean stopping = false, stopCompleted = false; private volatile ScheduledTask scheduledTask; @@ -180,7 +176,7 @@ public class PeriodicDeltaChangeListener implements ChangeListener { private final boolean persistFeedsEnabled; private final Semaphore persistingMutex = new Semaphore(1); - private final Object startMutex = new Object(); + private final Object startStopMutex = new Object(); private PersistenceActivityMetrics metrics; @@ -198,42 +194,20 @@ public class PeriodicDeltaChangeListener implements ChangeListener { @SuppressWarnings("unchecked") public void start() { - synchronized (startMutex) { + synchronized (startStopMutex) { if (running || (scheduledTask!=null && !scheduledTask.isDone())) { LOG.warn("Request to start "+this+" when already running - "+scheduledTask+"; ignoring"); return; } - stopped = false; + stopCompleted = false; running = true; Callable<Task<?>> taskFactory = new Callable<Task<?>>() { @Override public Task<Void> call() { return Tasks.<Void>builder().dynamic(false).name("periodic-persister").body(new Callable<Void>() { public Void call() { - Stopwatch timer = Stopwatch.createStarted(); - try { - persistNow(); - metrics.noteSuccess(Duration.of(timer)); - return null; - } catch (RuntimeInterruptedException e) { - LOG.debug("Interrupted persisting change-delta (rethrowing)", e); - metrics.noteFailure(Duration.of(timer)); - metrics.noteError(e.toString()); - Thread.currentThread().interrupt(); - return null; - } catch (Exception e) { - // Don't rethrow: the behaviour of executionManager is different from a scheduledExecutorService, - // if we throw an exception, then our task will never get executed again - LOG.error("Problem persisting change-delta", e); - metrics.noteFailure(Duration.of(timer)); - metrics.noteError(e.toString()); - return null; - } catch (Throwable t) { - LOG.warn("Problem persisting change-delta (rethrowing)", t); - metrics.noteFailure(Duration.of(timer)); - metrics.noteError(t.toString()); - throw Exceptions.propagate(t); - } + persistNowSafely(false); + return null; }}).build(); } }; @@ -247,68 +221,73 @@ public class PeriodicDeltaChangeListener implements ChangeListener { stop(Duration.TEN_SECONDS, Duration.ONE_SECOND); } void stop(Duration timeout, Duration graceTimeoutForSubsequentOperations) { - stopped = true; - running = false; - - if (scheduledTask != null) { - CountdownTimer expiry = timeout.countdownTimer(); - scheduledTask.cancel(false); + synchronized (startStopMutex) { + running = false; try { - waitForPendingComplete(expiry.getDurationRemaining().lowerBound(Duration.ZERO).add(graceTimeoutForSubsequentOperations)); - } catch (Exception e) { - throw Exceptions.propagate(e); - } - scheduledTask.blockUntilEnded(expiry.getDurationRemaining().lowerBound(Duration.ZERO).add(graceTimeoutForSubsequentOperations)); - scheduledTask.cancel(true); - boolean reallyEnded = Tasks.blockUntilInternalTasksEnded(scheduledTask, expiry.getDurationRemaining().lowerBound(Duration.ZERO).add(graceTimeoutForSubsequentOperations)); - if (!reallyEnded) { - LOG.warn("Persistence tasks took too long to complete when stopping persistence (ignoring): "+scheduledTask); - } - scheduledTask = null; - } + stopping = true; + + if (scheduledTask != null) { + CountdownTimer expiry = timeout.countdownTimer(); + try { + scheduledTask.cancel(false); + waitForPendingComplete(expiry.getDurationRemaining().lowerBound(Duration.ZERO).add(graceTimeoutForSubsequentOperations)); + } catch (Exception e) { + throw Exceptions.propagate(e); + } + scheduledTask.blockUntilEnded(expiry.getDurationRemaining().lowerBound(Duration.ZERO).add(graceTimeoutForSubsequentOperations)); + scheduledTask.cancel(true); + boolean reallyEnded = Tasks.blockUntilInternalTasksEnded(scheduledTask, expiry.getDurationRemaining().lowerBound(Duration.ZERO).add(graceTimeoutForSubsequentOperations)); + if (!reallyEnded) { + LOG.warn("Persistence tasks took too long to terminate, when stopping persistence, although pending changes were persisted (ignoring): "+scheduledTask); + } + scheduledTask = null; + } - // Discard all state that was waiting to be persisted - synchronized (this) { - deltaCollector = new DeltaCollector(); + // Discard all state that was waiting to be persisted + synchronized (this) { + deltaCollector = new DeltaCollector(); + } + } finally { + stopCompleted = true; + stopping = false; + } } } /** - * This method must only be used for testing. If required in production, then revisit implementation! * @deprecated since 0.7.0, use {@link #waitForPendingComplete(Duration)} */ @VisibleForTesting public void waitForPendingComplete(long timeout, TimeUnit unit) throws InterruptedException, TimeoutException { waitForPendingComplete(Duration.of(timeout, unit)); } + /** Waits for any in-progress writes to be completed then for or any unwritten data to be written. */ @VisibleForTesting public void waitForPendingComplete(Duration timeout) throws InterruptedException, TimeoutException { - // Every time we finish writing, we increment a counter. We note the current val, and then - // wait until we can guarantee that a complete additional write has been done. Not sufficient - // to wait for `writeCount > origWriteCount` because we might have read the value when almost - // finished a write. + if (!isActive() && !stopping) return; - long startTime = System.currentTimeMillis(); - long maxEndtime = timeout.isPositive() ? startTime + timeout.toMillisecondsRoundingUp() : Long.MAX_VALUE; - long origWriteCount = writeCount.get(); - while (true) { - if (!isActive()) { - return; // no pending activity; - } else if (writeCount.get() > (origWriteCount+1)) { - return; - } - - if (System.currentTimeMillis() > maxEndtime) { - throw new TimeoutException("Timeout waiting for pending complete of rebind-periodic-delta, after "+Time.makeTimeStringRounded(timeout)); + CountdownTimer timer = timeout.isPositive() ? CountdownTimer.newInstanceStarted(timeout) : CountdownTimer.newInstancePaused(Duration.PRACTICALLY_FOREVER); + // wait for mutex, so we aren't tricked by an in-progress who has already recycled the collector + if (persistingMutex.tryAcquire(timer.getDurationRemaining().toMilliseconds(), TimeUnit.MILLISECONDS)) { + try { + // now no one else is writing + if (!deltaCollector.isEmpty()) { + // but there is data that needs to be written + persistNowSafely(true); + } + } finally { + persistingMutex.release(); } - Thread.sleep(1); + } else { + // someone else has been writing for the entire time + throw new TimeoutException("Timeout waiting for completion of in-progress write of rebind-periodic-delta, after "+timer.getDurationElapsed()); } } /** - * Indicates whether to persist things now. Even when not active, we will still store what needs - * to be persisted unless {@link #isStopped()}. + * Indicates whether persistence is active. + * Even when not active, changes will still be tracked unless {@link #isStopped()}. */ private boolean isActive() { return running && persister != null && !isStopped(); @@ -318,7 +297,7 @@ public class PeriodicDeltaChangeListener implements ChangeListener { * Whether we have been stopped, in which case will not persist or store anything. */ private boolean isStopped() { - return stopped || executionContext.isShutdown(); + return stopping || stopCompleted || executionContext.isShutdown(); } private void addReferencedObjects(DeltaCollector deltaCollector) { @@ -348,13 +327,40 @@ public class PeriodicDeltaChangeListener implements ChangeListener { } @VisibleForTesting - public void persistNow() { - if (!isActive()) { + public boolean persistNowSafely(boolean alreadyHasMutex) { + Stopwatch timer = Stopwatch.createStarted(); + try { + persistNowInternal(alreadyHasMutex); + metrics.noteSuccess(Duration.of(timer)); + return true; + } catch (RuntimeInterruptedException e) { + LOG.debug("Interrupted persisting change-delta (rethrowing)", e); + metrics.noteFailure(Duration.of(timer)); + metrics.noteError(e.toString()); + Thread.currentThread().interrupt(); + return false; + } catch (Exception e) { + // Don't rethrow: the behaviour of executionManager is different from a scheduledExecutorService, + // if we throw an exception, then our task will never get executed again + LOG.error("Problem persisting change-delta", e); + metrics.noteFailure(Duration.of(timer)); + metrics.noteError(e.toString()); + return false; + } catch (Throwable t) { + LOG.warn("Problem persisting change-delta (rethrowing)", t); + metrics.noteFailure(Duration.of(timer)); + metrics.noteError(t.toString()); + throw Exceptions.propagate(t); + } + } + + protected void persistNowInternal(boolean alreadyHasMutex) { + if (!isActive() && !stopping) { return; } try { - persistingMutex.acquire(); - if (!isActive()) return; + if (!alreadyHasMutex) persistingMutex.acquire(); + if (!isActive() && !stopping) return; // Atomically switch the delta, so subsequent modifications will be done in the // next scheduled persist @@ -419,8 +425,7 @@ public class PeriodicDeltaChangeListener implements ChangeListener { LOG.debug("Problem persisting, but no longer active (ignoring)", e); } } finally { - writeCount.incrementAndGet(); - persistingMutex.release(); + if (!alreadyHasMutex) persistingMutex.release(); } } http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/eef78912/core/src/main/java/brooklyn/entity/rebind/RebindIteration.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/brooklyn/entity/rebind/RebindIteration.java b/core/src/main/java/brooklyn/entity/rebind/RebindIteration.java index 136cb5b..6124a54 100644 --- a/core/src/main/java/brooklyn/entity/rebind/RebindIteration.java +++ b/core/src/main/java/brooklyn/entity/rebind/RebindIteration.java @@ -301,7 +301,7 @@ public abstract class RebindIteration { @SuppressWarnings("deprecation") protected void rebuildCatalog() { - // build catalog early so we can load other things + // Build catalog early so we can load other things checkEnteringPhase(2); // Instantiate catalog items @@ -341,7 +341,7 @@ public abstract class RebindIteration { } } - // see notes in CatalogInitialization + // See notes in CatalogInitialization Collection<CatalogItem<?, ?>> catalogItems = rebindContext.getCatalogItems(); CatalogInitialization catInit = ((ManagementContextInternal)managementContext).getCatalogInitialization(); @@ -398,6 +398,8 @@ public abstract class RebindIteration { } // TODO in read-only mode, perhaps do this less frequently than entities etc ? + // both in RW and in RO mode, the first run reads the initialization data; + // maybe not desired for RO as it defers problems, although if it's standalone it is desired catInit.populateCatalog(needsInitialCatalog, itemsForResettingCatalog); } http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/eef78912/core/src/main/java/brooklyn/entity/rebind/RebindManagerImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/brooklyn/entity/rebind/RebindManagerImpl.java b/core/src/main/java/brooklyn/entity/rebind/RebindManagerImpl.java index 71d5218..caf04c5 100644 --- a/core/src/main/java/brooklyn/entity/rebind/RebindManagerImpl.java +++ b/core/src/main/java/brooklyn/entity/rebind/RebindManagerImpl.java @@ -447,7 +447,9 @@ public class RebindManagerImpl implements RebindManager { } persistenceStoreAccess.checkpoint(memento, exceptionHandler); } else { - persistenceRealChangeListener.persistNow(); + if (!persistenceRealChangeListener.persistNowSafely(false)) { + throw new IllegalStateException("Forced persistence failed; see logs fore more detail"); + } } } http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/eef78912/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java b/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java index 70bb13d..ab033fd 100644 --- a/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java +++ b/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java @@ -73,6 +73,7 @@ import brooklyn.util.time.Time; import com.google.common.annotations.Beta; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; +import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.base.Stopwatch; import com.google.common.base.Ticker; @@ -606,9 +607,14 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager { return; } brooklyn.management.ha.ManagementPlaneSyncRecordDeltaImpl.Builder db = ManagementPlaneSyncRecordDeltaImpl.builder(); - for (Map.Entry<String,ManagementNodeSyncRecord> node: plane.getManagementNodes().entrySet()) - if (!ManagementNodeState.MASTER.equals(node.getValue().getStatus())) + for (Map.Entry<String,ManagementNodeSyncRecord> node: plane.getManagementNodes().entrySet()) { + // only keep a node if it both claims master and is recognised as master; + // else ex-masters who died are kept around! + if (!ManagementNodeState.MASTER.equals(node.getValue().getStatus()) || + !Objects.equal(plane.getMasterNodeId(), node.getValue().getNodeId())) { db.removedNodeId(node.getKey()); + } + } persister.delta(db.build()); // then get, so model is updated loadManagementPlaneSyncRecord(true); http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/eef78912/core/src/test/java/brooklyn/management/ha/HotStandbyTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/brooklyn/management/ha/HotStandbyTest.java b/core/src/test/java/brooklyn/management/ha/HotStandbyTest.java index 9c36eb8..f33f716 100644 --- a/core/src/test/java/brooklyn/management/ha/HotStandbyTest.java +++ b/core/src/test/java/brooklyn/management/ha/HotStandbyTest.java @@ -179,7 +179,7 @@ public class HotStandbyTest { // TestApplication app = ApplicationBuilder.newManagedApp(EntitySpec.create(TestApplication.class).impl(TestApplicationNoEnrichersImpl.class), n1.mgmt); app.setDisplayName("First App"); app.start(MutableList.<Location>of()); - app.setConfig(TestEntity.CONF_NAME, "first-app"); + app.config().set(TestEntity.CONF_NAME, "first-app"); app.setAttribute(TestEntity.SEQUENCE, 3); forcePersistNow(n1); @@ -251,7 +251,7 @@ public class HotStandbyTest { // test changes app.setDisplayName("First App Renamed"); - app.setConfig(TestEntity.CONF_NAME, "first-app-renamed"); + app.config().set(TestEntity.CONF_NAME, "first-app-renamed"); app.setAttribute(TestEntity.SEQUENCE, 4); appRO = expectRebindSequenceNumber(n1, n2, app, 4, true); @@ -262,7 +262,7 @@ public class HotStandbyTest { // and change again for good measure! app.setDisplayName("First App"); - app.setConfig(TestEntity.CONF_NAME, "first-app-restored"); + app.config().set(TestEntity.CONF_NAME, "first-app-restored"); app.setAttribute(TestEntity.SEQUENCE, 5); appRO = expectRebindSequenceNumber(n1, n2, app, 5, true); @@ -296,7 +296,7 @@ public class HotStandbyTest { TestEntity child = app.addChild(EntitySpec.create(TestEntity.class).configure(TestEntity.CONF_NAME, "first-child")); Entities.manage(child); TestApplication app2 = TestApplication.Factory.newManagedInstanceForTests(n1.mgmt); - app2.setConfig(TestEntity.CONF_NAME, "second-app"); + app2.config().set(TestEntity.CONF_NAME, "second-app"); app.setAttribute(TestEntity.SEQUENCE, 4); appRO = expectRebindSequenceNumber(n1, n2, app, 4, immediate); @@ -425,7 +425,7 @@ public class HotStandbyTest { TestApplication app = createFirstAppAndPersist(n1); noteUsedMemory("Finished seeding"); Long initialUsed = usedMemory.peekLast(); - app.setConfig(TestEntity.CONF_OBJECT, new BigObject(SIZE*1000*1000)); + app.config().set(TestEntity.CONF_OBJECT, new BigObject(SIZE*1000*1000)); assertUsedMemoryMaxDelta("Set a big config object", SIZE_UP_BOUND); forcePersistNow(n1); assertUsedMemoryMaxDelta("Persisted a big config object", SIZE_IN_XML); @@ -443,7 +443,7 @@ public class HotStandbyTest { } assertUsedMemoryMaxDelta("And more rebinds and more persists", GRACE); - app.setConfig(TestEntity.CONF_OBJECT, "big is now small"); + app.config().set(TestEntity.CONF_OBJECT, "big is now small"); assertUsedMemoryMaxDelta("Big made small at primary", -SIZE_DOWN_BOUND); forcePersistNow(n1); assertUsedMemoryMaxDelta("And persisted", -SIZE_IN_XML_DOWN); @@ -521,7 +521,7 @@ public class HotStandbyTest { TestEntity child = app.addChild(EntitySpec.create(TestEntity.class).configure(TestEntity.CONF_NAME, "first-child")); Entities.manage(child); TestApplication app2 = TestApplication.Factory.newManagedInstanceForTests(n1.mgmt); - app2.setConfig(TestEntity.CONF_NAME, "second-app"); + app2.config().set(TestEntity.CONF_NAME, "second-app"); forcePersistNow(n1); n2.ha.setPriority(1); http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/eef78912/usage/jsgui/src/main/webapp/assets/js/view/ha-summary.js ---------------------------------------------------------------------- diff --git a/usage/jsgui/src/main/webapp/assets/js/view/ha-summary.js b/usage/jsgui/src/main/webapp/assets/js/view/ha-summary.js index 2032d1d..250977e 100644 --- a/usage/jsgui/src/main/webapp/assets/js/view/ha-summary.js +++ b/usage/jsgui/src/main/webapp/assets/js/view/ha-summary.js @@ -29,10 +29,11 @@ define([ "<% if (nodeUri && !isTerminated) { %><a href='<%= nodeUri %>'><%= nodeId %></a><% } else { %><%= nodeId %><% } %>" + "<% if (isSelf) { %><span class='pull-right badge badge-success'>this</span><% } %>" + "</td>" + - "<td><%= status %></td>" + + "<td><% if (isPretendMaster) {%>EX-MASTER<%} else {%><%= status %><%} if (isStale) { %> (stale)<% } %></td>" + "<td><%= timestampDisplayPrefix %><span class='timestamp' data-timestamp='<%= timestamp %>'><%= timestampDisplay %><span><%= timestampDisplaySuffix %></td>" + "</tr>"); - var noServers = "<tr><td colspan='3'><i>Failed to load servers!</i></td></tr>"; + var noServers = "<tr><td colspan='3'><i>Failed to load data of servers</i></td></tr>"; + var waitingServers = "<tr><td colspan='3'><i>Waiting on detail for servers...</i></td></tr>"; var HASummaryView = Backbone.View.extend({ initialize: function() { @@ -49,53 +50,74 @@ define([ }, render: function() { this.$el.html(template()); - if (serverStatus.loaded) { - this.renderNodeStatus(); - } + this.renderNodeStatus(); return this; }, renderNodeStatus: function() { + var $target = this.$(".ha-summary-table-body"); + if (!serverStatus.loaded) { + $target.html(waitingServers); + return; + } + var serverHa = serverStatus.get("ha") || {}; var master = serverHa.masterId, self = serverHa.ownId, - nodes = serverHa.nodes, - $target = this.$(".ha-summary-table-body"); - $target.empty(); + nodes = serverHa.nodes; + // undefined check just in case server returns something odd if (nodes == undefined || _.isEmpty(nodes)) { $target.html(noServers); - } else { - _.each(nodes, function (n) { - var node = _.clone(n); - node.timestampDisplayPrefix = ""; - node.timestampDisplaySuffix = ""; - if (node['remoteTimestamp']) { - node.timestamp = node.remoteTimestamp; - } else { - node.timestamp = node.localTimestamp; - node.timestampDisplaySuffix = " (local)"; - } - if (node.timestamp >= moment().utc() + 10*1000) { - // if server reports time significantly in future, report this, with no timestampe - node.timestampDisplayPrefix = "server clock in future by "+ - moment.duration(moment(node.timestamp).diff(moment())).humanize(); - node.timestamp = ""; - node.timestampDisplay = ""; - } else { - // else use timestamp - if (node.timestamp >= moment().utc()) { - // but if just a little bit in future, backdate to show "a few seconds ago" - node.timestamp = moment().utc()-1; - } - node.timestampDisplay = moment(node.timestamp).fromNow(); + return; + } + + $target.empty(); + var masterTimestamp; + _.each(nodes, function (n) { + if (n.nodeId == master && n.remoteTimestamp) { + masterTimestamp = n.remoteTimestamp; } - - node.isSelf = node.nodeId == self; - node.isMaster = self == master; - node.isTerminated = node.status == "TERMINATED"; - $target.append(nodeRowTemplate(node)); }); - } + + _.each(nodes, function (n) { + var node = _.clone(n); + node.timestampDisplayPrefix = ""; + node.timestampDisplaySuffix = ""; + if (node['remoteTimestamp']) { + node.timestamp = node.remoteTimestamp; + } else { + node.timestamp = node.localTimestamp; + node.timestampDisplaySuffix = " (local)"; + } + if (node.timestamp >= moment().utc() + 10*1000) { + // if server reports time significantly in future, report this, with no timestampe + node.timestampDisplayPrefix = "server clock in future by "+ + moment.duration(moment(node.timestamp).diff(moment())).humanize(); + node.timestamp = ""; + node.timestampDisplay = ""; + } else { + // else use timestamp + if (node.timestamp >= moment().utc()) { + // but if just a little bit in future, backdate to show "a few seconds ago" + node.timestamp = moment().utc()-1; + } + node.timestampDisplay = moment(node.timestamp).fromNow(); + } + + node.isSelf = node.nodeId == self; + node.isMaster = self == master; + if (node.status == "TERMINATED") { + node.isTerminated = true; + node.isPretendMaster = false; + node.isStale = false; + } else { + node.isTerminated = false; + node.isPretendMaster = (!node.isMaster && node.status == "MASTER" && master != node.nodeId); + node.isStale = (masterTimestamp && node.timestamp + 30*1000 < masterTimestamp); + } + + $target.append(nodeRowTemplate(node)); + }); }, updateTimestamps: function() { this.$(".timestamp").each(function(index, t) { http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/eef78912/usage/rest-server/src/main/java/brooklyn/rest/filter/LoggingFilter.java ---------------------------------------------------------------------- diff --git a/usage/rest-server/src/main/java/brooklyn/rest/filter/LoggingFilter.java b/usage/rest-server/src/main/java/brooklyn/rest/filter/LoggingFilter.java index 1576555..b57ff72 100644 --- a/usage/rest-server/src/main/java/brooklyn/rest/filter/LoggingFilter.java +++ b/usage/rest-server/src/main/java/brooklyn/rest/filter/LoggingFilter.java @@ -67,7 +67,6 @@ public class LoggingFilter implements Filter { HttpServletRequest httpRequest = (HttpServletRequest) request; HttpServletResponse httpResponse = (HttpServletResponse) response; - String uri = httpRequest.getRequestURI(); String rid = RequestTaggingFilter.getTag(); boolean isInteresting = INTERESTING_METHODS.contains(httpRequest.getMethod()), shouldLog = (isInteresting && LOG.isDebugEnabled()) || LOG.isTraceEnabled(), @@ -75,8 +74,8 @@ public class LoggingFilter implements Filter { Stopwatch timer = Stopwatch.createUnstarted(); try { if (shouldLog) { - String message = "{} starting request {} {}"; - Object[] args = new Object[]{rid, httpRequest.getMethod(), uri}; + String message = "Request {} starting: {} {} from {}"; + Object[] args = new Object[]{rid, httpRequest.getMethod(), httpRequest.getRequestURI(), httpRequest.getRemoteAddr()}; if (isInteresting) { LOG.debug(message, args); } else { @@ -89,7 +88,7 @@ public class LoggingFilter implements Filter { } catch (Throwable e) { requestErrored = true; - LOG.warn("REST API request " + rid + " failed: " + e, e); + LOG.warn("Request " + rid + " ("+httpRequest.getMethod()+" "+httpRequest.getRequestURI()+" from "+httpRequest.getRemoteAddr()+") failed: " + e, e); // Propagate for handling by other filter throw Exceptions.propagate(e); } finally { @@ -111,10 +110,11 @@ public class LoggingFilter implements Filter { private String getRequestCompletedMessage(boolean includeHeaders, Duration elapsed, String id, HttpServletRequest httpRequest, HttpServletResponse httpResponse) { - StringBuilder message = new StringBuilder(id) - .append(" complete in roughly ") + StringBuilder message = new StringBuilder("Request ") + .append(id) + .append(" completed in ") .append(elapsed) - .append(". Responding ") + .append(": response ") .append(httpResponse.getStatus()) .append(" for ") .append(httpRequest.getMethod()) http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/eef78912/usage/rest-server/src/main/java/brooklyn/rest/resources/ServerResource.java ---------------------------------------------------------------------- diff --git a/usage/rest-server/src/main/java/brooklyn/rest/resources/ServerResource.java b/usage/rest-server/src/main/java/brooklyn/rest/resources/ServerResource.java index a9a2225..03d51fc 100644 --- a/usage/rest-server/src/main/java/brooklyn/rest/resources/ServerResource.java +++ b/usage/rest-server/src/main/java/brooklyn/rest/resources/ServerResource.java @@ -126,45 +126,62 @@ public class ServerResource extends AbstractBrooklynRestResource implements Serv new Thread("shutdown") { public void run() { - if (stopAppsFirst) { - CountdownTimer shutdownTimeoutTimer = null; - if (!shutdownTimeout.equals(Duration.ZERO)) { - shutdownTimeoutTimer = shutdownTimeout.countdownTimer(); - } + boolean terminateTried = false; + try { + if (stopAppsFirst) { + CountdownTimer shutdownTimeoutTimer = null; + if (!shutdownTimeout.equals(Duration.ZERO)) { + shutdownTimeoutTimer = shutdownTimeout.countdownTimer(); + } - List<Task<?>> stoppers = new ArrayList<Task<?>>(); - for (Application app: mgmt().getApplications()) { - if (app instanceof StartableApplication) - stoppers.add(Entities.invokeEffector((EntityLocal)app, app, StartableApplication.STOP)); - } + List<Task<?>> stoppers = new ArrayList<Task<?>>(); + for (Application app: mgmt().getApplications()) { + if (app instanceof StartableApplication) + stoppers.add(Entities.invokeEffector((EntityLocal)app, app, StartableApplication.STOP)); + } - try { for (Task<?> t: stoppers) { if (!waitAppShutdown(shutdownTimeoutTimer, t)) { //app stop error hasAppErrorsOrTimeout.set(true); } } - } catch (TimeoutException e) { + } + + terminateTried = true; + ((ManagementContextInternal)mgmt()).terminate(); + + } catch (Throwable e) { + Throwable interesting = Exceptions.getFirstInteresting(e); + if (interesting instanceof TimeoutException) { //timeout while waiting for apps to stop + log.warn("Timeout shutting down: "+Exceptions.collapseText(e)); + log.debug("Timeout shutting down: "+e, e); hasAppErrorsOrTimeout.set(true); + + } else { + // swallow fatal, so we notify the outer loop to continue with shutdown + log.error("Unexpected error shutting down: "+Exceptions.collapseText(e), e); + } - - if (hasAppErrorsOrTimeout.get() && !forceShutdownOnError) { - complete(); - //There are app errors, don't exit the process. - return; + hasAppErrorsOrTimeout.set(true); + + if (!terminateTried) { + ((ManagementContextInternal)mgmt()).terminate(); } + } finally { + + complete(); + + if (!hasAppErrorsOrTimeout.get() || forceShutdownOnError) { + //give the http request a chance to complete gracefully + Time.sleep(delayForHttpReturn); + System.exit(0); + } + + // There are app errors, don't exit the process, allowing any exception to continue throwing + log.warn("Abandoning shutdown because there were errors and shutdown was not forced."); } - - ((ManagementContextInternal)mgmt()).terminate(); - - complete(); - - //give the http request a chance to complete gracefully - Time.sleep(delayForHttpReturn); - - System.exit(0); } private void complete() { http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/eef78912/utils/common/src/main/java/brooklyn/util/javalang/Threads.java ---------------------------------------------------------------------- diff --git a/utils/common/src/main/java/brooklyn/util/javalang/Threads.java b/utils/common/src/main/java/brooklyn/util/javalang/Threads.java index 1760ae2..b3af17b 100644 --- a/utils/common/src/main/java/brooklyn/util/javalang/Threads.java +++ b/utils/common/src/main/java/brooklyn/util/javalang/Threads.java @@ -21,6 +21,8 @@ package brooklyn.util.javalang; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import brooklyn.util.exceptions.Exceptions; + public class Threads { private static final Logger log = LoggerFactory.getLogger(Threads.class); @@ -44,7 +46,15 @@ public class Threads { return Runtime.getRuntime().removeShutdownHook(hook); } catch (IllegalStateException e) { // probably shutdown in progress - log.debug("cannot remove shutdown hook "+hook+": "+e); + String text = Exceptions.collapseText(e); + if (text.contains("Shutdown in progress")) { + if (log.isTraceEnabled()) { + log.trace("Could not remove shutdown hook "+hook+": "+text); + } + } else { + log.warn("Could not remove shutdown hook "+hook+": "+text); + log.debug("Shutdown hook removal details: "+e, e); + } return false; } }
