Repository: aurora Updated Branches: refs/heads/master eb3660d9e -> 3bdf0274c
Expose stats on ZooKeeper connection state * zk_connection_state_STATE shows 1 if STATE is the current connection state, otherwise 0. * zk_connection_state_STATE_counter counts the number of times STATE has occurred since scheduler start Bugs closed: AURORA-1838 Reviewed at https://reviews.apache.org/r/54624/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/3bdf0274 Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/3bdf0274 Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/3bdf0274 Branch: refs/heads/master Commit: 3bdf0274c18b77df354d3e0bf138e25072ed4706 Parents: eb3660d Author: Jing Chen <milantr...@gmail.com> Authored: Tue Jan 10 23:35:21 2017 +0100 Committer: Stephan Erb <s...@apache.org> Committed: Tue Jan 10 23:35:21 2017 +0100 ---------------------------------------------------------------------- .../CuratorServiceDiscoveryModule.java | 69 +++++++++++++++++++- .../discovery/AbstractDiscoveryModuleTest.java | 4 ++ 2 files changed, 72 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/3bdf0274/src/main/java/org/apache/aurora/scheduler/discovery/CuratorServiceDiscoveryModule.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/aurora/scheduler/discovery/CuratorServiceDiscoveryModule.java b/src/main/java/org/apache/aurora/scheduler/discovery/CuratorServiceDiscoveryModule.java index 999a542..ea167a8 100644 --- a/src/main/java/org/apache/aurora/scheduler/discovery/CuratorServiceDiscoveryModule.java +++ b/src/main/java/org/apache/aurora/scheduler/discovery/CuratorServiceDiscoveryModule.java @@ -15,11 +15,13 @@ package org.apache.aurora.scheduler.discovery; import java.net.InetSocketAddress; import java.util.List; +import java.util.concurrent.atomic.AtomicLong; import java.util.function.Predicate; import javax.inject.Singleton; 
import com.google.common.base.Joiner; +import com.google.common.base.Supplier; import com.google.common.collect.FluentIterable; import com.google.inject.Exposed; import com.google.inject.PrivateModule; @@ -32,6 +34,7 @@ import org.apache.aurora.common.io.Codec; import org.apache.aurora.common.net.InetSocketAddressHelper; import org.apache.aurora.common.quantity.Amount; import org.apache.aurora.common.quantity.Time; +import org.apache.aurora.common.stats.StatsProvider; import org.apache.aurora.common.thrift.ServiceInstance; import org.apache.aurora.common.zookeeper.Credentials; import org.apache.aurora.common.zookeeper.ServerSet; @@ -41,7 +44,10 @@ import org.apache.curator.RetryPolicy; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFrameworkFactory; import org.apache.curator.framework.api.ACLProvider; +import org.apache.curator.framework.listen.Listenable; import org.apache.curator.framework.recipes.cache.PathChildrenCache; +import org.apache.curator.framework.state.ConnectionState; +import org.apache.curator.framework.state.ConnectionStateListener; import org.apache.curator.retry.BoundedExponentialBackoffRetry; import org.apache.curator.utils.PathUtils; import org.apache.zookeeper.data.ACL; @@ -58,6 +64,8 @@ class CuratorServiceDiscoveryModule extends PrivateModule { private final String discoveryPath; private final ZooKeeperConfig zooKeeperConfig; + private ConnectionState currentState; + CuratorServiceDiscoveryModule(String discoveryPath, ZooKeeperConfig zooKeeperConfig) { this.discoveryPath = PathUtils.validatePath(discoveryPath); this.zooKeeperConfig = requireNonNull(zooKeeperConfig); @@ -76,7 +84,8 @@ class CuratorServiceDiscoveryModule extends PrivateModule { CuratorFramework provideCuratorFramework( ShutdownRegistry shutdownRegistry, @ServiceDiscoveryBindings.ZooKeeper Iterable<InetSocketAddress> zooKeeperCluster, - ACLProvider aclProvider) { + ACLProvider aclProvider, + StatsProvider statsProvider) { String 
connectString = FluentIterable.from(zooKeeperCluster) @@ -87,6 +96,31 @@ class CuratorServiceDiscoveryModule extends PrivateModule { connectString = connectString + zooKeeperConfig.getChrootPath().get(); } + // export current connection state + for (ConnectionState connectionState : ConnectionState.values()) { + statsProvider.makeGauge( + zkConnectionGaugeName(connectionState), + new Supplier<Integer>() { + @Override + public Integer get() { + return connectionState.equals(currentState) ? 1 : 0; + } + } + ); + } + + // connection state counter + AtomicLong zkConnectionConnectedCounter = + statsProvider.makeCounter(zkConnectionStateCounterName(ConnectionState.CONNECTED)); + AtomicLong zkConnectionReadonlyCounter = + statsProvider.makeCounter(zkConnectionStateCounterName(ConnectionState.READ_ONLY)); + AtomicLong zkConnectionSuspendedCounter = + statsProvider.makeCounter(zkConnectionStateCounterName(ConnectionState.SUSPENDED)); + AtomicLong zkConnectionReconnectedCounter = + statsProvider.makeCounter(zkConnectionStateCounterName(ConnectionState.RECONNECTED)); + AtomicLong zkConnectionLostCounter = + statsProvider.makeCounter(zkConnectionStateCounterName(ConnectionState.LOST)); + // This emulates the default BackoffHelper configuration used by the legacy commons/zookeeper // stack. BackoffHelper is unbounded, this dies after around 5 minutes using the 10 retries. // NB: BoundedExponentialBackoffRetry caps max retries at 29 if you send it a larger value. 
@@ -110,6 +144,31 @@ class CuratorServiceDiscoveryModule extends PrivateModule { } CuratorFramework curatorFramework = builder.build(); + Listenable<ConnectionStateListener> connectionStateListener = curatorFramework + .getConnectionStateListenable(); + connectionStateListener.addListener((CuratorFramework client, ConnectionState newState) -> { + currentState = newState; + switch (newState) { + case CONNECTED: + zkConnectionConnectedCounter.getAndIncrement(); + break; + case READ_ONLY: + zkConnectionReadonlyCounter.getAndIncrement(); + break; + case SUSPENDED: + zkConnectionSuspendedCounter.getAndIncrement(); + break; + case RECONNECTED: + zkConnectionReconnectedCounter.getAndIncrement(); + break; + case LOST: + zkConnectionLostCounter.getAndIncrement(); + break; + default: + currentState = null; + break; + } + }); // TODO(John Sirois): It would be nice to use a Service to control the lifecycle here, but other // services (org.apache.aurora.scheduler.http.JettyServerModule.RedirectMonitor) rely on this @@ -182,4 +241,12 @@ class CuratorServiceDiscoveryModule extends PrivateModule { SingletonService provideSingletonService(CuratorFramework client, Codec<ServiceInstance> codec) { return new CuratorSingletonService(client, discoveryPath, MEMBER_TOKEN, codec); } + + private String zkConnectionStateCounterName(ConnectionState state) { + return zkConnectionGaugeName(state) + "_counter"; + } + + private String zkConnectionGaugeName(ConnectionState state) { + return "zk_connection_state_" + state; + } } http://git-wip-us.apache.org/repos/asf/aurora/blob/3bdf0274/src/test/java/org/apache/aurora/scheduler/discovery/AbstractDiscoveryModuleTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/aurora/scheduler/discovery/AbstractDiscoveryModuleTest.java b/src/test/java/org/apache/aurora/scheduler/discovery/AbstractDiscoveryModuleTest.java index d90192b..0f2121e 100644 --- 
a/src/test/java/org/apache/aurora/scheduler/discovery/AbstractDiscoveryModuleTest.java +++ b/src/test/java/org/apache/aurora/scheduler/discovery/AbstractDiscoveryModuleTest.java @@ -25,11 +25,13 @@ import com.google.inject.Module; import org.apache.aurora.common.quantity.Amount; import org.apache.aurora.common.quantity.Time; +import org.apache.aurora.common.stats.StatsProvider; import org.apache.aurora.common.testing.TearDownTestCase; import org.apache.aurora.common.zookeeper.Credentials; import org.apache.aurora.common.zookeeper.SingletonService; import org.apache.aurora.common.zookeeper.ZooKeeperUtils; import org.apache.aurora.scheduler.app.ServiceGroupMonitor; +import org.apache.aurora.scheduler.testing.FakeStatsProvider; import org.junit.Test; import static org.junit.Assert.assertNotNull; @@ -58,6 +60,8 @@ abstract class AbstractDiscoveryModuleTest extends TearDownTestCase { bind(ServiceDiscoveryBindings.ZOO_KEEPER_ACL_KEY) .toInstance(ZooKeeperUtils.OPEN_ACL_UNSAFE); + bind(StatsProvider.class).toInstance(new FakeStatsProvider()); + bindExtraRequirements(binder()); } },