Repository: aurora
Updated Branches:
  refs/heads/master eb3660d9e -> 3bdf0274c


Expose stats on ZooKeeper connection state

* zk_connection_state_STATE shows 1 if STATE is the current connection state,
otherwise 0.
* zk_connection_state_STATE_counter counts the number of times the connection
has entered STATE since scheduler start.

Bugs closed: AURORA-1838

Reviewed at https://reviews.apache.org/r/54624/


Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/3bdf0274
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/3bdf0274
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/3bdf0274

Branch: refs/heads/master
Commit: 3bdf0274c18b77df354d3e0bf138e25072ed4706
Parents: eb3660d
Author: Jing Chen <milantr...@gmail.com>
Authored: Tue Jan 10 23:35:21 2017 +0100
Committer: Stephan Erb <s...@apache.org>
Committed: Tue Jan 10 23:35:21 2017 +0100

----------------------------------------------------------------------
 .../CuratorServiceDiscoveryModule.java          | 69 +++++++++++++++++++-
 .../discovery/AbstractDiscoveryModuleTest.java  |  4 ++
 2 files changed, 72 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/aurora/blob/3bdf0274/src/main/java/org/apache/aurora/scheduler/discovery/CuratorServiceDiscoveryModule.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/aurora/scheduler/discovery/CuratorServiceDiscoveryModule.java
 
b/src/main/java/org/apache/aurora/scheduler/discovery/CuratorServiceDiscoveryModule.java
index 999a542..ea167a8 100644
--- 
a/src/main/java/org/apache/aurora/scheduler/discovery/CuratorServiceDiscoveryModule.java
+++ 
b/src/main/java/org/apache/aurora/scheduler/discovery/CuratorServiceDiscoveryModule.java
@@ -15,11 +15,13 @@ package org.apache.aurora.scheduler.discovery;
 
 import java.net.InetSocketAddress;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.Predicate;
 
 import javax.inject.Singleton;
 
 import com.google.common.base.Joiner;
+import com.google.common.base.Supplier;
 import com.google.common.collect.FluentIterable;
 import com.google.inject.Exposed;
 import com.google.inject.PrivateModule;
@@ -32,6 +34,7 @@ import org.apache.aurora.common.io.Codec;
 import org.apache.aurora.common.net.InetSocketAddressHelper;
 import org.apache.aurora.common.quantity.Amount;
 import org.apache.aurora.common.quantity.Time;
+import org.apache.aurora.common.stats.StatsProvider;
 import org.apache.aurora.common.thrift.ServiceInstance;
 import org.apache.aurora.common.zookeeper.Credentials;
 import org.apache.aurora.common.zookeeper.ServerSet;
@@ -41,7 +44,10 @@ import org.apache.curator.RetryPolicy;
 import org.apache.curator.framework.CuratorFramework;
 import org.apache.curator.framework.CuratorFrameworkFactory;
 import org.apache.curator.framework.api.ACLProvider;
+import org.apache.curator.framework.listen.Listenable;
 import org.apache.curator.framework.recipes.cache.PathChildrenCache;
+import org.apache.curator.framework.state.ConnectionState;
+import org.apache.curator.framework.state.ConnectionStateListener;
 import org.apache.curator.retry.BoundedExponentialBackoffRetry;
 import org.apache.curator.utils.PathUtils;
 import org.apache.zookeeper.data.ACL;
@@ -58,6 +64,8 @@ class CuratorServiceDiscoveryModule extends PrivateModule {
   private final String discoveryPath;
   private final ZooKeeperConfig zooKeeperConfig;
 
+  private ConnectionState currentState;
+
   CuratorServiceDiscoveryModule(String discoveryPath, ZooKeeperConfig 
zooKeeperConfig) {
     this.discoveryPath = PathUtils.validatePath(discoveryPath);
     this.zooKeeperConfig = requireNonNull(zooKeeperConfig);
@@ -76,7 +84,8 @@ class CuratorServiceDiscoveryModule extends PrivateModule {
   CuratorFramework provideCuratorFramework(
       ShutdownRegistry shutdownRegistry,
       @ServiceDiscoveryBindings.ZooKeeper Iterable<InetSocketAddress> 
zooKeeperCluster,
-      ACLProvider aclProvider) {
+      ACLProvider aclProvider,
+      StatsProvider statsProvider) {
 
     String connectString =
         FluentIterable.from(zooKeeperCluster)
@@ -87,6 +96,31 @@ class CuratorServiceDiscoveryModule extends PrivateModule {
       connectString = connectString + zooKeeperConfig.getChrootPath().get();
     }
 
+    // export current connection state
+    for (ConnectionState connectionState : ConnectionState.values()) {
+      statsProvider.makeGauge(
+          zkConnectionGaugeName(connectionState),
+          new Supplier<Integer>() {
+            @Override
+            public Integer get() {
+              return connectionState.equals(currentState) ? 1 : 0;
+            }
+          }
+      );
+    }
+
+    // connection state counter
+    AtomicLong zkConnectionConnectedCounter =
+        
statsProvider.makeCounter(zkConnectionStateCounterName(ConnectionState.CONNECTED));
+    AtomicLong zkConnectionReadonlyCounter =
+        
statsProvider.makeCounter(zkConnectionStateCounterName(ConnectionState.READ_ONLY));
+    AtomicLong zkConnectionSuspendedCounter =
+        
statsProvider.makeCounter(zkConnectionStateCounterName(ConnectionState.SUSPENDED));
+    AtomicLong zkConnectionReconnectedCounter =
+        
statsProvider.makeCounter(zkConnectionStateCounterName(ConnectionState.RECONNECTED));
+    AtomicLong zkConnectionLostCounter =
+        
statsProvider.makeCounter(zkConnectionStateCounterName(ConnectionState.LOST));
+
     // This emulates the default BackoffHelper configuration used by the 
legacy commons/zookeeper
     // stack. BackoffHelper is unbounded, this dies after around 5 minutes 
using the 10 retries.
     // NB: BoundedExponentialBackoffRetry caps max retries at 29 if you send 
it a larger value.
@@ -110,6 +144,31 @@ class CuratorServiceDiscoveryModule extends PrivateModule {
     }
 
     CuratorFramework curatorFramework = builder.build();
+    Listenable<ConnectionStateListener> connectionStateListener = 
curatorFramework
+        .getConnectionStateListenable();
+    connectionStateListener.addListener((CuratorFramework client, 
ConnectionState newState) -> {
+      currentState = newState;
+      switch (newState) {
+        case CONNECTED:
+          zkConnectionConnectedCounter.getAndIncrement();
+          break;
+        case READ_ONLY:
+          zkConnectionReadonlyCounter.getAndIncrement();
+          break;
+        case SUSPENDED:
+          zkConnectionSuspendedCounter.getAndIncrement();
+          break;
+        case RECONNECTED:
+          zkConnectionReconnectedCounter.getAndIncrement();
+          break;
+        case LOST:
+          zkConnectionLostCounter.getAndIncrement();
+          break;
+        default:
+          currentState = null;
+          break;
+      }
+    });
 
     // TODO(John Sirois): It would be nice to use a Service to control the 
lifecycle here, but other
     // services 
(org.apache.aurora.scheduler.http.JettyServerModule.RedirectMonitor) rely on 
this
@@ -182,4 +241,12 @@ class CuratorServiceDiscoveryModule extends PrivateModule {
   SingletonService provideSingletonService(CuratorFramework client, 
Codec<ServiceInstance> codec) {
     return new CuratorSingletonService(client, discoveryPath, MEMBER_TOKEN, 
codec);
   }
+
+  private String zkConnectionStateCounterName(ConnectionState state) {
+    return zkConnectionGaugeName(state) + "_counter";
+  }
+
+  private String zkConnectionGaugeName(ConnectionState state) {
+    return "zk_connection_state_" + state;
+  }
 }

http://git-wip-us.apache.org/repos/asf/aurora/blob/3bdf0274/src/test/java/org/apache/aurora/scheduler/discovery/AbstractDiscoveryModuleTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/aurora/scheduler/discovery/AbstractDiscoveryModuleTest.java
 
b/src/test/java/org/apache/aurora/scheduler/discovery/AbstractDiscoveryModuleTest.java
index d90192b..0f2121e 100644
--- 
a/src/test/java/org/apache/aurora/scheduler/discovery/AbstractDiscoveryModuleTest.java
+++ 
b/src/test/java/org/apache/aurora/scheduler/discovery/AbstractDiscoveryModuleTest.java
@@ -25,11 +25,13 @@ import com.google.inject.Module;
 
 import org.apache.aurora.common.quantity.Amount;
 import org.apache.aurora.common.quantity.Time;
+import org.apache.aurora.common.stats.StatsProvider;
 import org.apache.aurora.common.testing.TearDownTestCase;
 import org.apache.aurora.common.zookeeper.Credentials;
 import org.apache.aurora.common.zookeeper.SingletonService;
 import org.apache.aurora.common.zookeeper.ZooKeeperUtils;
 import org.apache.aurora.scheduler.app.ServiceGroupMonitor;
+import org.apache.aurora.scheduler.testing.FakeStatsProvider;
 import org.junit.Test;
 
 import static org.junit.Assert.assertNotNull;
@@ -58,6 +60,8 @@ abstract class AbstractDiscoveryModuleTest extends 
TearDownTestCase {
                 bind(ServiceDiscoveryBindings.ZOO_KEEPER_ACL_KEY)
                     .toInstance(ZooKeeperUtils.OPEN_ACL_UNSAFE);
 
+                bind(StatsProvider.class).toInstance(new FakeStatsProvider());
+
                 bindExtraRequirements(binder());
               }
             },

Reply via email to